1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.dev.test.translit; 8 9 import java.util.ArrayList; 10 import java.util.Enumeration; 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Iterator; 14 import java.util.List; 15 import java.util.Locale; 16 import java.util.Map.Entry; 17 18 import com.ibm.icu.dev.test.TestFmwk; 19 import com.ibm.icu.dev.test.TestUtil; 20 import com.ibm.icu.dev.util.UnicodeMap; 21 import com.ibm.icu.impl.Utility; 22 import com.ibm.icu.impl.UtilityExtensions; 23 import com.ibm.icu.lang.CharSequences; 24 import com.ibm.icu.lang.UCharacter; 25 import com.ibm.icu.lang.UScript; 26 import com.ibm.icu.text.CanonicalIterator; 27 import com.ibm.icu.text.Normalizer2; 28 import com.ibm.icu.text.Replaceable; 29 import com.ibm.icu.text.ReplaceableString; 30 import com.ibm.icu.text.StringTransform; 31 import com.ibm.icu.text.Transliterator; 32 import com.ibm.icu.text.UTF16; 33 import com.ibm.icu.text.UnicodeFilter; 34 import com.ibm.icu.text.UnicodeSet; 35 import com.ibm.icu.text.UnicodeSetIterator; 36 import com.ibm.icu.util.CaseInsensitiveString; 37 import com.ibm.icu.util.ULocale; 38 39 /*********************************************************************** 40 41 HOW TO USE THIS TEST FILE 42 -or- 43 How I developed on two platforms 44 without losing (too much of) my mind 45 46 47 1. Add new tests by copying/pasting/changing existing tests. On Java, 48 any public void method named Test...() taking no parameters becomes 49 a test. On C++, you need to modify the header and add a line to 50 the runIndexedTest() dispatch method. 51 52 2. Make liberal use of the expect() method; it is your friend. 53 54 3. 
The tests in this file exactly match those in a sister file on the 55 other side. The two files are: 56 57 icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java 58 icu4c: source/test/intltest/transtst.cpp 59 60 ==> THIS IS THE IMPORTANT PART <== 61 62 When you add a test in this file, add it in transtst.cpp too. 63 Give it the same name and put it in the same relative place. This 64 makes maintenance a lot simpler for any poor soul who ends up 65 trying to synchronize the tests between icu4j and icu4c. 66 67 4. If you MUST enter a test that is NOT paralleled in the sister file, 68 then add it in the special non-mirrored section. These are 69 labeled 70 71 "icu4j ONLY" 72 73 or 74 75 "icu4c ONLY" 76 77 Make sure you document the reason the test is here and not there. 78 79 80 Thank you. 81 The Management 82 ***********************************************************************/ 83 84 /** 85 * @test 86 * @summary General test of Transliterator 87 */ 88 public class TransliteratorTest extends TestFmwk { 89 main(String[] args)90 public static void main(String[] args) throws Exception { 91 new TransliteratorTest().run(args); 92 } 93 TestHangul()94 public void TestHangul() { 95 96 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 97 Transliterator hl = lh.getInverse(); 98 99 assertTransform("Transform", "\uCE20", lh, "ch"); 100 101 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 102 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 103 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 104 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 105 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 106 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 107 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 108 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja"); 109 
assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha"); 110 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga"); 111 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 112 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 113 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 114 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 115 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 116 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 117 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 118 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 119 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 120 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 121 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 122 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 123 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 124 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 125 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 126 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 127 128 } 129 TestChinese()130 public void TestChinese() { 131 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 132 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 133 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 134 } 135 TestRegistry()136 public void TestRegistry() { 137 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 138 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 139 checkRegistry("foo1", "[:letter:] a > b;"); 140 for (Enumeration 
e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 141 String id = (String) e.nextElement(); 142 checkRegistry(id); 143 } 144 } 145 checkRegistry(String id, String rules)146 private void checkRegistry (String id, String rules) { 147 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 148 Transliterator.registerInstance(foo); 149 checkRegistry(id); 150 } 151 checkRegistry(String id)152 private void checkRegistry(String id) { 153 Transliterator fie = Transliterator.getInstance(id); 154 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 155 fie.setFilter(fae); 156 Transliterator foe = Transliterator.getInstance(id); 157 UnicodeFilter fee = foe.getFilter(); 158 if (fae.equals(fee)) { 159 errln("Changed what is in registry for " + id); 160 } 161 } 162 TestInstantiation()163 public void TestInstantiation() { 164 long ms = System.currentTimeMillis(); 165 String ID; 166 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 167 ID = (String) e.nextElement(); 168 if (ID.equals("Latin-Han/definition")) { 169 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later"); 170 continue; 171 } 172 Transliterator t = null; 173 try { 174 t = Transliterator.getInstance(ID); 175 // This is only true for some subclasses 176 // // We should get a new instance if we try again 177 // Transliterator t2 = Transliterator.getInstance(ID); 178 // if (t != t2) { 179 // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t); 180 // } else { 181 // errln("FAIL: " + ID + " returned identical instances"); 182 // t = null; 183 // } 184 } catch (IllegalArgumentException ex) { 185 errln("FAIL: " + ID); 186 throw ex; 187 } 188 189 // if (t.getFilter() != null) { 190 // errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter()); 191 // } 192 193 if (t != null) { 194 // Now test toRules 195 String rules = null; 196 try { 197 rules = 
t.toRules(true); 198 199 Transliterator.createFromRules("x", rules, Transliterator.FORWARD); 200 } catch (IllegalArgumentException ex2) { 201 errln("FAIL: " + ID + ".toRules() => bad rules: " + 202 rules); 203 throw ex2; 204 } 205 } 206 } 207 208 // Now test the failure path 209 try { 210 ID = "<Not a valid Transliterator ID>"; 211 Transliterator t = Transliterator.getInstance(ID); 212 errln("FAIL: " + ID + " returned " + t); 213 } catch (IllegalArgumentException ex) { 214 logln("OK: Bogus ID handled properly"); 215 } 216 217 ms = System.currentTimeMillis() - ms; 218 logln("Elapsed time: " + ms + " ms"); 219 } 220 TestSimpleRules()221 public void TestSimpleRules() { 222 /* Example: rules 1. ab>x|y 223 * 2. yc>z 224 * 225 * []|eabcd start - no match, copy e to tranlated buffer 226 * [e]|abcd match rule 1 - copy output & adjust cursor 227 * [ex|y]cd match rule 2 - copy output & adjust cursor 228 * [exz]|d no match, copy d to transliterated buffer 229 * [exzd]| done 230 */ 231 expect("ab>x|y;" + 232 "yc>z", 233 "eabcd", "exzd"); 234 235 /* Another set of rules: 236 * 1. ab>x|yzacw 237 * 2. za>q 238 * 3. qc>r 239 * 4. cw>n 240 * 241 * []|ab Rule 1 242 * [x|yzacw] No match 243 * [xy|zacw] Rule 2 244 * [xyq|cw] Rule 4 245 * [xyqn]| Done 246 */ 247 expect("ab>x|yzacw;" + 248 "za>q;" + 249 "qc>r;" + 250 "cw>n", 251 "ab", "xyqn"); 252 253 /* Test categories 254 */ 255 Transliterator t = Transliterator.createFromRules("<ID>", 256 "$dummy=\uE100;" + 257 "$vowel=[aeiouAEIOU];" + 258 "$lu=[:Lu:];" + 259 "$vowel } $lu > '!';" + 260 "$vowel > '&';" + 261 "'!' { $lu > '^';" + 262 "$lu > '*';" + 263 "a>ERROR", 264 Transliterator.FORWARD); 265 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 266 } 267 268 /** 269 * Test inline set syntax and set variable syntax. 
270 */ TestInlineSet()271 public void TestInlineSet() { 272 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 273 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 274 275 expect("$digit = [0-9];" + 276 "$alpha = [a-zA-Z];" + 277 "$alphanumeric = [$digit $alpha];" + // *** 278 "$special = [^$alphanumeric];" + // *** 279 "$alphanumeric > '-';" + 280 "$special > '*';", 281 282 "thx-1138", "---*----"); 283 } 284 285 /** 286 * Create some inverses and confirm that they work. We have to be 287 * careful how we do this, since the inverses will not be true 288 * inverses -- we can't throw any random string at the composition 289 * of the transliterators and expect the identity function. F x 290 * F' != I. However, if we are careful about the input, we will 291 * get the expected results. 292 */ TestRuleBasedInverse()293 public void TestRuleBasedInverse() { 294 String RULES = 295 "abc>zyx;" + 296 "ab>yz;" + 297 "bc>zx;" + 298 "ca>xy;" + 299 "a>x;" + 300 "b>y;" + 301 "c>z;" + 302 303 "abc<zyx;" + 304 "ab<yz;" + 305 "bc<zx;" + 306 "ca<xy;" + 307 "a<x;" + 308 "b<y;" + 309 "c<z;" + 310 311 ""; 312 313 String[] DATA = { 314 // Careful here -- random strings will not work. If we keep 315 // the left side to the domain and the right side to the range 316 // we will be okay though (left, abc; right xyz). 317 "a", "x", 318 "abcacab", "zyxxxyy", 319 "caccb", "xyzzy", 320 }; 321 322 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 323 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 324 for (int i=0; i<DATA.length; i+=2) { 325 expect(fwd, DATA[i], DATA[i+1]); 326 expect(rev, DATA[i+1], DATA[i]); 327 } 328 } 329 330 /** 331 * Basic test of keyboard. 
332 */ TestKeyboard()333 public void TestKeyboard() { 334 Transliterator t = Transliterator.createFromRules("<ID>", 335 "psch>Y;" 336 +"ps>y;" 337 +"ch>x;" 338 +"a>A;", Transliterator.FORWARD); 339 String DATA[] = { 340 // insertion, buffer 341 "a", "A", 342 "p", "Ap", 343 "s", "Aps", 344 "c", "Apsc", 345 "a", "AycA", 346 "psch", "AycAY", 347 null, "AycAY", // null means finishKeyboardTransliteration 348 }; 349 350 keyboardAux(t, DATA); 351 } 352 353 /** 354 * Basic test of keyboard with cursor. 355 */ TestKeyboard2()356 public void TestKeyboard2() { 357 Transliterator t = Transliterator.createFromRules("<ID>", 358 "ych>Y;" 359 +"ps>|y;" 360 +"ch>x;" 361 +"a>A;", Transliterator.FORWARD); 362 String DATA[] = { 363 // insertion, buffer 364 "a", "A", 365 "p", "Ap", 366 "s", "Aps", // modified for rollback - "Ay", 367 "c", "Apsc", // modified for rollback - "Ayc", 368 "a", "AycA", 369 "p", "AycAp", 370 "s", "AycAps", // modified for rollback - "AycAy", 371 "c", "AycApsc", // modified for rollback - "AycAyc", 372 "h", "AycAY", 373 null, "AycAY", // null means finishKeyboardTransliteration 374 }; 375 376 keyboardAux(t, DATA); 377 } 378 379 /** 380 * Test keyboard transliteration with back-replacement. 381 */ TestKeyboard3()382 public void TestKeyboard3() { 383 // We want th>z but t>y. Furthermore, during keyboard 384 // transliteration we want t>y then yh>z if t, then h are 385 // typed. 386 String RULES = 387 "t>|y;" + 388 "yh>z;" + 389 ""; 390 391 String[] DATA = { 392 // Column 1: characters to add to buffer (as if typed) 393 // Column 2: expected appearance of buffer after 394 // keyboard xliteration. 
395 "a", "a", 396 "b", "ab", 397 "t", "abt", // modified for rollback - "aby", 398 "c", "abyc", 399 "t", "abyct", // modified for rollback - "abycy", 400 "h", "abycz", 401 null, "abycz", // null means finishKeyboardTransliteration 402 }; 403 404 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 405 keyboardAux(t, DATA); 406 } 407 keyboardAux(Transliterator t, String[] DATA)408 private void keyboardAux(Transliterator t, String[] DATA) { 409 Transliterator.Position index = new Transliterator.Position(); 410 ReplaceableString s = new ReplaceableString(); 411 for (int i=0; i<DATA.length; i+=2) { 412 StringBuffer log; 413 if (DATA[i] != null) { 414 log = new StringBuffer(s.toString() + " + " 415 + DATA[i] 416 + " -> "); 417 t.transliterate(s, index, DATA[i]); 418 } else { 419 log = new StringBuffer(s.toString() + " => "); 420 t.finishTransliteration(s, index); 421 } 422 UtilityExtensions.formatInput(log, s, index); 423 if (s.toString().equals(DATA[i+1])) { 424 logln(log.toString()); 425 } else { 426 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 427 } 428 } 429 } 430 431 // Latin-Arabic has been temporarily removed until it can be 432 // done correctly. 433 434 // public void TestArabic() { 435 // String DATA[] = { 436 // "Arabic", 437 // "\u062a\u062a\u0645\u062a\u0639 "+ 438 // "\u0627\u0644\u0644\u063a\u0629 "+ 439 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 440 // "\u0628\u0628\u0646\u0638\u0645 "+ 441 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 442 // "\u062c\u0645\u064a\u0644\u0629" 443 // }; 444 445 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 446 // for (int i=0; i<DATA.length; i+=2) { 447 // expect(t, DATA[i], DATA[i+1]); 448 // } 449 // } 450 451 /** 452 * Compose the Kana transliterator forward and reverse and try 453 * some strings that should come out unchanged. 
454 */ TestCompoundKana()455 public void TestCompoundKana() { 456 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 457 expect(t, "aaaaa", "aaaaa"); 458 } 459 460 /** 461 * Compose the hex transliterators forward and reverse. 462 */ TestCompoundHex()463 public void TestCompoundHex() { 464 Transliterator a = Transliterator.getInstance("Any-Hex"); 465 Transliterator b = Transliterator.getInstance("Hex-Any"); 466 // Transliterator[] trans = { a, b }; 467 // Transliterator ab = Transliterator.getInstance(trans); 468 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 469 470 // Do some basic tests of b 471 expect(b, "\\u0030\\u0031", "01"); 472 473 String s = "abcde"; 474 expect(ab, s, s); 475 476 // trans = new Transliterator[] { b, a }; 477 // Transliterator ba = Transliterator.getInstance(trans); 478 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 479 ReplaceableString str = new ReplaceableString(s); 480 a.transliterate(str); 481 expect(ba, str.toString(), str.toString()); 482 } 483 484 /** 485 * Do some basic tests of filtering. 
486 */ TestFiltering()487 public void TestFiltering() { 488 489 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 490 tempTrans.setFilter(new UnicodeSet("[a]")); 491 String tempResult = tempTrans.transform("xa"); 492 assertEquals("context should not be filtered ", "xb", tempResult); 493 494 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 495 tempResult = tempTrans.transform("xa"); 496 assertEquals("context should not be filtered ", "xb", tempResult); 497 498 Transliterator hex = Transliterator.getInstance("Any-Hex"); 499 hex.setFilter(new UnicodeFilter() { 500 public boolean contains(int c) { 501 return c != 'c'; 502 } 503 public String toPattern(boolean escapeUnprintable) { 504 return ""; 505 } 506 public boolean matchesIndexValue(int v) { 507 return false; 508 } 509 public void addMatchSetTo(UnicodeSet toUnionTo) {} 510 }); 511 String s = "abcde"; 512 String out = hex.transliterate(s); 513 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 514 if (out.equals(exp)) { 515 logln("Ok: \"" + exp + "\""); 516 } else { 517 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 518 } 519 } 520 521 /** 522 * Test anchors 523 */ TestAnchors()524 public void TestAnchors() { 525 expect("^ab > 01 ;" + 526 " ab > |8 ;" + 527 " b > k ;" + 528 " 8x$ > 45 ;" + 529 " 8x > 77 ;", 530 531 "ababbabxabx", 532 "018k7745"); 533 expect("$s = [z$] ;" + 534 "$s{ab > 01 ;" + 535 " ab > |8 ;" + 536 " b > k ;" + 537 " 8x}$s > 45 ;" + 538 " 8x > 77 ;", 539 540 "abzababbabxzabxabx", 541 "01z018k45z01x45"); 542 } 543 544 /** 545 * Test pattern quoting and escape mechanisms. 
546 */ TestPatternQuoting()547 public void TestPatternQuoting() { 548 // Array of 3n items 549 // Each item is <rules>, <input>, <expected output> 550 String[] DATA = { 551 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 552 }; 553 554 for (int i=0; i<DATA.length; i+=3) { 555 logln("Pattern: " + Utility.escape(DATA[i])); 556 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 557 expect(t, DATA[i+1], DATA[i+2]); 558 } 559 } 560 TestVariableNames()561 public void TestVariableNames() { 562 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 563 if (gl == null) { 564 errln("FAIL: null Transliterator returned."); 565 } 566 } 567 568 /** 569 * Regression test for bugs found in Greek transliteration. 570 */ TestJ277()571 public void TestJ277() { 572 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 573 574 char sigma = (char)0x3C3; 575 char upsilon = (char)0x3C5; 576 char nu = (char)0x3BD; 577 // not used char PHI = (char)0x3A6; 578 char alpha = (char)0x3B1; 579 // not used char omega = (char)0x3C9; 580 // not used char omicron = (char)0x3BF; 581 // not used char epsilon = (char)0x3B5; 582 583 // sigma upsilon nu -> syn 584 StringBuffer buf = new StringBuffer(); 585 buf.append(sigma).append(upsilon).append(nu); 586 String syn = buf.toString(); 587 expect(gl, syn, "syn"); 588 589 // sigma alpha upsilon nu -> saun 590 buf.setLength(0); 591 buf.append(sigma).append(alpha).append(upsilon).append(nu); 592 String sayn = buf.toString(); 593 expect(gl, sayn, "saun"); 594 595 // Again, using a smaller rule set 596 String rules = 597 "$alpha = \u03B1;" + 598 "$nu = \u03BD;" + 599 "$sigma = \u03C3;" + 600 "$ypsilon = \u03C5;" + 601 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 602 "s <> $sigma;" + 603 "a <> $alpha;" + 604 "u <> $vowel { $ypsilon;" + 605 "y <> $ypsilon;" + 606 "n <> $nu;"; 607 Transliterator mini = Transliterator.createFromRules 608 
("mini", rules, Transliterator.REVERSE); 609 expect(mini, syn, "syn"); 610 expect(mini, sayn, "saun"); 611 612 //| // Transliterate the Greek locale data 613 //| Locale el("el"); 614 //| DateFormatSymbols syms(el, status); 615 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 616 //| int32_t i, count; 617 //| const UnicodeString* data = syms.getMonths(count); 618 //| for (i=0; i<count; ++i) { 619 //| if (data[i].length() == 0) { 620 //| continue; 621 //| } 622 //| UnicodeString out(data[i]); 623 //| gl->transliterate(out); 624 //| bool_t ok = TRUE; 625 //| if (data[i].length() >= 2 && out.length() >= 2 && 626 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 627 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 628 //| ok = FALSE; 629 //| } 630 //| } 631 //| if (ok) { 632 //| logln(prettify(data[i] + " -> " + out)); 633 //| } else { 634 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 635 //| } 636 //| } 637 } 638 639 // /** 640 // * Prefix, suffix support in hex transliterators 641 // */ 642 // public void TestJ243() { 643 // // Test default Hex-Any, which should handle 644 // // \\u, \\U, u+, and U+ 645 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 646 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 647 // 648 // // Try a custom Hex-Any 649 // // \\uXXXX and &#xXXXX; 650 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 651 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 652 // "abcd5fx0123"); 653 // 654 // // Try custom Any-Hex (default is tested elsewhere) 655 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 656 // expect(hex3, "012", "012"); 657 // } 658 TestJ329()659 public void TestJ329() { 660 661 Object[] DATA = { 662 Boolean.FALSE, "a > b; c > d", 663 Boolean.TRUE, "a > b; no operator; c > d", 664 }; 665 666 for (int i=0; i<DATA.length; 
i+=2) { 667 String err = null; 668 try { 669 Transliterator.createFromRules("<ID>", 670 (String) DATA[i+1], 671 Transliterator.FORWARD); 672 } catch (IllegalArgumentException e) { 673 err = e.getMessage(); 674 } 675 boolean gotError = (err != null); 676 String desc = (String) DATA[i+1] + 677 (gotError ? (" -> error: " + err) : " -> no error"); 678 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 679 logln("Ok: " + desc); 680 } else { 681 errln("FAIL: " + desc); 682 } 683 } 684 } 685 686 /** 687 * Test segments and segment references. 688 */ TestSegments()689 public void TestSegments() { 690 // Array of 3n items 691 // Each item is <rules>, <input>, <expected output> 692 String[] DATA = { 693 "([a-z]) '.' ([0-9]) > $2 '-' $1", 694 "abc.123.xyz.456", 695 "ab1-c23.xy4-z56", 696 }; 697 698 for (int i=0; i<DATA.length; i+=3) { 699 logln("Pattern: " + Utility.escape(DATA[i])); 700 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 701 expect(t, DATA[i+1], DATA[i+2]); 702 } 703 } 704 705 /** 706 * Test cursor positioning outside of the key 707 */ TestCursorOffset()708 public void TestCursorOffset() { 709 // Array of 3n items 710 // Each item is <rules>, <input>, <expected output> 711 String[] DATA = { 712 "pre {alpha} post > | @ ALPHA ;" + 713 "eALPHA > beta ;" + 714 "pre {beta} post > BETA @@ | ;" + 715 "post > xyz", 716 717 "prealphapost prebetapost", 718 "prbetaxyz preBETApost", 719 }; 720 721 for (int i=0; i<DATA.length; i+=3) { 722 logln("Pattern: " + Utility.escape(DATA[i])); 723 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 724 expect(t, DATA[i+1], DATA[i+2]); 725 } 726 } 727 728 /** 729 * Test zero length and > 1 char length variable values. Test 730 * use of variable refs in UnicodeSets. 
731 */ TestArbitraryVariableValues()732 public void TestArbitraryVariableValues() { 733 // Array of 3n items 734 // Each item is <rules>, <input>, <expected output> 735 String[] DATA = { 736 "$abe = ab;" + 737 "$pat = x[yY]z;" + 738 "$ll = 'a-z';" + 739 "$llZ = [$ll];" + 740 "$llY = [$ll$pat];" + 741 "$emp = ;" + 742 743 "$abe > ABE;" + 744 "$pat > END;" + 745 "$llZ > 1;" + 746 "$llY > 2;" + 747 "7$emp 8 > 9;" + 748 "", 749 750 "ab xYzxyz stY78", 751 "ABE ENDEND 1129", 752 }; 753 754 for (int i=0; i<DATA.length; i+=3) { 755 logln("Pattern: " + Utility.escape(DATA[i])); 756 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 757 expect(t, DATA[i+1], DATA[i+2]); 758 } 759 } 760 761 /** 762 * Confirm that the contextStart, contextLimit, start, and limit 763 * behave correctly. 764 */ TestPositionHandling()765 public void TestPositionHandling() { 766 // Array of 3n items 767 // Each item is <rules>, <input>, <expected output> 768 String[] DATA = { 769 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 770 "xtat txtb", // pos 0,9,0,9 771 "xTTaSS TTxUUb", 772 773 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 774 "xtat txtb", // pos 2,9,3,8 775 "xtaSS TTxUUb", 776 777 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 778 "xtat txtb", // pos 3,8,3,8 779 "xtaTT TTxTTb", 780 }; 781 782 // Array of 4n positions -- these go with the DATA array 783 // They are: contextStart, contextLimit, start, limit 784 int[] POS = { 785 0, 9, 0, 9, 786 2, 9, 3, 8, 787 3, 8, 3, 8, 788 }; 789 790 int n = DATA.length/3; 791 for (int i=0; i<n; i++) { 792 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 793 Transliterator.Position pos = new Transliterator.Position( 794 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 795 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 796 t.transliterate(rsource, pos); 797 t.finishTransliteration(rsource, pos); 798 String result = rsource.toString(); 799 String exp = DATA[3*i+2]; 800 
expectAux(Utility.escape(DATA[3*i]), 801 DATA[3*i+1], 802 result, 803 result.equals(exp), 804 exp); 805 } 806 } 807 808 /** 809 * Test the Hiragana-Katakana transliterator. 810 */ TestHiraganaKatakana()811 public void TestHiraganaKatakana() { 812 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 813 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 814 815 // Array of 3n items 816 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 817 String[] DATA = { 818 "both", 819 "\u3042\u3090\u3099\u3092\u3050", 820 "\u30A2\u30F8\u30F2\u30B0", 821 822 "kh", 823 "\u307C\u3051\u3060\u3042\u3093\u30FC", 824 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 825 }; 826 827 for (int i=0; i<DATA.length; i+=3) { 828 switch (DATA[i].charAt(0)) { 829 case 'h': // Hiragana-Katakana 830 expect(hk, DATA[i+1], DATA[i+2]); 831 break; 832 case 'k': // Katakana-Hiragana 833 expect(kh, DATA[i+2], DATA[i+1]); 834 break; 835 case 'b': // both 836 expect(hk, DATA[i+1], DATA[i+2]); 837 expect(kh, DATA[i+2], DATA[i+1]); 838 break; 839 } 840 } 841 842 } 843 TestCopyJ476()844 public void TestCopyJ476() { 845 // This is a C++-only copy constructor test 846 } 847 848 /** 849 * Test inter-Indic transliterators. These are composed. 850 */ TestInterIndic()851 public void TestInterIndic() { 852 String ID = "Devanagari-Gujarati"; 853 Transliterator dg = Transliterator.getInstance(ID); 854 if (dg == null) { 855 errln("FAIL: getInstance(" + ID + ") returned null"); 856 return; 857 } 858 String id = dg.getID(); 859 if (!id.equals(ID)) { 860 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 861 } 862 String dev = "\u0901\u090B\u0925"; 863 String guj = "\u0A81\u0A8B\u0AA5"; 864 expect(dg, dev, guj); 865 } 866 867 /** 868 * Test filter syntax in IDs. 
(J23) 869 */ TestFilterIDs()870 public void TestFilterIDs() { 871 String[] DATA = { 872 "[aeiou]Any-Hex", // ID 873 "[aeiou]Hex-Any", // expected inverse ID 874 "quizzical", // src 875 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 876 877 "[aeiou]Any-Hex;[^5]Hex-Any", 878 "[^5]Any-Hex;[aeiou]Hex-Any", 879 "quizzical", 880 "q\\u0075izzical", 881 882 "[abc]Null", 883 "[abc]Null", 884 "xyz", 885 "xyz", 886 }; 887 888 for (int i=0; i<DATA.length; i+=4) { 889 String ID = DATA[i]; 890 Transliterator t = Transliterator.getInstance(ID); 891 expect(t, DATA[i+2], DATA[i+3]); 892 893 // Check the ID 894 if (!ID.equals(t.getID())) { 895 errln("FAIL: getInstance(" + ID + ").getID() => " + 896 t.getID()); 897 } 898 899 // Check the inverse 900 String uID = DATA[i+1]; 901 Transliterator u = t.getInverse(); 902 if (u == null) { 903 errln("FAIL: " + ID + ".getInverse() returned NULL"); 904 } else if (!u.getID().equals(uID)) { 905 errln("FAIL: " + ID + ".getInverse().getID() => " + 906 u.getID() + ", expected " + uID); 907 } 908 } 909 } 910 911 /** 912 * Test the case mapping transliterators. 913 */ TestCaseMap()914 public void TestCaseMap() { 915 Transliterator toUpper = 916 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 917 Transliterator toLower = 918 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 919 Transliterator toTitle = 920 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 921 922 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 923 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 924 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 925 "the quick brown foX jumped over the lazY dogs."); 926 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 927 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 928 } 929 930 /** 931 * Test the name mapping transliterators. 
932 */ TestNameMap()933 public void TestNameMap() { 934 Transliterator uni2name = 935 Transliterator.getInstance("Any-Name[^abc]"); 936 Transliterator name2uni = 937 Transliterator.getInstance("Name-Any"); 938 939 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 940 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 941 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 942 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 943 944 // round trip 945 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 946 947 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 948 expect(t, s, s); 949 } 950 951 /** 952 * Test liberalized ID syntax. 1006c 953 */ TestLiberalizedID()954 public void TestLiberalizedID() { 955 // Some test cases have an expected getID() value of NULL. This 956 // means I have disabled the test case for now. This stuff is 957 // still under development, and I haven't decided whether to make 958 // getID() return canonical case yet. It will all get rewritten 959 // with the move to Source-Target/Variant IDs anyway. 
[aliu] 960 String DATA[] = { 961 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 962 " Null ", "Null", "whitespace", 963 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 964 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 965 }; 966 967 for (int i=0; i<DATA.length; i+=3) { 968 try { 969 Transliterator t = Transliterator.getInstance(DATA[i]); 970 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 971 logln("Ok: " + DATA[i+2] + 972 " create ID \"" + DATA[i] + "\" => \"" + 973 t.getID() + "\""); 974 } else { 975 errln("FAIL: " + DATA[i+2] + 976 " create ID \"" + DATA[i] + "\" => \"" + 977 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 978 } 979 } catch (IllegalArgumentException e) { 980 errln("FAIL: " + DATA[i+2] + 981 " create ID \"" + DATA[i] + "\""); 982 } 983 } 984 } 985 TestCreateInstance()986 public void TestCreateInstance() { 987 String FORWARD = "F"; 988 String REVERSE = "R"; 989 String DATA[] = { 990 // Column 1: id 991 // Column 2: direction 992 // Column 3: expected ID, or "" if expect failure 993 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 994 995 // JB#2689: bad compound causes crash 996 "InvalidSource-InvalidTarget", FORWARD, "", 997 "InvalidSource-InvalidTarget", REVERSE, "", 998 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 999 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 1000 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 1001 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 1002 1003 null 1004 }; 1005 1006 for (int i=0; DATA[i]!=null; i+=3) { 1007 String id=DATA[i]; 1008 int dir = (DATA[i+1]==FORWARD)? 
1009 Transliterator.FORWARD:Transliterator.REVERSE; 1010 String expID=DATA[i+2]; 1011 Exception e = null; 1012 Transliterator t; 1013 try { 1014 t = Transliterator.getInstance(id,dir); 1015 } catch (Exception e1) { 1016 e = e1; 1017 t = null; 1018 } 1019 String newID = (t!=null)?t.getID():""; 1020 boolean ok = (newID.equals(expID)); 1021 if (t==null) { 1022 newID = e.getMessage(); 1023 } 1024 if (ok) { 1025 logln("Ok: createInstance(" + 1026 id + "," + DATA[i+1] + ") => " + newID); 1027 } else { 1028 errln("FAIL: createInstance(" + 1029 id + "," + DATA[i+1] + ") => " + newID + 1030 ", expected " + expID); 1031 } 1032 } 1033 } 1034 1035 /** 1036 * Test the normalization transliterator. 1037 */ TestNormalizationTransliterator()1038 public void TestNormalizationTransliterator() { 1039 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest 1040 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1041 String[][] CANON = { 1042 // Input Decomposed Composed 1043 {"cat", "cat", "cat" }, 1044 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1045 1046 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1047 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1048 1049 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1050 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1051 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1052 1053 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1054 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1055 1056 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1057 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1058 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1059 1060 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1061 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1062 1063 
{"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1064 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1065 1066 {"Henry IV", "Henry IV", "Henry IV" }, 1067 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1068 1069 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1070 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1071 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1072 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1073 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1074 1075 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1076 }; 1077 1078 String[][] COMPAT = { 1079 // Input Decomposed Composed 1080 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1081 1082 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1083 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1084 1085 {"Henry IV", "Henry IV", "Henry IV" }, 1086 {"Henry \u2163", "Henry IV", "Henry IV" }, 1087 1088 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1089 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1090 1091 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1092 }; 1093 1094 Transliterator NFD = Transliterator.getInstance("NFD"); 1095 Transliterator NFC = Transliterator.getInstance("NFC"); 1096 for (int i=0; i<CANON.length; ++i) { 1097 String in = CANON[i][0]; 1098 String expd = CANON[i][1]; 1099 String expc = CANON[i][2]; 1100 expect(NFD, in, expd); 1101 expect(NFC, in, expc); 1102 } 1103 1104 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1105 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1106 for (int i=0; i<COMPAT.length; ++i) { 1107 String in = COMPAT[i][0]; 1108 String expkd = COMPAT[i][1]; 1109 String expkc = COMPAT[i][2]; 1110 expect(NFKD, in, expkd); 1111 expect(NFKC, in, expkc); 1112 } 1113 1114 Transliterator t = Transliterator.getInstance("NFD; 
[x]Remove"); 1115 expect(t, "\u010dx", "c\u030C"); 1116 } 1117 1118 /** 1119 * Test compound RBT rules. 1120 */ TestCompoundRBT()1121 public void TestCompoundRBT() { 1122 // Careful with spacing and ';' here: Phrase this exactly 1123 // as toRules() is going to return it. If toRules() changes 1124 // with regard to spacing or ';', then adjust this string. 1125 String rule = "::Hex-Any;\n" + 1126 "::Any-Lower;\n" + 1127 "a > '.A.';\n" + 1128 "b > '.B.';\n" + 1129 "::[^t]Any-Upper;"; 1130 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1131 if (t == null) { 1132 errln("FAIL: createFromRules failed"); 1133 return; 1134 } 1135 expect(t, "\u0043at in the hat, bat on the mat", 1136 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1137 String r = t.toRules(true); 1138 if (r.equals(rule)) { 1139 logln("OK: toRules() => " + r); 1140 } else { 1141 errln("FAIL: toRules() => " + r + 1142 ", expected " + rule); 1143 } 1144 1145 // Now test toRules 1146 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1147 if (t == null) { 1148 errln("FAIL: createInstance failed"); 1149 return; 1150 } 1151 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1152 r = t.toRules(true); 1153 if (!r.equals(exp)) { 1154 errln("FAIL: toRules() => " + r + 1155 ", expected " + exp); 1156 } else { 1157 logln("OK: toRules() => " + r); 1158 } 1159 1160 // Round trip the result of toRules 1161 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1162 if (t == null) { 1163 errln("FAIL: createFromRules #2 failed"); 1164 return; 1165 } else { 1166 logln("OK: createFromRules(" + r + ") succeeded"); 1167 } 1168 1169 // Test toRules again 1170 r = t.toRules(true); 1171 if (!r.equals(exp)) { 1172 errln("FAIL: toRules() => " + r + 1173 ", expected " + exp); 1174 } else { 1175 logln("OK: toRules() => " + r); 1176 } 1177 1178 // Test Foo(Bar) IDs. 
Careful with spacing in id; make it conform 1179 // to what the regenerated ID will look like. 1180 String id = "Upper(Lower);(NFKC)"; 1181 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1182 if (t == null) { 1183 errln("FAIL: createInstance #2 failed"); 1184 return; 1185 } 1186 if (t.getID().equals(id)) { 1187 logln("OK: created " + id); 1188 } else { 1189 errln("FAIL: createInstance(" + id + 1190 ").getID() => " + t.getID()); 1191 } 1192 1193 Transliterator u = t.getInverse(); 1194 if (u == null) { 1195 errln("FAIL: createInverse failed"); 1196 return; 1197 } 1198 exp = "NFKC();Lower(Upper)"; 1199 if (u.getID().equals(exp)) { 1200 logln("OK: createInverse(" + id + ") => " + 1201 u.getID()); 1202 } else { 1203 errln("FAIL: createInverse(" + id + ") => " + 1204 u.getID()); 1205 } 1206 } 1207 1208 /** 1209 * Compound filter semantics were orginially not implemented 1210 * correctly. Originally, each component filter f(i) is replaced by 1211 * f'(i) = f(i) && g, where g is the filter for the compound 1212 * transliterator. 1213 * 1214 * From Mark: 1215 * 1216 * Suppose and I have a transliterator X. Internally X is 1217 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1218 * 1219 * The compound should convert all greek characters (through latin) to 1220 * cyrillic, then lowercase the result. The filter should say "don't 1221 * touch 'A' in the original". But because an intermediate result 1222 * happens to go through "A", the Greek Alpha gets hung up. 1223 */ TestCompoundFilter()1224 public void TestCompoundFilter() { 1225 Transliterator t = Transliterator.getInstance 1226 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1227 t.setFilter(new UnicodeSet("[^A]")); 1228 1229 // Only the 'A' at index 1 should remain unchanged 1230 expect(t, 1231 CharsToUnicodeString("BA\\u039A\\u0391"), 1232 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1233 } 1234 1235 /** 1236 * Test the "Remove" transliterator. 
1237 */ TestRemove()1238 public void TestRemove() { 1239 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1240 expect(t, "The quick brown fox.", 1241 "Th qck brwn fx."); 1242 } 1243 TestToRules()1244 public void TestToRules() { 1245 String RBT = "rbt"; 1246 String SET = "set"; 1247 String[] DATA = { 1248 RBT, 1249 "$a=\\u4E61; [$a] > A;", 1250 "[\\u4E61] > A;", 1251 1252 RBT, 1253 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1254 "[[:Zs:][:Zl:]]{a} > A;", 1255 1256 SET, 1257 "[[:Zs:][:Zl:]]", 1258 "[[:Zs:][:Zl:]]", 1259 1260 SET, 1261 "[:Ps:]", 1262 "[:Ps:]", 1263 1264 SET, 1265 "[:L:]", 1266 "[:L:]", 1267 1268 SET, 1269 "[[:L:]-[A]]", 1270 "[[:L:]-[A]]", 1271 1272 SET, 1273 "[~[:Lu:][:Ll:]]", 1274 "[~[:Lu:][:Ll:]]", 1275 1276 SET, 1277 "[~[a-z]]", 1278 "[~[a-z]]", 1279 1280 RBT, 1281 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1282 "[^[:Zs:]]{a} > A;", 1283 1284 RBT, 1285 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1286 "[[a-z]-[:Zs:]]{a} > A;", 1287 1288 RBT, 1289 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1290 "[[:Zs:]&[a-z]]{a} > A;", 1291 1292 RBT, 1293 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1294 "[x[:Zs:]]{a} > A;", 1295 1296 RBT, 1297 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1298 "$macron = \\u0304 ;"+ 1299 "$evowel = [aeiouyAEIOUY] ;"+ 1300 "$iotasub = \\u0345 ;"+ 1301 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1302 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1303 1304 RBT, 1305 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1306 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1307 }; 1308 1309 for (int d=0; d < DATA.length; d+=3) { 1310 if (DATA[d] == RBT) { 1311 // Transliterator test 1312 Transliterator t = Transliterator.createFromRules("ID", 1313 DATA[d+1], Transliterator.FORWARD); 1314 if (t == null) { 1315 errln("FAIL: createFromRules failed"); 1316 return; 1317 } 1318 String rules, escapedRules; 1319 rules = t.toRules(false); 1320 
escapedRules = t.toRules(true); 1321 String expRules = Utility.unescape(DATA[d+2]); 1322 String expEscapedRules = DATA[d+2]; 1323 if (rules.equals(expRules)) { 1324 logln("Ok: " + DATA[d+1] + 1325 " => " + Utility.escape(rules)); 1326 } else { 1327 errln("FAIL: " + DATA[d+1] + 1328 " => " + Utility.escape(rules + ", exp " + expRules)); 1329 } 1330 if (escapedRules.equals(expEscapedRules)) { 1331 logln("Ok: " + DATA[d+1] + 1332 " => " + escapedRules); 1333 } else { 1334 errln("FAIL: " + DATA[d+1] + 1335 " => " + escapedRules + ", exp " + expEscapedRules); 1336 } 1337 1338 } else { 1339 // UnicodeSet test 1340 String pat = DATA[d+1]; 1341 String expToPat = DATA[d+2]; 1342 UnicodeSet set = new UnicodeSet(pat); 1343 1344 // Adjust spacing etc. as necessary. 1345 String toPat; 1346 toPat = set.toPattern(true); 1347 if (expToPat.equals(toPat)) { 1348 logln("Ok: " + pat + 1349 " => " + toPat); 1350 } else { 1351 errln("FAIL: " + pat + 1352 " => " + Utility.escape(toPat) + 1353 ", exp " + Utility.escape(pat)); 1354 } 1355 } 1356 } 1357 } 1358 TestContext()1359 public void TestContext() { 1360 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1361 1362 expect("de > x; {d}e > y;", 1363 "de", 1364 "ye", 1365 pos); 1366 1367 expect("ab{c} > z;", 1368 "xadabdabcy", 1369 "xadabdabzy"); 1370 } 1371 CharsToUnicodeString(String s)1372 static final String CharsToUnicodeString(String s) { 1373 return Utility.unescape(s); 1374 } 1375 TestSupplemental()1376 public void TestSupplemental() { 1377 1378 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1379 "a > $a; $s > i;"), 1380 CharsToUnicodeString("ab\\U0001030Fx"), 1381 CharsToUnicodeString("\\U00010300bix")); 1382 1383 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1384 "$b=[A-Z\\U00010400-\\U0001044D];" + 1385 "($a)($b) > $2 $1;"), 1386 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1387 
CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1388 1389 // k|ax\\U00010300xm 1390 1391 // k|a\\U00010400\\U00010300xm 1392 // ky|\\U00010400\\U00010300xm 1393 // ky\\U00010400|\\U00010300xm 1394 1395 // ky\\U00010400|\\U00010300\\U00010400m 1396 // ky\\U00010400y|\\U00010400m 1397 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1398 "$a {x} > | @ \\U00010400;" + 1399 "{$a} [^\\u0000-\\uFFFF] > y;"), 1400 CharsToUnicodeString("kax\\U00010300xm"), 1401 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1402 1403 expect(Transliterator.getInstance("Any-Name"), 1404 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1405 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1406 1407 expect(Transliterator.getInstance("Name-Any"), 1408 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1409 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1410 1411 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1412 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1413 "U+10330U+10FF00U+E0061U+00A0"); 1414 1415 expect(Transliterator.getInstance("Any-Hex/C"), 1416 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1417 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1418 1419 expect(Transliterator.getInstance("Any-Hex/Perl"), 1420 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1421 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1422 1423 expect(Transliterator.getInstance("Any-Hex/Java"), 1424 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1425 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1426 1427 expect(Transliterator.getInstance("Any-Hex/XML"), 1428 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1429 "𐌰􏼀󠁡 "); 1430 1431 expect(Transliterator.getInstance("Any-Hex/XML10"), 1432 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1433 "𐌰􏼀󠁡 "); 1434 1435 
expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1436 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1437 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1438 } 1439 TestQuantifier()1440 public void TestQuantifier() { 1441 1442 // Make sure @ in a quantified anteContext works 1443 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1444 "AAAAAb", 1445 "aaa(aac)"); 1446 1447 // Make sure @ in a quantified postContext works 1448 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1449 "baaaaa", 1450 "caa(aaa)"); 1451 1452 // Make sure @ in a quantified postContext with seg ref works 1453 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1454 "baaaaa", 1455 "baa(aaa)"); 1456 1457 // Make sure @ past ante context doesn't enter ante context 1458 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1459 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1460 "xxxab", 1461 "xxx(ac)", 1462 pos); 1463 1464 // Make sure @ past post context doesn't pass limit 1465 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1466 expect("{b} a+ > c @@ |; x > y; a > A;", 1467 "baxx", 1468 "caxx", 1469 pos2); 1470 1471 // Make sure @ past post context doesn't enter post context 1472 expect("{b} a+ > c @@ |; x > y; a > A;", 1473 "baxx", 1474 "cayy"); 1475 1476 expect("(ab)? c > d;", 1477 "c abc ababc", 1478 "d d abd"); 1479 1480 // NOTE: The (ab)+ when referenced just yields a single "ab", 1481 // not the full sequence of them. This accords with perl behavior. 
1482 expect("(ab)+ {x} > '(' $1 ')';", 1483 "x abx ababxy", 1484 "x ab(ab) abab(ab)y"); 1485 1486 expect("b+ > x;", 1487 "ac abc abbc abbbc", 1488 "ac axc axc axc"); 1489 1490 expect("[abc]+ > x;", 1491 "qac abrc abbcs abtbbc", 1492 "qx xrx xs xtx"); 1493 1494 expect("q{(ab)+} > x;", 1495 "qa qab qaba qababc qaba", 1496 "qa qx qxa qxc qxa"); 1497 1498 expect("q(ab)* > x;", 1499 "qa qab qaba qababc", 1500 "xa x xa xc"); 1501 1502 // NOTE: The (ab)+ when referenced just yields a single "ab", 1503 // not the full sequence of them. This accords with perl behavior. 1504 expect("q(ab)* > '(' $1 ')';", 1505 "qa qab qaba qababc", 1506 "()a (ab) (ab)a (ab)c"); 1507 1508 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1509 // quoted string 1510 expect("'ab'+ > x;", 1511 "bb ab ababb", 1512 "bb x xb"); 1513 1514 // $foo+ and $foo* -- the quantifier should apply to the entire 1515 // variable reference 1516 expect("$var = ab; $var+ > x;", 1517 "bb ab ababb", 1518 "bb x xb"); 1519 } 1520 1521 static class TestFact implements Transliterator.Factory { 1522 static class NameableNullTrans extends Transliterator { NameableNullTrans(String id)1523 public NameableNullTrans(String id) { 1524 super(id, null); 1525 } handleTransliterate(Replaceable text, Position offsets, boolean incremental)1526 protected void handleTransliterate(Replaceable text, 1527 Position offsets, boolean incremental) { 1528 offsets.start = offsets.limit; 1529 } 1530 } 1531 String id; TestFact(String theID)1532 public TestFact(String theID) { 1533 id = theID; 1534 } getInstance(String ignoredID)1535 public Transliterator getInstance(String ignoredID) { 1536 return new NameableNullTrans(id); 1537 } 1538 } 1539 TestSTV()1540 public void TestSTV() { 1541 Enumeration es = Transliterator.getAvailableSources(); 1542 for (int i=0; es.hasMoreElements(); ++i) { 1543 String source = (String) es.nextElement(); 1544 logln("" + i + ": " + source); 1545 if (source.length() == 0) { 1546 errln("FAIL: empty 
source"); 1547 continue; 1548 } 1549 Enumeration et = Transliterator.getAvailableTargets(source); 1550 for (int j=0; et.hasMoreElements(); ++j) { 1551 String target = (String) et.nextElement(); 1552 logln(" " + j + ": " + target); 1553 if (target.length() == 0) { 1554 errln("FAIL: empty target"); 1555 continue; 1556 } 1557 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1558 for (int k=0; ev.hasMoreElements(); ++k) { 1559 String variant = (String) ev.nextElement(); 1560 if (variant.length() == 0) { 1561 logln(" " + k + ": <empty>"); 1562 } else { 1563 logln(" " + k + ": " + variant); 1564 } 1565 } 1566 } 1567 } 1568 1569 // Test registration 1570 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1571 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1572 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1573 for (int i=0; i<3; ++i) { 1574 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1575 try { 1576 Transliterator t = Transliterator.getInstance(IDS[i]); 1577 if (t.getID().equals(IDS[i])) { 1578 logln("Ok: Registration/creation succeeded for ID " + 1579 IDS[i]); 1580 } else { 1581 errln("FAIL: Registration of ID " + 1582 IDS[i] + " creates ID " + t.getID()); 1583 } 1584 Transliterator.unregister(IDS[i]); 1585 try { 1586 t = Transliterator.getInstance(IDS[i]); 1587 errln("FAIL: Unregistration failed for ID " + 1588 IDS[i] + "; still receiving ID " + t.getID()); 1589 } catch (IllegalArgumentException e2) { 1590 // Good; this is what we expect 1591 logln("Ok; Unregistered " + IDS[i]); 1592 } 1593 } catch (IllegalArgumentException e) { 1594 errln("FAIL: Registration/creation failed for ID " + 1595 IDS[i]); 1596 } finally { 1597 Transliterator.unregister(IDS[i]); 1598 } 1599 } 1600 1601 // Make sure getAvailable API reflects removal 1602 for (Enumeration e = Transliterator.getAvailableIDs(); 1603 e.hasMoreElements(); ) { 1604 String id = (String) e.nextElement(); 1605 for (int i=0; 
i<3; ++i) { 1606 if (id.equals(FULL_IDS[i])) { 1607 errln("FAIL: unregister(" + id + ") failed"); 1608 } 1609 } 1610 } 1611 for (Enumeration e = Transliterator.getAvailableTargets("Any"); 1612 e.hasMoreElements(); ) { 1613 String t = (String) e.nextElement(); 1614 if (t.equals(IDS[0])) { 1615 errln("FAIL: unregister(Any-" + t + ") failed"); 1616 } 1617 } 1618 for (Enumeration e = Transliterator.getAvailableSources(); 1619 e.hasMoreElements(); ) { 1620 String s = (String) e.nextElement(); 1621 for (int i=0; i<3; ++i) { 1622 if (SOURCES[i] == null) continue; 1623 if (s.equals(SOURCES[i])) { 1624 errln("FAIL: unregister(" + s + "-*) failed"); 1625 } 1626 } 1627 } 1628 } 1629 1630 /** 1631 * Test inverse of Greek-Latin; Title() 1632 */ TestCompoundInverse()1633 public void TestCompoundInverse() { 1634 Transliterator t = Transliterator.getInstance 1635 ("Greek-Latin; Title()", Transliterator.REVERSE); 1636 if (t == null) { 1637 errln("FAIL: createInstance"); 1638 return; 1639 } 1640 String exp = "(Title);Latin-Greek"; 1641 if (t.getID().equals(exp)) { 1642 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1643 t.getID()); 1644 } else { 1645 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1646 t.getID() + "\", expected \"" + exp + "\""); 1647 } 1648 } 1649 1650 /** 1651 * Test NFD chaining with RBT 1652 */ TestNFDChainRBT()1653 public void TestNFDChainRBT() { 1654 Transliterator t = Transliterator.createFromRules( 1655 "TEST", "::NFD; aa > Q; a > q;", 1656 Transliterator.FORWARD); 1657 logln(t.toRules(true)); 1658 expect(t, "aa", "Q"); 1659 } 1660 1661 /** 1662 * Inverse of "Null" should be "Null". (J21) 1663 */ TestNullInverse()1664 public void TestNullInverse() { 1665 Transliterator t = Transliterator.getInstance("Null"); 1666 Transliterator u = t.getInverse(); 1667 if (!u.getID().equals("Null")) { 1668 errln("FAIL: Inverse of Null should be Null"); 1669 } 1670 } 1671 1672 /** 1673 * Check ID of inverse of alias. 
(J22) 1674 */ TestAliasInverseID()1675 public void TestAliasInverseID() { 1676 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1677 Transliterator t = Transliterator.getInstance(ID); 1678 Transliterator u = t.getInverse(); 1679 String exp = "Hangul-Latin"; 1680 String got = u.getID(); 1681 if (!got.equals(exp)) { 1682 errln("FAIL: Inverse of " + ID + " is " + got + 1683 ", expected " + exp); 1684 } 1685 } 1686 1687 /** 1688 * Test IDs of inverses of compound transliterators. (J20) 1689 */ TestCompoundInverseID()1690 public void TestCompoundInverseID() { 1691 String ID = "Latin-Jamo;NFC(NFD)"; 1692 Transliterator t = Transliterator.getInstance(ID); 1693 Transliterator u = t.getInverse(); 1694 String exp = "NFD(NFC);Jamo-Latin"; 1695 String got = u.getID(); 1696 if (!got.equals(exp)) { 1697 errln("FAIL: Inverse of " + ID + " is " + got + 1698 ", expected " + exp); 1699 } 1700 } 1701 1702 /** 1703 * Test undefined variable. 1704 */ TestUndefinedVariable()1705 public void TestUndefinedVariable() { 1706 String rule = "$initial } a <> \u1161;"; 1707 try { 1708 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1709 } catch (IllegalArgumentException e) { 1710 logln("OK: Got exception for " + rule + ", as expected: " + 1711 e.getMessage()); 1712 return; 1713 } 1714 errln("Fail: bogus rule " + rule + " compiled without error"); 1715 } 1716 1717 /** 1718 * Test empty context. 1719 */ TestEmptyContext()1720 public void TestEmptyContext() { 1721 expect(" { a } > b;", "xay a ", "xby b "); 1722 } 1723 1724 /** 1725 * Test compound filter ID syntax 1726 */ TestCompoundFilterID()1727 public void TestCompoundFilterID() { 1728 String[] DATA = { 1729 // Col. 1 = ID or rule set (latter must start with #) 1730 1731 // = columns > 1 are null if expect col. 1 to be illegal = 1732 1733 // Col. 2 = direction, "F..." or "R..." 1734 // Col. 3 = source string 1735 // Col. 
4 = exp result 1736 1737 "[abc]; [abc]", null, null, null, // multiple filters 1738 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1739 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1740 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1741 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1742 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1743 }; 1744 1745 for (int i=0; i<DATA.length; i+=4) { 1746 String id = DATA[i]; 1747 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1748 Transliterator.REVERSE : Transliterator.FORWARD; 1749 String source = DATA[i+2]; 1750 String exp = DATA[i+3]; 1751 boolean expOk = (DATA[i+1] != null); 1752 Transliterator t = null; 1753 IllegalArgumentException e = null; 1754 try { 1755 if (id.charAt(0) == '#') { 1756 t = Transliterator.createFromRules("ID", id, direction); 1757 } else { 1758 t = Transliterator.getInstance(id, direction); 1759 } 1760 } catch (IllegalArgumentException ee) { 1761 e = ee; 1762 } 1763 boolean ok = (t != null && e == null); 1764 if (ok == expOk) { 1765 logln("Ok: " + id + " => " + t + 1766 (e != null ? (", " + e.getMessage()) : "")); 1767 if (source != null) { 1768 expect(t, source, exp); 1769 } 1770 } else { 1771 errln("FAIL: " + id + " => " + t + 1772 (e != null ? (", " + e.getMessage()) : "")); 1773 } 1774 } 1775 } 1776 1777 /** 1778 * Test new property set syntax 1779 */ TestPropertySet()1780 public void TestPropertySet() { 1781 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1782 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1783 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1784 } 1785 1786 /** 1787 * Test various failure points of the new 2.0 engine. 
1788 */ TestNewEngine()1789 public void TestNewEngine() { 1790 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1791 // Katakana should be untouched 1792 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1793 1794 if (true) { 1795 // This test will only work if Transliterator.ROLLBACK is 1796 // true. Otherwise, this test will fail, revealing a 1797 // limitation of global filters in incremental mode. 1798 1799 Transliterator a = 1800 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1801 Transliterator A = 1802 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1803 1804 //Transliterator array[] = new Transliterator[] { 1805 // a, 1806 // Transliterator.getInstance("NFD"), 1807 // A }; 1808 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1809 1810 try { 1811 Transliterator.registerInstance(a); 1812 Transliterator.registerInstance(A); 1813 1814 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1815 expect(t, "aAaA", "bAbA"); 1816 1817 Transliterator[] u = t.getElements(); 1818 assertTrue("getElements().length", u.length == 3); 1819 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1820 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1821 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1822 1823 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1824 t.setFilter(new UnicodeSet("[:Ll:]")); 1825 expect(t, "aAaA", "bAbA"); 1826 } finally { 1827 Transliterator.unregister("a_to_A"); 1828 Transliterator.unregister("A_to_b"); 1829 } 1830 } 1831 1832 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1833 "a", 1834 "ax"); 1835 1836 String gr = 1837 "$ddot = \u0308 ;" + 1838 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1839 "$rough = \u0314 ;" + 1840 "($lcgvowel+ $ddot?) 
$rough > h | $1 ;" + 1841 "\u03b1 <> a ;" + 1842 "$rough <> h ;"; 1843 1844 expect(gr, "\u03B1\u0314", "ha"); 1845 } 1846 1847 /** 1848 * Test quantified segment behavior. We want: 1849 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1850 */ TestQuantifiedSegment()1851 public void TestQuantifiedSegment() { 1852 // The normal case 1853 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1854 1855 // The tricky case; the quantifier is around the segment 1856 expect("([abc])+ > x $1 x;", "cba", "xax"); 1857 1858 // Tricky case in reverse direction 1859 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1860 1861 // Check post-context segment 1862 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1863 1864 // Test toRule/toPattern for non-quantified segment. 1865 // Careful with spacing here. 1866 String r = "([a-c]){q} > x $1 x;"; 1867 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1868 String rr = t.toRules(true); 1869 if (!r.equals(rr)) { 1870 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1871 } else { 1872 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1873 } 1874 1875 // Test toRule/toPattern for quantified segment. 1876 // Careful with spacing here. 
1877 r = "([a-c])+{q} > x $1 x;"; 1878 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1879 rr = t.toRules(true); 1880 if (!r.equals(rr)) { 1881 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1882 } else { 1883 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1884 } 1885 } 1886 1887 //====================================================================== 1888 // Ram's tests 1889 //====================================================================== 1890 /* this test performs test of rules in ISO 15915 */ TestDevanagariLatinRT()1891 public void TestDevanagariLatinRT(){ 1892 String[] source = { 1893 "bh\u0101rata", 1894 "kra", 1895 "k\u1E63a", 1896 "khra", 1897 "gra", 1898 "\u1E45ra", 1899 "cra", 1900 "chra", 1901 "j\u00F1a", 1902 "jhra", 1903 "\u00F1ra", 1904 "\u1E6Dya", 1905 "\u1E6Dhra", 1906 "\u1E0Dya", 1907 //"r\u0323ya", // \u095c is not valid in Devanagari 1908 "\u1E0Dhya", 1909 "\u1E5Bhra", 1910 "\u1E47ra", 1911 "tta", 1912 "thra", 1913 "dda", 1914 "dhra", 1915 "nna", 1916 "pra", 1917 "phra", 1918 "bra", 1919 "bhra", 1920 "mra", 1921 "\u1E49ra", 1922 //"l\u0331ra", 1923 "yra", 1924 "\u1E8Fra", 1925 //"l-", 1926 "vra", 1927 "\u015Bra", 1928 "\u1E63ra", 1929 "sra", 1930 "hma", 1931 "\u1E6D\u1E6Da", 1932 "\u1E6D\u1E6Dha", 1933 "\u1E6Dh\u1E6Dha", 1934 "\u1E0D\u1E0Da", 1935 "\u1E0D\u1E0Dha", 1936 "\u1E6Dya", 1937 "\u1E6Dhya", 1938 "\u1E0Dya", 1939 "\u1E0Dhya", 1940 // Not roundtrippable -- 1941 // \u0939\u094d\u094d\u092E - hma 1942 // \u0939\u094d\u092E - hma 1943 // CharsToUnicodeString("hma"), 1944 "hya", 1945 "\u015Br\u0325", 1946 "\u015Bca", 1947 "\u0115", 1948 "san\u0304j\u012Bb s\u0113nagupta", 1949 "\u0101nand vaddir\u0101ju", 1950 }; 1951 String[] expected = { 1952 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 1953 "\u0915\u094D\u0930", /* kra */ 1954 "\u0915\u094D\u0937", /* ks\u0323a */ 1955 "\u0916\u094D\u0930", /* khra */ 1956 "\u0917\u094D\u0930", /* gra */ 1957 "\u0919\u094D\u0930", /* n\u0307ra */ 1958 
"\u091A\u094D\u0930", /* cra */ 1959 "\u091B\u094D\u0930", /* chra */ 1960 "\u091C\u094D\u091E", /* jn\u0303a */ 1961 "\u091D\u094D\u0930", /* jhra */ 1962 "\u091E\u094D\u0930", /* n\u0303ra */ 1963 "\u091F\u094D\u092F", /* t\u0323ya */ 1964 "\u0920\u094D\u0930", /* t\u0323hra */ 1965 "\u0921\u094D\u092F", /* d\u0323ya */ 1966 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 1967 "\u0922\u094D\u092F", /* d\u0323hya */ 1968 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 1969 "\u0923\u094D\u0930", /* n\u0323ra */ 1970 "\u0924\u094D\u0924", /* tta */ 1971 "\u0925\u094D\u0930", /* thra */ 1972 "\u0926\u094D\u0926", /* dda */ 1973 "\u0927\u094D\u0930", /* dhra */ 1974 "\u0928\u094D\u0928", /* nna */ 1975 "\u092A\u094D\u0930", /* pra */ 1976 "\u092B\u094D\u0930", /* phra */ 1977 "\u092C\u094D\u0930", /* bra */ 1978 "\u092D\u094D\u0930", /* bhra */ 1979 "\u092E\u094D\u0930", /* mra */ 1980 "\u0929\u094D\u0930", /* n\u0331ra */ 1981 //"\u0934\u094D\u0930", /* l\u0331ra */ 1982 "\u092F\u094D\u0930", /* yra */ 1983 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 1984 //"l-", 1985 "\u0935\u094D\u0930", /* vra */ 1986 "\u0936\u094D\u0930", /* s\u0301ra */ 1987 "\u0937\u094D\u0930", /* s\u0323ra */ 1988 "\u0938\u094D\u0930", /* sra */ 1989 "\u0939\u094d\u092E", /* hma */ 1990 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 1991 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 1992 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 1993 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 1994 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 1995 "\u091F\u094D\u092F", /* t\u0323ya */ 1996 "\u0920\u094D\u092F", /* t\u0323hya */ 1997 "\u0921\u094D\u092F", /* d\u0323ya */ 1998 "\u0922\u094D\u092F", /* d\u0323hya */ 1999 // "hma", /* hma */ 2000 "\u0939\u094D\u092F", /* hya */ 2001 "\u0936\u0943", /* s\u0301r\u0325a */ 2002 "\u0936\u094D\u091A", /* s\u0301ca */ 2003 "\u090d", /* e\u0306 */ 2004 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2005 
"\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2006 }; 2007 2008 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2009 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2010 2011 for(int i= 0; i<source.length; i++){ 2012 expect(latinToDev,(source[i]),(expected[i])); 2013 expect(devToLatin,(expected[i]),(source[i])); 2014 } 2015 2016 } TestTeluguLatinRT()2017 public void TestTeluguLatinRT(){ 2018 String[] source = { 2019 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2020 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2021 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2022 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2023 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2024 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2025 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2026 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2027 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2028 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2029 }; 2030 2031 String[] expected = { 2032 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2033 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2034 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2035 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2036 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2037 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2038 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2039 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f 
\u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2040 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2041 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2042 }; 2043 2044 2045 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2046 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2047 2048 for(int i= 0; i<source.length; i++){ 2049 expect(latinToDev,(source[i]),(expected[i])); 2050 expect(devToLatin,(expected[i]),(source[i])); 2051 } 2052 } 2053 TestSanskritLatinRT()2054 public void TestSanskritLatinRT(){ 2055 int MAX_LEN =15; 2056 String[] source = { 2057 "rmk\u1E63\u0113t", 2058 "\u015Br\u012Bmad", 2059 "bhagavadg\u012Bt\u0101", 2060 "adhy\u0101ya", 2061 "arjuna", 2062 "vi\u1E63\u0101da", 2063 "y\u014Dga", 2064 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2065 "uv\u0101cr\u0325", 2066 "dharmak\u1E63\u0113tr\u0113", 2067 "kuruk\u1E63\u0113tr\u0113", 2068 "samav\u0113t\u0101", 2069 "yuyutsava\u1E25", 2070 "m\u0101mak\u0101\u1E25", 2071 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2072 "kimakurvata", 2073 "san\u0304java", 2074 }; 2075 String[] expected = { 2076 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2077 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2078 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2079 "\u0905\u0927\u094d\u092f\u093e\u092f", 2080 "\u0905\u0930\u094d\u091c\u0941\u0928", 2081 "\u0935\u093f\u0937\u093e\u0926", 2082 "\u092f\u094b\u0917", 2083 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2084 "\u0909\u0935\u093E\u091A\u0943", 2085 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2086 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2087 "\u0938\u092e\u0935\u0947\u0924\u093e", 2088 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2089 
"\u092e\u093e\u092e\u0915\u093e\u0903", 2090 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2091 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2092 "\u0938\u0902\u091c\u0935", 2093 }; 2094 2095 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2096 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2097 for(int i= 0; i<MAX_LEN; i++){ 2098 expect(latinToDev,(source[i]),(expected[i])); 2099 expect(devToLatin,(expected[i]),(source[i])); 2100 } 2101 } 2102 TestCompoundLatinRT()2103 public void TestCompoundLatinRT(){ 2104 int MAX_LEN =15; 2105 String[] source = { 2106 "rmk\u1E63\u0113t", 2107 "\u015Br\u012Bmad", 2108 "bhagavadg\u012Bt\u0101", 2109 "adhy\u0101ya", 2110 "arjuna", 2111 "vi\u1E63\u0101da", 2112 "y\u014Dga", 2113 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2114 "uv\u0101cr\u0325", 2115 "dharmak\u1E63\u0113tr\u0113", 2116 "kuruk\u1E63\u0113tr\u0113", 2117 "samav\u0113t\u0101", 2118 "yuyutsava\u1E25", 2119 "m\u0101mak\u0101\u1E25", 2120 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2121 "kimakurvata", 2122 "san\u0304java" 2123 }; 2124 String[] expected = { 2125 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2126 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2127 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2128 "\u0905\u0927\u094d\u092f\u093e\u092f", 2129 "\u0905\u0930\u094d\u091c\u0941\u0928", 2130 "\u0935\u093f\u0937\u093e\u0926", 2131 "\u092f\u094b\u0917", 2132 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2133 "\u0909\u0935\u093E\u091A\u0943", 2134 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2135 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2136 "\u0938\u092e\u0935\u0947\u0924\u093e", 2137 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2138 "\u092e\u093e\u092e\u0915\u093e\u0903", 2139 // 
"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2140 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2141 "\u0938\u0902\u091c\u0935" 2142 }; 2143 2144 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2145 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2146 for(int i= 0; i<MAX_LEN; i++){ 2147 expect(latinToDevToLatin,(source[i]),(source[i])); 2148 expect(devToLatinToDev,(expected[i]),(expected[i])); 2149 } 2150 } 2151 /** 2152 * Test Gurmukhi-Devanagari Tippi and Bindi 2153 */ TestGurmukhiDevanagari()2154 public void TestGurmukhiDevanagari(){ 2155 // the rule says: 2156 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2157 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2158 2159 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2160 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2161 2162 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2163 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2164 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2165 StringBuffer src = new StringBuffer(" \u0902"); 2166 StringBuffer expect = new StringBuffer(" \u0A02"); 2167 while(vIter.next()){ 2168 src.setCharAt(0,(char) vIter.codepoint); 2169 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2170 expect(trans,src.toString(),expect.toString()); 2171 } 2172 2173 expect.setCharAt(1,'\u0A70'); 2174 while(nvIter.next()){ 2175 //src.setCharAt(0,(char) nvIter.codepoint); 2176 src.setCharAt(0,(char)nvIter.codepoint); 2177 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2178 expect(trans,src.toString(),expect.toString()); 2179 } 2180 } 2181 /** 2182 * Test instantiation from a locale. 
2183 */ TestLocaleInstantiation()2184 public void TestLocaleInstantiation() { 2185 Transliterator t; 2186 try{ 2187 t = Transliterator.getInstance("te_IN-Latin"); 2188 //expect(t, "\u0430", "a"); 2189 }catch(IllegalArgumentException ex){ 2190 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2191 } 2192 try{ 2193 t = Transliterator.getInstance("ru_RU-Latin"); 2194 expect(t, "\u0430", "a"); 2195 }catch(IllegalArgumentException ex){ 2196 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2197 } 2198 try{ 2199 t = Transliterator.getInstance("en-el"); 2200 expect(t, "a", "\u03B1"); 2201 }catch(IllegalArgumentException ex){ 2202 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2203 } 2204 } 2205 2206 /** 2207 * Test title case handling of accent (should ignore accents) 2208 */ TestTitleAccents()2209 public void TestTitleAccents() { 2210 Transliterator t = Transliterator.getInstance("Title"); 2211 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2212 } 2213 2214 /** 2215 * Basic test of a locale resource based rule. 2216 */ TestLocaleResource()2217 public void TestLocaleResource() { 2218 String DATA[] = { 2219 // id from to 2220 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2221 "Latin-el", "b", "\u03bc\u03c0", 2222 "Latin-Greek", "b", "\u03B2", 2223 "Greek-Latin/UNGEGN", "\u03B2", "v", 2224 "el-Latin", "\u03B2", "v", 2225 "Greek-Latin", "\u03B2", "b", 2226 }; 2227 for (int i=0; i<DATA.length; i+=3) { 2228 Transliterator t = Transliterator.getInstance(DATA[i]); 2229 expect(t, DATA[i+1], DATA[i+2]); 2230 } 2231 } 2232 2233 /** 2234 * Make sure parse errors reference the right line. 
2235 */ TestParseError()2236 public void TestParseError() { 2237 String rule = 2238 "a > b;\n" + 2239 "# more stuff\n" + 2240 "d << b;"; 2241 try { 2242 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2243 if(t!=null){ 2244 errln("FAIL: Did not get expected exception"); 2245 } 2246 } catch (IllegalArgumentException e) { 2247 String err = e.getMessage(); 2248 if (err.indexOf("d << b") >= 0) { 2249 logln("Ok: " + err); 2250 } else { 2251 errln("FAIL: " + err); 2252 } 2253 return; 2254 } 2255 errln("FAIL: no syntax error"); 2256 } 2257 2258 /** 2259 * Make sure sets on output are disallowed. 2260 */ TestOutputSet()2261 public void TestOutputSet() { 2262 String rule = "$set = [a-cm-n]; b > $set;"; 2263 Transliterator t = null; 2264 try { 2265 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2266 if(t!=null){ 2267 errln("FAIL: Did not get the expected exception"); 2268 } 2269 } catch (IllegalArgumentException e) { 2270 logln("Ok: " + e.getMessage()); 2271 return; 2272 } 2273 errln("FAIL: No syntax error"); 2274 } 2275 2276 /** 2277 * Test the use variable range pragma, making sure that use of 2278 * variable range characters is detected and flagged as an error. 
2279 */ TestVariableRange()2280 public void TestVariableRange() { 2281 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2282 try { 2283 Transliterator t = 2284 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2285 if(t!=null){ 2286 errln("FAIL: Did not get the expected exception"); 2287 } 2288 } catch (IllegalArgumentException e) { 2289 logln("Ok: " + e.getMessage()); 2290 return; 2291 } 2292 errln("FAIL: No syntax error"); 2293 } 2294 2295 /** 2296 * Test invalid post context error handling 2297 */ TestInvalidPostContext()2298 public void TestInvalidPostContext() { 2299 try { 2300 Transliterator t = 2301 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2302 if(t!=null){ 2303 errln("FAIL: Did not get the expected exception"); 2304 } 2305 } catch (IllegalArgumentException e) { 2306 String msg = e.getMessage(); 2307 if (msg.indexOf("a}b{c") >= 0) { 2308 logln("Ok: " + msg); 2309 } else { 2310 errln("FAIL: " + msg); 2311 } 2312 return; 2313 } 2314 errln("FAIL: No syntax error"); 2315 } 2316 2317 /** 2318 * Test ID form variants 2319 */ TestIDForms()2320 public void TestIDForms() { 2321 String DATA[] = { 2322 "NFC", null, "NFD", 2323 "nfd", null, "NFC", // make sure case is ignored 2324 "Any-NFKD", null, "Any-NFKC", 2325 "Null", null, "Null", 2326 "-nfkc", "nfkc", "NFKD", 2327 "-nfkc/", "nfkc", "NFKD", 2328 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2329 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2330 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2331 "Source-", null, null, 2332 "Source/Variant-", null, null, 2333 "Source-/Variant", null, null, 2334 "/Variant", null, null, 2335 "/Variant-", null, null, 2336 "-/Variant", null, null, 2337 "-/", null, null, 2338 "-", null, null, 2339 "/", null, null, 2340 }; 2341 2342 for (int i=0; i<DATA.length; i+=3) { 2343 String ID = DATA[i]; 2344 String expID = DATA[i+1]; 2345 String expInvID = DATA[i+2]; 2346 boolean 
expValid = (expInvID != null); 2347 if (expID == null) { 2348 expID = ID; 2349 } 2350 try { 2351 Transliterator t = 2352 Transliterator.getInstance(ID); 2353 Transliterator u = t.getInverse(); 2354 if (t.getID().equals(expID) && 2355 u.getID().equals(expInvID)) { 2356 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2357 } else { 2358 errln("FAIL: getInstance(" + ID + ") => " + 2359 t.getID() + " x getInverse() => " + u.getID() + 2360 ", expected " + expInvID); 2361 } 2362 } catch (IllegalArgumentException e) { 2363 if (!expValid) { 2364 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2365 } else { 2366 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2367 } 2368 } 2369 } 2370 } 2371 checkRules(String label, Transliterator t2, String testRulesForward)2372 void checkRules(String label, Transliterator t2, String testRulesForward) { 2373 String rules2 = t2.toRules(true); 2374 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2375 rules2 = TestUtility.replace(rules2, " ", ""); 2376 rules2 = TestUtility.replace(rules2, "\n", ""); 2377 rules2 = TestUtility.replace(rules2, "\r", ""); 2378 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2379 2380 if (!rules2.equals(testRulesForward)) { 2381 errln(label); 2382 logln("GENERATED RULES: " + rules2); 2383 logln("SHOULD BE: " + testRulesForward); 2384 } 2385 } 2386 2387 /** 2388 * Mark's toRules test. 
2389 */ TestToRulesMark()2390 public void TestToRulesMark() { 2391 2392 String testRules = 2393 "::[[:Latin:][:Mark:]];" 2394 + "::NFKD (NFC);" 2395 + "::Lower (Lower);" 2396 + "a <> \\u03B1;" // alpha 2397 + "::NFKC (NFD);" 2398 + "::Upper (Lower);" 2399 + "::Lower ();" 2400 + "::([[:Greek:][:Mark:]]);" 2401 ; 2402 String testRulesForward = 2403 "::[[:Latin:][:Mark:]];" 2404 + "::NFKD(NFC);" 2405 + "::Lower(Lower);" 2406 + "a > \\u03B1;" 2407 + "::NFKC(NFD);" 2408 + "::Upper (Lower);" 2409 + "::Lower ();" 2410 ; 2411 String testRulesBackward = 2412 "::[[:Greek:][:Mark:]];" 2413 + "::Lower (Upper);" 2414 + "::NFD(NFKC);" 2415 + "\\u03B1 > a;" 2416 + "::Lower(Lower);" 2417 + "::NFC(NFKD);" 2418 ; 2419 String source = "\u00E1"; // a-acute 2420 String target = "\u03AC"; // alpha-acute 2421 2422 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2423 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2424 2425 expect(t2, source, target); 2426 expect(t3, target, source); 2427 2428 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2429 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2430 } 2431 2432 /** 2433 * Test Escape and Unescape transliterators. 2434 */ TestEscape()2435 public void TestEscape() { 2436 expect(Transliterator.getInstance("Hex-Any"), 2437 "\\x{40}\\U000000312Q", 2438 "@12Q"); 2439 expect(Transliterator.getInstance("Any-Hex/C"), 2440 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2441 "\\u0041\\U0010BEEF\\uFEED"); 2442 expect(Transliterator.getInstance("Any-Hex/Java"), 2443 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2444 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2445 expect(Transliterator.getInstance("Any-Hex/Perl"), 2446 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2447 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2448 } 2449 2450 /** 2451 * Make sure display names of variants look reasonable. 
2452 */ TestDisplayName()2453 public void TestDisplayName() { 2454 String DATA[] = { 2455 // ID, forward name, reverse name 2456 // Update the text as necessary -- the important thing is 2457 // not the text itself, but how various cases are handled. 2458 2459 // Basic test 2460 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 2461 2462 // Variants 2463 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 2464 2465 // Target-only IDs 2466 "NFC", "Any to NFC", "Any to NFD", 2467 }; 2468 2469 Locale US = Locale.US; 2470 2471 for (int i=0; i<DATA.length; i+=3) { 2472 String name = Transliterator.getDisplayName(DATA[i], US); 2473 if (!name.equals(DATA[i+1])) { 2474 errln("FAIL: " + DATA[i] + ".getDisplayName() => " + 2475 name + ", expected " + DATA[i+1]); 2476 } else { 2477 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name); 2478 } 2479 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE); 2480 name = Transliterator.getDisplayName(t.getID(), US); 2481 if (!name.equals(DATA[i+2])) { 2482 errln("FAIL: " + t.getID() + ".getDisplayName() => " + 2483 name + ", expected " + DATA[i+2]); 2484 } else { 2485 logln("Ok: " + t.getID() + ".getDisplayName() => " + name); 2486 } 2487 2488 // Cover getDisplayName(String) 2489 ULocale save = ULocale.getDefault(); 2490 ULocale.setDefault(ULocale.US); 2491 String name2 = Transliterator.getDisplayName(t.getID()); 2492 if (!name.equals(name2)) 2493 errln("FAIL: getDisplayName with default locale failed"); 2494 ULocale.setDefault(save); 2495 } 2496 } 2497 2498 /** 2499 * Test anchor masking 2500 */ TestAnchorMasking()2501 public void TestAnchorMasking() { 2502 String rule = "^a > Q; a > q;"; 2503 try { 2504 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2505 if(t==null){ 2506 errln("FAIL: Did not get the expected exception"); 2507 } 2508 } catch (IllegalArgumentException e) { 2509 errln("FAIL: " + rule + " => " + e); 2510 } 2511 } 2512 2513 /** 2514 
* This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java 2515 * during ICU4J modularization to remove dependency of tests on Transliterator. 2516 */ TestScriptAllCodepoints()2517 public void TestScriptAllCodepoints(){ 2518 int code; 2519 HashSet scriptIdsChecked = new HashSet(); 2520 HashSet scriptAbbrsChecked = new HashSet(); 2521 for( int i =0; i <= 0x10ffff; i++){ 2522 code = UScript.getScript(i); 2523 if(code==UScript.INVALID_CODE){ 2524 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 2525 } 2526 String id =UScript.getName(code); 2527 String abbr = UScript.getShortName(code); 2528 if (!scriptIdsChecked.contains(id)) { 2529 scriptIdsChecked.add(id); 2530 String newId ="[:"+id+":];NFD"; 2531 try{ 2532 Transliterator t = Transliterator.getInstance(newId); 2533 if(t==null){ 2534 errln("Failed to create transliterator for "+hex(i)+ 2535 " script code: " +id); 2536 } 2537 }catch(Exception e){ 2538 errln("Failed to create transliterator for "+hex(i) 2539 +" script code: " +id 2540 + " Exception: "+e.getMessage()); 2541 } 2542 } 2543 if (!scriptAbbrsChecked.contains(abbr)) { 2544 scriptAbbrsChecked.add(abbr); 2545 String newAbbrId ="[:"+abbr+":];NFD"; 2546 try{ 2547 Transliterator t = Transliterator.getInstance(newAbbrId); 2548 if(t==null){ 2549 errln("Failed to create transliterator for "+hex(i)+ 2550 " script code: " +abbr); 2551 } 2552 }catch(Exception e){ 2553 errln("Failed to create transliterator for "+hex(i) 2554 +" script code: " +abbr 2555 + " Exception: "+e.getMessage()); 2556 } 2557 } 2558 } 2559 } 2560 2561 2562 static final String[][] registerRules = { 2563 {"Any-Dev1", "x > X; y > Y;"}, 2564 {"Any-Dev2", "XY > Z"}, 2565 {"Greek-Latin/FAKE", 2566 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+ 2567 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+ 2568 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+ 2569 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;" 2570 }, 2571 }; 2572 2573 static final String 
DESERET_DEE = UTF16.valueOf(0x10414); 2574 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2575 2576 static final String[][] testCases = { 2577 2578 // NORMALIZATION 2579 // should add more test cases 2580 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2581 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2582 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2583 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2584 2585 // mp -> b BUG 2586 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2587 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2588 2589 // check for devanagari bug 2590 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2591 2592 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2593 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2594 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2595 //TODO: enable this test once Titlecase works right 2596 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2597 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2598 2599 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2600 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2601 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2602 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2603 2604 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2605 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2606 2607 // FORMS OF S 2608 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2609 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2610 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2611 {"Latin-Greek", "s ss 
s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2612 2613 // Tatiana bug 2614 // Upper: TAT\u02B9\u00C2NA 2615 // Lower: tat\u02B9\u00E2na 2616 // Title: Tat\u02B9\u00E2na 2617 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2618 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2619 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2620 }; 2621 TestSpecialCases()2622 public void TestSpecialCases() { 2623 2624 for (int i = 0; i < registerRules.length; ++i) { 2625 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2626 registerRules[i][1], Transliterator.FORWARD); 2627 DummyFactory.add(registerRules[i][0], t); 2628 } 2629 for (int i = 0; i < testCases.length; ++i) { 2630 String name = testCases[i][0]; 2631 Transliterator t = Transliterator.getInstance(name); 2632 String id = t.getID(); 2633 String source = testCases[i][1]; 2634 String target = null; 2635 2636 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2637 2638 if (testCases[i].length > 2) target = testCases[i][2]; 2639 else if (id.equalsIgnoreCase("NFD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD); 2640 else if (id.equalsIgnoreCase("NFC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC); 2641 else if (id.equalsIgnoreCase("NFKD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD); 2642 else if (id.equalsIgnoreCase("NFKC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC); 2643 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2644 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2645 2646 expect(t, source, target); 2647 } 2648 for (int i = 0; i < registerRules.length; ++i) { 2649 Transliterator.unregister(registerRules[i][0]); 2650 } 2651 } 2652 2653 // seems like there should be an 
easier way to just register an instance of a transliterator 2654 2655 static class DummyFactory implements Transliterator.Factory { 2656 static DummyFactory singleton = new DummyFactory(); 2657 static HashMap m = new HashMap(); 2658 2659 // Since Transliterators are immutable, we don't have to clone on set & get add(String ID, Transliterator t)2660 static void add(String ID, Transliterator t) { 2661 m.put(ID, t); 2662 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2663 Transliterator.registerFactory(ID, singleton); 2664 } getInstance(String ID)2665 public Transliterator getInstance(String ID) { 2666 return (Transliterator) m.get(ID); 2667 } 2668 } 2669 TestCasing()2670 public void TestCasing() { 2671 Transliterator toLower = Transliterator.getInstance("lower"); 2672 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2673 Transliterator toUpper = Transliterator.getInstance("upper"); 2674 Transliterator toTitle = Transliterator.getInstance("title"); 2675 for (int i = 0; i < 0x600; ++i) { 2676 String s = UTF16.valueOf(i); 2677 2678 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2679 assertEquals("Lowercase", lower, toLower.transform(s)); 2680 2681 String casefold = UCharacter.foldCase(s, true); 2682 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2683 2684 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2685 assertEquals("Title", title, toTitle.transform(s)); 2686 2687 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2688 assertEquals("Upper", upper, toUpper.transform(s)); 2689 } 2690 } 2691 TestSurrogateCasing()2692 public void TestSurrogateCasing () { 2693 // check that casing handles surrogates 2694 // titlecase is currently defective 2695 int dee = UTF16.charAt(DESERET_dee,0); 2696 int DEE = UCharacter.toTitleCase(dee); 2697 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2698 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2699 } 
2700 2701 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) { 2702 errln("Fails uppercase of surrogates"); 2703 } 2704 2705 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2706 errln("Fails lowercase of surrogates"); 2707 } 2708 } 2709 2710 // Check to see that incremental gets at least part way through a reasonable string. 2711 TestIncrementalProgress()2712 public void TestIncrementalProgress() { 2713 String latinTest = "The Quick Brown Fox."; 2714 String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest); 2715 String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest); 2716 String[][] tests = { 2717 {"Any", latinTest}, 2718 {"Latin", latinTest}, 2719 {"Halfwidth", latinTest}, 2720 {"Devanagari", devaTest}, 2721 {"Katakana", kataTest}, 2722 }; 2723 2724 Enumeration sources = Transliterator.getAvailableSources(); 2725 while(sources.hasMoreElements()) { 2726 String source = (String) sources.nextElement(); 2727 String test = findMatch(source, tests); 2728 if (test == null) { 2729 logln("Skipping " + source + "-X"); 2730 continue; 2731 } 2732 Enumeration targets = Transliterator.getAvailableTargets(source); 2733 while(targets.hasMoreElements()) { 2734 String target = (String) targets.nextElement(); 2735 Enumeration variants = Transliterator.getAvailableVariants(source, target); 2736 while(variants.hasMoreElements()) { 2737 String variant = (String) variants.nextElement(); 2738 String id = source + "-" + target + "/" + variant; 2739 logln("id: " + id); 2740 2741 String filter = getTranslitTestFilter(); 2742 if (filter != null && id.indexOf(filter) < 0) continue; 2743 2744 Transliterator t = Transliterator.getInstance(id); 2745 CheckIncrementalAux(t, test); 2746 2747 String rev = t.transliterate(test); 2748 Transliterator inv = t.getInverse(); 2749 CheckIncrementalAux(inv, rev); 2750 } 2751 } 2752 } 2753 } 2754 findMatch(String 
source, String[][] pairs)2755 public String findMatch (String source, String[][] pairs) { 2756 for (int i = 0; i < pairs.length; ++i) { 2757 if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1]; 2758 } 2759 return null; 2760 } 2761 CheckIncrementalAux(Transliterator t, String input)2762 public void CheckIncrementalAux(Transliterator t, String input) { 2763 2764 Replaceable test = new ReplaceableString(input); 2765 Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length()); 2766 t.transliterate(test, pos); 2767 boolean gotError = false; 2768 2769 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 2770 2771 if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) { 2772 errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2773 gotError = true; 2774 } else { 2775 logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2776 } 2777 t.finishTransliteration(test, pos); 2778 if (pos.start != pos.limit) { 2779 errln("Incomplete, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2780 gotError = true; 2781 } 2782 if(!gotError){ 2783 //errln("FAIL: Did not get expected error"); 2784 } 2785 } 2786 TestFunction()2787 public void TestFunction() { 2788 // Careful with spacing and ';' here: Phrase this exactly 2789 // as toRules() is going to return it. If toRules() changes 2790 // with regard to spacing or ';', then adjust this string. 
2791 String rule = 2792 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2793 2794 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2795 if (t == null) { 2796 errln("FAIL: createFromRules failed"); 2797 return; 2798 } 2799 2800 String r = t.toRules(true); 2801 if (r.equals(rule)) { 2802 logln("OK: toRules() => " + r); 2803 } else { 2804 errln("FAIL: toRules() => " + r + 2805 ", expected " + rule); 2806 } 2807 2808 expect(t, "The Quick Brown Fox", 2809 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2810 rule = 2811 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2812 2813 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2814 if (t == null) { 2815 errln("FAIL: createFromRules failed"); 2816 return; 2817 } 2818 2819 r = t.toRules(true); 2820 if (r.equals(rule)) { 2821 logln("OK: toRules() => " + r); 2822 } else { 2823 errln("FAIL: toRules() => " + r + 2824 ", expected " + rule); 2825 } 2826 2827 expect(t, "\u0301", 2828 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2829 } 2830 TestInvalidBackRef()2831 public void TestInvalidBackRef() { 2832 String rule = ". > $1;"; 2833 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2834 try { 2835 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2836 if (t != null) { 2837 errln("FAIL: createFromRules should have returned NULL"); 2838 } 2839 errln("FAIL: Ok: . > $1; => no error"); 2840 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2841 if (t2 != null) { 2842 errln("FAIL: createFromRules should have returned NULL"); 2843 } 2844 errln("FAIL: Ok: . > $1; => no error"); 2845 } catch (IllegalArgumentException e) { 2846 logln("Ok: . 
> $1; => " + e.getMessage()); 2847 } 2848 } 2849 TestMulticharStringSet()2850 public void TestMulticharStringSet() { 2851 // Basic testing 2852 String rule = 2853 " [{aa}] > x;" + 2854 " a > y;" + 2855 " [b{bc}] > z;" + 2856 "[{gd}] { e > q;" + 2857 " e } [{fg}] > r;" ; 2858 2859 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2860 if (t == null) { 2861 errln("FAIL: createFromRules failed"); 2862 return; 2863 } 2864 2865 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2866 "y x yz z d gd de gdq gdqfg ddrfg"); 2867 2868 // Overlapped string test. Make sure that when multiple 2869 // strings can match that the longest one is matched. 2870 rule = 2871 " [a {ab} {abc}] > x;" + 2872 " b > y;" + 2873 " c > z;" + 2874 " q [t {st} {rst}] { e > p;" ; 2875 2876 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2877 if (t == null) { 2878 errln("FAIL: createFromRules failed"); 2879 return; 2880 } 2881 2882 expect(t, "a ab abc qte qste qrste", 2883 "x x x qtp qstp qrstp"); 2884 } 2885 2886 /** 2887 * Test that user-registered transliterators can be used under function 2888 * syntax. 2889 */ TestUserFunction()2890 public void TestUserFunction() { 2891 Transliterator t; 2892 2893 // There's no need to register inverses if we don't use them 2894 TestUserFunctionFactory.add("Any-gif", 2895 Transliterator.createFromRules("gif", 2896 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2897 Transliterator.FORWARD)); 2898 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2899 2900 TestUserFunctionFactory.add("Any-RemoveCurly", 2901 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2902 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2903 2904 logln("Trying &hex"); 2905 t = Transliterator.createFromRules("hex2", "(.) 
> &hex($1);", Transliterator.FORWARD); 2906 logln("Registering"); 2907 TestUserFunctionFactory.add("Any-hex2", t); 2908 t = Transliterator.getInstance("Any-hex2"); 2909 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2910 2911 logln("Trying &gif"); 2912 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2913 logln("Registering"); 2914 TestUserFunctionFactory.add("Any-gif2", t); 2915 t = Transliterator.getInstance("Any-gif2"); 2916 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2917 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2918 2919 // Test that filters are allowed after & 2920 t = Transliterator.createFromRules("test", 2921 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2922 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2923 2924 // Unregister our test stuff 2925 TestUserFunctionFactory.unregister(); 2926 } 2927 2928 static class TestUserFunctionFactory implements Transliterator.Factory { 2929 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 2930 static HashMap m = new HashMap(); 2931 add(String ID, Transliterator t)2932 static void add(String ID, Transliterator t) { 2933 m.put(new CaseInsensitiveString(ID), t); 2934 Transliterator.registerFactory(ID, singleton); 2935 } 2936 getInstance(String ID)2937 public Transliterator getInstance(String ID) { 2938 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 2939 } 2940 unregister()2941 static void unregister() { 2942 Iterator ids = m.keySet().iterator(); 2943 while (ids.hasNext()) { 2944 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 2945 Transliterator.unregister(id.getString()); 2946 ids.remove(); // removes pair from m 2947 } 2948 } 2949 } 2950 2951 /** 2952 * Test the Any-X transliterators. 
2953 */ TestAnyX()2954 public void TestAnyX() { 2955 Transliterator anyLatin = 2956 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 2957 2958 expect(anyLatin, 2959 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 2960 "greek:abkABK hiragana:abuku cyrillic:abc"); 2961 } 2962 2963 /** 2964 * Test Any-X transliterators with sample letters from all scripts. 2965 */ TestAny()2966 public void TestAny() { 2967 UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze(); 2968 StringBuffer testString = new StringBuffer(); 2969 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 2970 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 2971 int count = 5; 2972 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 2973 testString.append(it.getString()); 2974 if (--count < 0) break; 2975 } 2976 } 2977 logln("Sample set for Any-Latin: " + testString); 2978 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 2979 String result = anyLatin.transliterate(testString.toString()); 2980 logln("Sample result for Any-Latin: " + result); 2981 } 2982 2983 2984 /** 2985 * Test the source and target set API. These are only implemented 2986 * for RBT and CompoundTransliterator at this time. 
     */
    public void TestSourceTargetSet() {
        // Rules
        String r =
            "a > b; " +
            "r [x{lu}] > q;";

        // Expected source
        UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");

        // Expected target
        UnicodeSet expTrg = new UnicodeSet("[bq]");

        Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
        UnicodeSet src = t.getSourceSet();
        UnicodeSet trg = t.getTargetSet();

        // Both sets must match exactly; either branch prints the actual sets.
        if (src.equals(expSrc) && trg.equals(expTrg)) {
            logln("Ok: " + r + " => source = " + src.toPattern(true) +
                    ", target = " + trg.toPattern(true));
        } else {
            errln("FAIL: " + r + " => source = " + src.toPattern(true) +
                    ", expected " + expSrc.toPattern(true) +
                    "; target = " + trg.toPattern(true) +
                    ", expected " + expTrg.toPattern(true));
        }
    }

    /**
     * Compares getSourceSet()/getTargetSet() of several rule-based
     * transliterators with sets collected empirically.
     *
     * The method first builds a pool of test strings (disorderedMarks):
     * canonically equivalent forms of NFD decompositions, reorderings of
     * "source + trailing mark" combinations, each non-starter combined with
     * U+0345 / U+0323, and finally every code point.  Each transliterator
     * built from the rules table is then run over the pool; for every
     * string that changes and that isAtomic() accepts, its contents are
     * added to the empirical source set and the result's contents to the
     * empirical target set.  Those are checked against the API sets with
     * SetAssert.MISSING_OK.
     */
    public void TestSourceTargetSet2() {


        Normalizer2 nfc = Normalizer2.getNFCInstance();
        Normalizer2 nfd = Normalizer2.getNFDInstance();

        //        Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
        //        UnicodeSet nfkdSource = new UnicodeSet();
        //        UnicodeSet nfkdTarget = new UnicodeSet();
        //        for (int i = 0; i <= 0x10FFFF; ++i) {
        //            if (nfkd.isInert(i)) {
        //                continue;
        //            }
        //            nfkdSource.add(i);
        //            String t = nfkd.getDecomposition(i);
        //            if (t != null) {
        //                nfkdTarget.addAll(t);
        //            } else {
        //                nfkdTarget.add(i);
        //            }
        //        }
        //        nfkdSource.freeze();
        //        nfkdTarget.freeze();
        //        logln("NFKD Source: " + nfkdSource.toPattern(false));
        //        logln("NFKD Target: " + nfkdTarget.toPattern(false));

        // leadToTrail:   first code point of a decomposition -> characters seen after it
        // leadToSources: first code point of a decomposition -> code points that decompose to it
        // NOTE(review): both maps are created through the raw UnicodeMap type.
        UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
        UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
        UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
        CanonicalIterator can = new CanonicalIterator("");

        UnicodeSet disorderedMarks = new UnicodeSet();

        for (int i = 0; i <= 0x10FFFF; ++i) {
            String s = nfd.getDecomposition(i);
            if (s == null) {
                continue;
            }

            // every form the canonical iterator yields for the decomposition
            can.setSource(s);
            for (String t = can.next(); t != null; t = can.next()) {
                disorderedMarks.add(t);
            }

            // if s has two code points, (or more), add the lead/trail information
            int first = s.codePointAt(0);
            int firstCount = Character.charCount(first);
            if (s.length() == firstCount) continue;
            String trailString = s.substring(firstCount);

            // add all the trail characters
            if (!nonStarters.containsSome(trailString)) {
                continue;
            }
            UnicodeSet trailSet = leadToTrail.get(first);
            if (trailSet == null) {
                leadToTrail.put(first, trailSet = new UnicodeSet());
            }
            trailSet.addAll(trailString); // add remaining trails

            // add the sources
            UnicodeSet sourcesSet = leadToSources.get(first);
            if (sourcesSet == null) {
                leadToSources.put(first, sourcesSet = new UnicodeSet());
            }
            sourcesSet.add(i);
        }


        // For each lead, pair every source with every trail recorded for that
        // lead and keep the equivalent forms in which the trail is not last.
        for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
            String lead = x.getKey();
            UnicodeSet sources = x.getValue();
            UnicodeSet trailSet = leadToTrail.get(lead);
            for (String source : sources) {
                for (String trail : trailSet) {
                    can.setSource(source + trail);
                    for (String t = can.next(); t != null; t = can.next()) {
                        if (t.endsWith(trail)) continue;
                        disorderedMarks.add(t);
                    }
                }
            }
        }


        for (String s : nonStarters) {
            disorderedMarks.add("\u0345" + s);
            disorderedMarks.add(s+"\u0323");
            // diagnostic only: logs when NFC of U+01EC + s no longer starts with U+01EC
            String xx = nfc.normalize("\u01EC" + s);
            if (!xx.startsWith("\u01EC")) {
                logln("??");
            }
        }

        //        for (int i = 0; i <= 0x10FFFF; ++i) {
        //            String s = nfkd.getDecomposition(i);
        //            if (s != null) {
        //                disorderedMarks.add(s);
        //                disorderedMarks.add(nfc.normalize(s));
        //                addDerivedStrings(nfc, disorderedMarks, s);
        //            }
        //            s = nfd.getDecomposition(i);
        //            if (s != null) {
        //                disorderedMarks.add(s);
        //            }
        //            if (!nfc.isInert(i)) {
        //                if (i == 0x00C0) {
        //                    logln("\u00C0");
        //                }
        //                can.setSource(s+"\u0334");
        //                for (String t = can.next(); t != null; t = can.next()) {
        //                    addDerivedStrings(nfc, disorderedMarks, t);
        //                }
        //                can.setSource(s+"\u0345");
        //                for (String t = can.next(); t != null; t = can.next()) {
        //                    addDerivedStrings(nfc, disorderedMarks, t);
        //                }
        //                can.setSource(s+"\u0323");
        //                for (String t = can.next(); t != null; t = can.next()) {
        //                    addDerivedStrings(nfc, disorderedMarks, t);
        //                }
        //            }
        //        }
        logln("Test cases: " + disorderedMarks.size());
        // add every single code point to the pool, then make it read-only
        disorderedMarks.addAll(0,0x10FFFF).freeze();
        logln("isInert \u0104 " + nfc.isInert('\u0104'));

        // Each row: { rules, extra test set }.  The second column is null in
        // every row and is only read by the commented-out code below.
        Object[][] rules = {
                {":: [:sc=COMMON:] any-name;", null},

                {":: [:Greek:] hex-any/C;", null},
                {":: [:Greek:] any-hex/C;", null},

                {":: [[:Mn:][:Me:]] remove;", null},
                {":: [[:Mn:][:Me:]] null;", null},


                {":: lower;", null},
                {":: upper;", null},
                {":: title;", null},
                {":: CaseFold;", null},

                {":: NFD;", null},
                {":: NFC;", null},
                {":: NFKD;", null},
                {":: NFKC;", null},

                {":: [[:Mn:][:Me:]] NFKD;", null},
                {":: Latin-Greek;", null},
                {":: [:Latin:] NFKD;", null},
                {":: NFKD;", null},
                {":: NFKD;\n" +
                    ":: [[:Mn:][:Me:]] remove;\n" +
                    ":: NFC;", null},
        };
        for (Object[] rulex : rules) {
            String rule = (String) rulex[0];
            Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
            UnicodeSet actualSource = trans.getSourceSet();
            UnicodeSet actualTarget = trans.getTargetSet();
            UnicodeSet empiricalSource = new UnicodeSet();
            UnicodeSet empiricalTarget = new UnicodeSet();
            // newlines in multi-line rules become tabs in the assertion message
            String ruleDisplay = rule.replace("\n", "\t\t");
            UnicodeSet toTest = disorderedMarks;
            //            if (rulex[1] != null) {
            //                toTest = new UnicodeSet(disorderedMarks);
            //                toTest.addAll((UnicodeSet) rulex[1]);
            //            }

            String test = nfd.normalize("\u0104");
            boolean DEBUG = true;
            @SuppressWarnings("unused")
            int count = 0; // for debugging
            for (String s : toTest) {
                if (s.equals(test)) {
                    logln(test);
                }
                String t = trans.transform(s);
                if (!s.equals(t)) {
                    if (!isAtomic(s, t, trans)) {
                        // NOTE(review): the result of this repeated call is
                        // unused; it looks like a debugger hook -- confirm.
                        isAtomic(s, t, trans);
                        continue;
                    }

                    // only keep the part that changed; so skip the front and end.
                    //                    int start = findSharedStartLength(s,t);
                    //                    int end = findSharedEndLength(s,t);
                    //                    if (start != 0 || end != 0) {
                    //                        s = s.substring(start, s.length() - end);
                    //                        t = t.substring(start, t.length() - end);
                    //                    }
                    if (DEBUG) {
                        // count is never read; it only gives a place to stop
                        // when the API sets miss part of s or t
                        if (!actualSource.containsAll(s)) {
                            count++;
                        }
                        if (!actualTarget.containsAll(t)) {
                            count++;
                        }
                    }
                    addSourceTarget(s, empiricalSource, t, empiricalTarget);
                }
            }
            assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
            assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
        }
    }

    /**
     * Checks getSourceSet()/getTargetSet() for transliterators whose filter
     * is the empty set "[]".  The source set is expected to be empty in
     * both directions; the target set is expected to be empty unless the
     * test row supplies a pattern for that direction.
     */
    public void TestSourceTargetSetFilter() {
        String[][] tests = {
                // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
                {"[] Latin-Greek", null, "[\']"},
                {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
                {"[] Any-Latin"},
                {"[] casefold"},
                {"[] NFKD;"},
                {"[] NFKC;"},
                {"[] hex"},
                {"[] lower"},
                {"[] null"},
                {"[] remove"},
                {"[] title"},
                {"[] upper"},
        };
        UnicodeSet expectedSource = UnicodeSet.EMPTY;
        for (String[] testPair : tests) {
            String test = testPair[0];
            // Treat the entry as an ID first; if that fails, as a rule set.
            Transliterator t0;
            try {
                t0 = Transliterator.getInstance(test);
            } catch (Exception e) {
                t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
            }
            // Same fallback for the reverse direction.
            Transliterator t1;
            try {
                t1 = t0.getInverse();
            } catch (Exception e) {
                t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
            }
            int targetIndex = 0;
            for (Transliterator t : new Transliterator[]{t0, t1}) {
                boolean ok;
                UnicodeSet source = t.getSourceSet();
                String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
                // column 1 holds the FORWARD target, column 2 the REVERSE target
                targetIndex++;
                // a missing, null or empty column means "expect the empty set"
                UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
                        : testPair[targetIndex] == null ? expectedSource
                                : testPair[targetIndex].length() == 0 ? expectedSource
                                        : new UnicodeSet(testPair[targetIndex]);
                ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
                if (!ok) { // for debugging
                    source = t.getSourceSet();
                }
                UnicodeSet target = t.getTargetSet();
                ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
                if (!ok) { // for debugging
                    target = t.getTargetSet();
                }
            }
        }
    }

    /**
     * Reports whether the change s -> t has to be treated as a whole.
     *
     * @param s     original string
     * @param t     result of transforming s with trans
     * @param trans transliterator that produced t
     * @return false if s can be cut at some character boundary such that
     *         the transforms of the two pieces, concatenated, give t;
     *         true otherwise
     */
    private boolean isAtomic(String s, String t, Transliterator trans) {
        for (int i = 1; i < s.length(); ++i) {
            if (!CharSequences.onCharacterBoundary(s, i)) {
                continue;
            }
            String q = trans.transform(s.substring(0,i));
            if (t.startsWith(q)) {
                String r = trans.transform(s.substring(i));
                if (t.length() == q.length() + r.length() && t.endsWith(r)) {
                    return false;
                }
            }
        }
        return true;
        //        // make sure that every part is different
        //        if (s.codePointCount(0, s.length()) > 1) {
        //            int[] codePoints = It.codePoints(s);
        //            for (int k = 0; k < codePoints.length; ++k) {
        //                int pos = indexOf(t,codePoints[k]);
        //                if (pos >= 0) {
        //                    int x;
        //                }
        //            }
        //            if (s.contains("\u00C0")) {
        //                logln("\u00C0");
        //            }
        //        }
    }

    /**
     * Adds the contents of s to expectedSource and, when t is not empty,
     * the contents of t to expectedTarget.
     */
    private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
        expectedSource.addAll(s);
        if (t.length() > 0) {
            expectedTarget.addAll(t);
        }
    }

    //    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
    //        disorderedMarks.add(s);
    //        for (int j = 1; j < s.length(); ++j) {
    //            if (CharSequences.onCharacterBoundary(s, j)) {
    //                String shorter = s.substring(0,j);
    //                disorderedMarks.add(shorter);
    //                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
    //            }
    //        }
    //    }

    /**
     * Table-driven test of findSharedStartLength and findSharedEndLength.
     * Each row is { expected length, first string, second string }; several
     * rows contain surrogate code units.
     */
    public void TestCharUtils() {
        String[][] startTests = {
                {"1", "a", "ab"},
                {"0", "a", "xb"},
                {"0", "\uD800", "\uD800\uDC01"},
                {"1", "\uD800a", "\uD800b"},
                {"0", "\uD800\uDC00", "\uD800\uDC01"},
        };
        for (String[] row : startTests) {
            int actual = findSharedStartLength(row[1], row[2]);
            assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
                    Integer.parseInt(row[0]),
                    actual);
        }
        String[][] endTests = {
                {"0", "\uDC00", "\uD801\uDC00"},
                {"1", "a", "ba"},
                {"0", "a", "bx"},
                {"1", "a\uDC00", "b\uDC00"},
                {"0", "\uD800\uDC00", "\uD801\uDC00"},
        };
        for (String[] row : endTests) {
            int actual = findSharedEndLength(row[1], row[2]);
            assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
                    Integer.parseInt(row[0]),
                    actual);
        }
    }

    /**
     * Counts the leading chars that s and t have in common.  If the first
     * differing index is not on a character boundary in both strings (as
     * judged by CharSequences.onCharacterBoundary), one less is returned.
     *
     * @param s first sequence
     * @param t second sequence
     * @return length, in chars, of the shared start
     */
    // TODO make generally available
    private static int findSharedStartLength(CharSequence s, CharSequence t) {
        int min = Math.min(s.length(), t.length());
        int i;
        char sch, tch;
        for (i = 0; i < min; ++i) {
            sch = s.charAt(i);
            tch = t.charAt(i);
            if (sch != tch) {
                break;
            }
        }
        return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
    }

    /**
     * Counts the trailing chars that s and t have in common.  If the start
     * of the shared tail is not on a character boundary in both strings (as
     * judged by CharSequences.onCharacterBoundary), one less is returned.
     *
     * @param s first sequence
     * @param t second sequence
     * @return length, in chars, of the shared end
     */
    // TODO make generally available
    private static int findSharedEndLength(CharSequence s, CharSequence t) {
        int slength = s.length();
        int tlength = t.length();
        int min = Math.min(slength, tlength);
        int i;
        char sch, tch;
        // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
        for (i = 0; i < min; ++i) {
            sch = s.charAt(slength - i - 1);
            tch = t.charAt(tlength - i - 1);
            if (sch != tch) {
                break;
            }
        }
        return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
    }

    /** Comparison mode argument of the UnicodeSet assertEquals below. */
    enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK}

    /**
     * Compares an empirically collected set with the set an API returned.
     * Items found empirically but absent from actual are reported through
     * errln; items in actual that were not seen empirically are only logged
     * as a warning.
     *
     * NOTE(review): setAssert is not read anywhere in this body, so the
     * same checks run for every mode -- confirm whether that is intended.
     *
     * @param message   prefix for the log / error output
     * @param empirical set collected by running the transliterator
     * @param actual    set returned by the API under test
     * @param setAssert requested comparison mode (currently unused)
     */
    void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
        boolean haveError = false;
        if (!actual.containsAll(empirical)) {
            UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
            errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
            haveError = true;
        }
        if (!empirical.containsAll(actual)) {
            UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
            logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
            haveError = true;
        }
        if (!haveError) {
            logln("OK " + message + ' ' + toPattern(empirical));
        }
    }

    /**
     * Returns the set's pattern for display.  Patterns of 200 chars or more
     * are cut to 200 chars (199 if index 200 is not on a character
     * boundary) and get U+2026 appended.
     */
    private String toPattern(UnicodeSet missing) {
        String result = missing.toPattern(false);
        if (result.length() < 200) {
            return result;
        }
        return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
    }


    /**
     * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3425 */ TestPatternWhitespace()3426 public void TestPatternWhitespace() { 3427 // Rules 3428 String r = "a > \u200E b;"; 3429 3430 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3431 3432 expect(t, "a", "b"); 3433 3434 // UnicodeSet 3435 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3436 3437 if (set.contains(0x200E)) { 3438 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3439 } 3440 } 3441 TestAlternateSyntax()3442 public void TestAlternateSyntax() { 3443 // U+2206 == & 3444 // U+2190 == < 3445 // U+2192 == > 3446 // U+2194 == <> 3447 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3448 "abc", 3449 "xbz"); 3450 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3451 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3452 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3453 } 3454 TestPositionAPI()3455 public void TestPositionAPI() { 3456 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3457 Transliterator.Position b = new Transliterator.Position(a); 3458 Transliterator.Position c = new Transliterator.Position(); 3459 c.set(a); 3460 // Call the toString() API: 3461 if (a.equals(b) && a.equals(c)) { 3462 logln("Ok: " + a + " == " + b + " == " + c); 3463 } else { 3464 errln("FAIL: " + a + " != " + b + " != " + c); 3465 } 3466 } 3467 3468 //====================================================================== 3469 // New tests for the ::BEGIN/::END syntax 3470 //====================================================================== 3471 3472 private static final String[] BEGIN_END_RULES = new String[] { 3473 // [0] 3474 "abc > xy;" 3475 + "aba > z;", 3476 3477 // [1] 3478 /* 3479 "::BEGIN;" 3480 + "abc > xy;" 3481 + "::END;" 3482 + "::BEGIN;" 3483 + "aba > z;" 3484 + "::END;", 3485 */ 3486 "", // test case commented out below, this is here to keep from messing up the indexes 3487 3488 // [2] 3489 /* 3490 "abc > xy;" 3491 + "::BEGIN;" 3492 + "aba > z;" 3493 + "::END;", 
3494 */ 3495 "", // test case commented out below, this is here to keep from messing up the indexes 3496 3497 // [3] 3498 /* 3499 "::BEGIN;" 3500 + "abc > xy;" 3501 + "::END;" 3502 + "aba > z;", 3503 */ 3504 "", // test case commented out below, this is here to keep from messing up the indexes 3505 3506 // [4] 3507 "abc > xy;" 3508 + "::Null;" 3509 + "aba > z;", 3510 3511 // [5] 3512 "::Upper;" 3513 + "ABC > xy;" 3514 + "AB > x;" 3515 + "C > z;" 3516 + "::Upper;" 3517 + "XYZ > p;" 3518 + "XY > q;" 3519 + "Z > r;" 3520 + "::Upper;", 3521 3522 // [6] 3523 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3524 + "$delim = [\\-$ws];" 3525 + "$ws $delim* > ' ';" 3526 + "'-' $delim* > '-';", 3527 3528 // [7] 3529 "::Null;" 3530 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3531 + "$delim = [\\-$ws];" 3532 + "$ws $delim* > ' ';" 3533 + "'-' $delim* > '-';", 3534 3535 // [8] 3536 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3537 + "$delim = [\\-$ws];" 3538 + "$ws $delim* > ' ';" 3539 + "'-' $delim* > '-';" 3540 + "::Null;", 3541 3542 // [9] 3543 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3544 + "$delim = [\\-$ws];" 3545 + "::Null;" 3546 + "$ws $delim* > ' ';" 3547 + "'-' $delim* > '-';", 3548 3549 // [10] 3550 /* 3551 "::BEGIN;" 3552 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3553 + "$delim = [\\-$ws];" 3554 + "::END;" 3555 + "$ws $delim* > ' ';" 3556 + "'-' $delim* > '-';", 3557 */ 3558 "", // test case commented out below, this is here to keep from messing up the indexes 3559 3560 // [11] 3561 /* 3562 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3563 + "$delim = [\\-$ws];" 3564 + "::BEGIN;" 3565 + "$ws $delim* > ' ';" 3566 + "'-' $delim* > '-';" 3567 + "::END;", 3568 */ 3569 "", // test case commented out below, this is here to keep from messing up the indexes 3570 3571 // [12] 3572 /* 3573 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3574 + "$delim = [\\-$ws];" 3575 + "$ab = [ab];" 3576 + "::BEGIN;" 3577 + "$ws $delim* > ' ';" 3578 + "'-' $delim* > '-';" 3579 + "::END;" 3580 + 
"::BEGIN;" 3581 + "$ab { ' ' } $ab > '-';" 3582 + "c { ' ' > ;" 3583 + "::END;" 3584 + "::BEGIN;" 3585 + "'a-a' > a\\%|a;" 3586 + "::END;", 3587 */ 3588 "", // test case commented out below, this is here to keep from messing up the indexes 3589 3590 // [13] 3591 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3592 + "$delim = [\\-$ws];" 3593 + "$ab = [ab];" 3594 + "::Null;" 3595 + "$ws $delim* > ' ';" 3596 + "'-' $delim* > '-';" 3597 + "::Null;" 3598 + "$ab { ' ' } $ab > '-';" 3599 + "c { ' ' > ;" 3600 + "::Null;" 3601 + "'a-a' > a\\%|a;", 3602 3603 // [14] 3604 /* 3605 "::[abc];" 3606 + "::BEGIN;" 3607 + "abc > xy;" 3608 + "::END;" 3609 + "::BEGIN;" 3610 + "aba > yz;" 3611 + "::END;" 3612 + "::Upper;", 3613 */ 3614 "", // test case commented out below, this is here to keep from messing up the indexes 3615 3616 // [15] 3617 "::[abc];" 3618 + "abc > xy;" 3619 + "::Null;" 3620 + "aba > yz;" 3621 + "::Upper;", 3622 3623 // [16] 3624 /* 3625 "::[abc];" 3626 + "::BEGIN;" 3627 + "abc <> xy;" 3628 + "::END;" 3629 + "::BEGIN;" 3630 + "aba <> yz;" 3631 + "::END;" 3632 + "::Upper(Lower);" 3633 + "::([XYZ]);", 3634 */ 3635 "", // test case commented out below, this is here to keep from messing up the indexes 3636 3637 // [17] 3638 "::[abc];" 3639 + "abc <> xy;" 3640 + "::Null;" 3641 + "aba <> yz;" 3642 + "::Upper(Lower);" 3643 + "::([XYZ]);" 3644 }; 3645 3646 /* 3647 (This entire test is commented out below and will need some heavy revision when we re-add 3648 the ::BEGIN/::END stuff) 3649 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3650 // [7] 3651 "::BEGIN;" 3652 + "abc > xy;" 3653 + "::BEGIN;" 3654 + "aba > z;" 3655 + "::END;" 3656 + "::END;", 3657 3658 // [8] 3659 "abc > xy;" 3660 + " aba > z;" 3661 + "::END;", 3662 3663 // [9] 3664 "::BEGIN;" 3665 + "::Upper;" 3666 + "::END;" 3667 }; 3668 */ 3669 3670 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3671 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3672 // BEGIN_END_RULES[1], 
"abc ababc aba", "xy abxy z", 3673 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3674 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3675 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3676 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3677 3678 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3679 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3680 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3681 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3682 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3683 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3684 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3685 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3686 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3687 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3688 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3689 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3690 3691 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3692 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3693 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3694 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3695 }; 3696 TestBeginEnd()3697 public void TestBeginEnd() { 3698 // run through the list of test cases above 3699 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3700 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3701 } 3702 3703 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3704 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3705 Transliterator.REVERSE); 3706 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3707 3708 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3709 // that all of them cause errors 3710 /* 3711 (commented out until we have the 
real ::BEGIN/::END stuff in place 3712 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3713 try { 3714 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3715 Transliterator.FORWARD); 3716 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3717 } 3718 catch (IllegalArgumentException e) { 3719 // this is supposed to happen; do nothing here 3720 } 3721 } 3722 */ 3723 } 3724 TestBeginEndToRules()3725 public void TestBeginEndToRules() { 3726 // run through the same list of test cases we used above, but this time, instead of just 3727 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3728 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3729 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3730 // to (i.e., does the same thing as) the original rule set 3731 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3732 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3733 Transliterator.FORWARD); 3734 String rules = t.toRules(false); 3735 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3736 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3737 } 3738 3739 // do the same thing for the reversible test case 3740 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3741 Transliterator.REVERSE); 3742 String rules = reversed.toRules(false); 3743 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3744 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3745 } 3746 TestRegisterAlias()3747 public void TestRegisterAlias() { 3748 String longID = "Lower;[aeiou]Upper"; 3749 String shortID = "Any-CapVowels"; 3750 String reallyShortID = "CapVowels"; 3751 3752 
Transliterator.registerAlias(shortID, longID); 3753 3754 Transliterator t1 = Transliterator.getInstance(longID); 3755 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3756 3757 if (!t1.getID().equals(longID)) 3758 errln("Transliterator instantiated with long ID doesn't have long ID"); 3759 if (!t2.getID().equals(reallyShortID)) 3760 errln("Transliterator instantiated with short ID doesn't have short ID"); 3761 3762 if (!t1.toRules(true).equals(t2.toRules(true))) 3763 errln("Alias transliterators aren't the same"); 3764 3765 Transliterator.unregister(shortID); 3766 3767 try { 3768 t1 = Transliterator.getInstance(shortID); 3769 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3770 } 3771 catch (IllegalArgumentException e) { 3772 } 3773 3774 // try the same thing again, but this time with something other than 3775 // an instance of CompoundTransliterator 3776 String realID = "Latin-Greek"; 3777 String fakeID = "Latin-dlgkjdflkjdl"; 3778 Transliterator.registerAlias(fakeID, realID); 3779 3780 t1 = Transliterator.getInstance(realID); 3781 t2 = Transliterator.getInstance(fakeID); 3782 3783 if (!t1.toRules(true).equals(t2.toRules(true))) 3784 errln("Alias transliterators aren't the same"); 3785 3786 Transliterator.unregister(fakeID); 3787 } 3788 3789 /** 3790 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 
3791 */ TestHalfwidthFullwidth()3792 public void TestHalfwidthFullwidth() { 3793 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3794 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3795 3796 // Array of 3n items 3797 // Each item is 3798 // "hf"|"fh"|"both", 3799 // <Halfwidth>, 3800 // <Fullwidth> 3801 String[] DATA = { 3802 "both", 3803 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3804 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3805 }; 3806 3807 for (int i=0; i<DATA.length; i+=3) { 3808 switch (DATA[i].charAt(0)) { 3809 case 'h': // Halfwidth-Fullwidth only 3810 expect(hf, DATA[i+1], DATA[i+2]); 3811 break; 3812 case 'f': // Fullwidth-Halfwidth only 3813 expect(fh, DATA[i+2], DATA[i+1]); 3814 break; 3815 case 'b': // both directions 3816 expect(hf, DATA[i+1], DATA[i+2]); 3817 expect(fh, DATA[i+2], DATA[i+1]); 3818 break; 3819 } 3820 } 3821 3822 } 3823 3824 /** 3825 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3826 * TODO: confirm that the expected results are correct. 3827 * For now, test just confirms that C++ and Java give identical results. 3828 */ TestThai()3829 public void TestThai() { 3830 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3831 String thaiText = 3832 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3833 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3834 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3835 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. 
\u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3836 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3837 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3838 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3839 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3840 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3841 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3842 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3843 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3844 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3845 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3846 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3847 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3848 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3849 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3850 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3851 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3852 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3853 "\u0e01\u0e25\u0e38\u0e48\u0e21. 
\u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3854 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3855 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3856 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3857 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3858 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3859 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3860 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3861 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3862 3863 String latinText = 3864 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3865 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3866 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3867 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3868 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3869 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3870 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3871 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. 
m\u1ecb\u0300m" + 3872 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3873 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3874 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3875 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3876 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3877 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3878 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3879 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3880 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3881 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3882 3883 expect(tr, thaiText, latinText); 3884 } 3885 3886 3887 //====================================================================== 3888 // These tests are not mirrored (yet) in icu4c at 3889 // source/test/intltest/transtst.cpp 3890 //====================================================================== 3891 3892 /** 3893 * Improve code coverage. 3894 */ TestCoverage()3895 public void TestCoverage() { 3896 // NullTransliterator 3897 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3898 expect(t, "a", "a"); 3899 3900 // Source, target set 3901 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3902 t.setFilter(new UnicodeSet("[A-Z]")); 3903 logln("source = " + t.getSourceSet()); 3904 logln("target = " + t.getTargetSet()); 3905 3906 t = Transliterator.createFromRules("x", "(.) 
> &Any-Hex($1);", Transliterator.FORWARD); 3907 logln("source = " + t.getSourceSet()); 3908 logln("target = " + t.getTargetSet()); 3909 } 3910 /* 3911 * Test case for threading problem in NormalizationTransliterator 3912 * reported by ticket#5160 3913 */ TestT5160()3914 public void TestT5160() { 3915 final String[] testData = { 3916 "a", 3917 "b", 3918 "\u09BE", 3919 "A\u0301", 3920 }; 3921 final String[] expected = { 3922 "a", 3923 "b", 3924 "\u09BE", 3925 "\u00C1", 3926 }; 3927 Transliterator translit = Transliterator.getInstance("NFC"); 3928 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 3929 for (int i = 0; i < tasks.length; i++) { 3930 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 3931 } 3932 TestUtil.runUntilDone(tasks); 3933 3934 for (int i = 0; i < tasks.length; i++) { 3935 if (tasks[i].getErrorMessage() != null) { 3936 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 3937 break; 3938 } 3939 } 3940 } 3941 3942 static class NormTranslitTask implements Runnable { 3943 Transliterator translit; 3944 String testData; 3945 String expectedData; 3946 String errorMsg; 3947 NormTranslitTask(Transliterator translit, String testData, String expectedData)3948 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 3949 this.translit = translit; 3950 this.testData = testData; 3951 this.expectedData = expectedData; 3952 } 3953 run()3954 public void run() { 3955 errorMsg = null; 3956 StringBuffer inBuf = new StringBuffer(testData); 3957 StringBuffer expectedBuf = new StringBuffer(expectedData); 3958 3959 for(int i = 0; i < 1000; i++) { 3960 String in = inBuf.toString(); 3961 String out = translit.transliterate(in); 3962 String expected = expectedBuf.toString(); 3963 if (!out.equals(expected)) { 3964 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 3965 break; 3966 } 3967 inBuf.append(testData); 3968 expectedBuf.append(expectedData); 3969 } 3970 } 
3971 getErrorMessage()3972 public String getErrorMessage() { 3973 return errorMsg; 3974 } 3975 } 3976 3977 //====================================================================== 3978 // Support methods 3979 //====================================================================== expect(String rules, String source, String expectedResult, Transliterator.Position pos)3980 void expect(String rules, 3981 String source, 3982 String expectedResult, 3983 Transliterator.Position pos) { 3984 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 3985 expect(t, source, expectedResult, pos); 3986 } 3987 expect(String rules, String source, String expectedResult)3988 void expect(String rules, String source, String expectedResult) { 3989 expect(rules, source, expectedResult, null); 3990 } 3991 expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3992 void expect(Transliterator t, String source, String expectedResult, 3993 Transliterator reverseTransliterator) { 3994 expect(t, source, expectedResult); 3995 if (reverseTransliterator != null) { 3996 expect(reverseTransliterator, expectedResult, source); 3997 } 3998 } 3999 expect(Transliterator t, String source, String expectedResult)4000 void expect(Transliterator t, String source, String expectedResult) { 4001 expect(t, source, expectedResult, (Transliterator.Position) null); 4002 } 4003 expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)4004 void expect(Transliterator t, String source, String expectedResult, 4005 Transliterator.Position pos) { 4006 if (pos == null) { 4007 String result = t.transliterate(source); 4008 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 4009 } 4010 4011 Transliterator.Position index = null; 4012 if (pos == null) { 4013 index = new Transliterator.Position(0, source.length(), 0, source.length()); 4014 } else { 4015 index = new 
Transliterator.Position(pos.contextStart, pos.contextLimit, 4016 pos.start, pos.limit); 4017 } 4018 4019 ReplaceableString rsource = new ReplaceableString(source); 4020 4021 t.finishTransliteration(rsource, index); 4022 // Do it all at once -- below we do it incrementally 4023 4024 if (index.start != index.limit) { 4025 expectAux(t.getID() + ":UNFINISHED", source, 4026 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 4027 return; 4028 } 4029 String result = rsource.toString(); 4030 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 4031 4032 4033 if (pos == null) { 4034 index = new Transliterator.Position(); 4035 } else { 4036 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4037 pos.start, pos.limit); 4038 } 4039 4040 // Test incremental transliteration -- this result 4041 // must be the same after we finalize (see below). 4042 List<String> v = new ArrayList<String>(); 4043 v.add(source); 4044 rsource.replace(0, rsource.length(), ""); 4045 if (pos != null) { 4046 rsource.replace(0, 0, source); 4047 v.add(UtilityExtensions.formatInput(rsource, index)); 4048 t.transliterate(rsource, index); 4049 v.add(UtilityExtensions.formatInput(rsource, index)); 4050 } else { 4051 for (int i=0; i<source.length(); ++i) { 4052 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 4053 //log.append(source.charAt(i)).append(" -> ")); 4054 t.transliterate(rsource, index, source.charAt(i)); 4055 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 4056 v.add(UtilityExtensions.formatInput(rsource, index) + 4057 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 4058 } 4059 } 4060 4061 // As a final step in keyboard transliteration, we must call 4062 // transliterate to finish off any pending partial matches that 4063 // were waiting for more input. 
4064 t.finishTransliteration(rsource, index); 4065 result = rsource.toString(); 4066 //log.append(" => ").append(rsource.toString()); 4067 v.add(result); 4068 4069 String[] results = new String[v.size()]; 4070 v.toArray(results); 4071 expectAux(t.getID() + ":Incremental", results, 4072 result.equals(expectedResult), 4073 expectedResult); 4074 } 4075 4076 boolean expectAux(String tag, String source, 4077 String result, String expectedResult) { 4078 return expectAux(tag, new String[] {source, result}, 4079 result.equals(expectedResult), 4080 expectedResult); 4081 } 4082 4083 boolean expectAux(String tag, String source, 4084 String result, boolean pass, 4085 String expectedResult) { 4086 return expectAux(tag, new String[] {source, result}, 4087 pass, 4088 expectedResult); 4089 } 4090 4091 boolean expectAux(String tag, String source, 4092 boolean pass, 4093 String expectedResult) { 4094 return expectAux(tag, new String[] {source}, 4095 pass, 4096 expectedResult); 4097 } 4098 4099 boolean expectAux(String tag, String[] results, boolean pass, 4100 String expectedResult) { 4101 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 4102 4103 for (int i = 0; i < results.length; ++i) { 4104 String label; 4105 if (i == 0) { 4106 label = "source: "; 4107 } else if (i == results.length - 1) { 4108 label = "result: "; 4109 } else { 4110 if (!isVerbose() && pass) continue; 4111 label = "interm" + i + ": "; 4112 } 4113 msg(" " + label + results[i], pass ? 
LOG : ERR, false, true); 4114 } 4115 4116 if (!pass) { 4117 msg( " expected: " + expectedResult, ERR, false, true); 4118 } 4119 4120 return pass; 4121 } 4122 4123 private void assertTransform(String message, String expected, StringTransform t, String source) { 4124 assertEquals(message + " " + source, expected, t.transform(source)); 4125 } 4126 4127 4128 private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 4129 assertEquals(message + " " +source, expected, t.transform(source)); 4130 assertEquals(message + " " +source2, expected, t.transform(source2)); 4131 assertEquals(message + " " + expected, source, back.transform(expected)); 4132 } 4133 4134 /* 4135 * Tests the method public Enumeration<String> getAvailableTargets(String source) 4136 */ 4137 public void TestGetAvailableTargets() { 4138 try { 4139 // Tests when if (targets == null) is true 4140 Transliterator.getAvailableTargets(""); 4141 } catch (Exception e) { 4142 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 4143 } 4144 } 4145 4146 /* 4147 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 4148 */ 4149 public void TestGetAvailableVariants() { 4150 try { 4151 // Tests when if (targets == null) is true 4152 Transliterator.getAvailableVariants("", ""); 4153 } catch (Exception e) { 4154 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 4155 } 4156 } 4157 4158 /* 4159 * Tests the mehtod String nextLine() in RuleBody 4160 */ 4161 public void TestNextLine() { 4162 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 4163 try{ 4164 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 4165 } catch(Exception e){ 4166 errln("TransliteratorParser.nextLine() was not suppose to return an " + 4167 "exception for a rule of 
'\\'"); 4168 } 4169 } 4170 } 4171