1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.test.translit;
8 
9 import java.util.ArrayList;
10 import java.util.Enumeration;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Locale;
16 import java.util.Map.Entry;
17 
18 import com.ibm.icu.dev.test.TestFmwk;
19 import com.ibm.icu.dev.test.TestUtil;
20 import com.ibm.icu.dev.util.UnicodeMap;
21 import com.ibm.icu.impl.Utility;
22 import com.ibm.icu.impl.UtilityExtensions;
23 import com.ibm.icu.lang.CharSequences;
24 import com.ibm.icu.lang.UCharacter;
25 import com.ibm.icu.lang.UScript;
26 import com.ibm.icu.text.CanonicalIterator;
27 import com.ibm.icu.text.Normalizer2;
28 import com.ibm.icu.text.Replaceable;
29 import com.ibm.icu.text.ReplaceableString;
30 import com.ibm.icu.text.StringTransform;
31 import com.ibm.icu.text.Transliterator;
32 import com.ibm.icu.text.UTF16;
33 import com.ibm.icu.text.UnicodeFilter;
34 import com.ibm.icu.text.UnicodeSet;
35 import com.ibm.icu.text.UnicodeSetIterator;
36 import com.ibm.icu.util.CaseInsensitiveString;
37 import com.ibm.icu.util.ULocale;
38 
39 /***********************************************************************
40 
41                      HOW TO USE THIS TEST FILE
42                                -or-
43                   How I developed on two platforms
44                 without losing (too much of) my mind
45 
46 
47 1. Add new tests by copying/pasting/changing existing tests.  On Java,
48    any public void method named Test...() taking no parameters becomes
49    a test.  On C++, you need to modify the header and add a line to
50    the runIndexedTest() dispatch method.
51 
52 2. Make liberal use of the expect() method; it is your friend.
53 
54 3. The tests in this file exactly match those in a sister file on the
55    other side.  The two files are:
56 
57    icu4j:  src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
58    icu4c:  source/test/intltest/transtst.cpp
59 
60                   ==> THIS IS THE IMPORTANT PART <==
61 
62    When you add a test in this file, add it in transtst.cpp too.
63    Give it the same name and put it in the same relative place.  This
64    makes maintenance a lot simpler for any poor soul who ends up
65    trying to synchronize the tests between icu4j and icu4c.
66 
67 4. If you MUST enter a test that is NOT paralleled in the sister file,
68    then add it in the special non-mirrored section.  These are
69    labeled
70 
71      "icu4j ONLY"
72 
73    or
74 
75      "icu4c ONLY"
76 
77    Make sure you document the reason the test is here and not there.
78 
79 
80 Thank you.
81 The Management
82  ***********************************************************************/
83 
84 /**
85  * @test
86  * @summary General test of Transliterator
87  */
88 public class TransliteratorTest extends TestFmwk {
89 
main(String[] args)90     public static void main(String[] args) throws Exception {
91         new TransliteratorTest().run(args);
92     }
93 
TestHangul()94     public void TestHangul() {
95 
96         Transliterator lh = Transliterator.getInstance("Latin-Hangul");
97         Transliterator hl = lh.getInverse();
98 
99         assertTransform("Transform", "\uCE20", lh, "ch");
100 
101         assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
102         assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
103         assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
104         assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
105         assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
106         assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
107         assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
108         assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
109         assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
110         assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
111         assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
112         assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
113         assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
114         assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
115         assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
116         assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
117         assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
118         assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
119         assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
120         assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
121         assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
122         assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
123         assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
124         assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
125         assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
126         assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
127 
128     }
129 
TestChinese()130     public void TestChinese() {
131         Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
132         assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
133         assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
134     }
135 
TestRegistry()136     public void TestRegistry() {
137         checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
138         checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
139         checkRegistry("foo1", "[:letter:] a > b;");
140         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
141             String id = (String) e.nextElement();
142             checkRegistry(id);
143         }
144     }
145 
checkRegistry(String id, String rules)146     private void checkRegistry (String id, String rules) {
147         Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
148         Transliterator.registerInstance(foo);
149         checkRegistry(id);
150     }
151 
checkRegistry(String id)152     private void checkRegistry(String id) {
153         Transliterator fie = Transliterator.getInstance(id);
154         final UnicodeSet fae = new UnicodeSet("[a-z5]");
155         fie.setFilter(fae);
156         Transliterator foe = Transliterator.getInstance(id);
157         UnicodeFilter fee = foe.getFilter();
158         if (fae.equals(fee)) {
159             errln("Changed what is in registry for " + id);
160         }
161     }
162 
TestInstantiation()163     public void TestInstantiation() {
164         long ms = System.currentTimeMillis();
165         String ID;
166         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
167             ID = (String) e.nextElement();
168             if (ID.equals("Latin-Han/definition")) {
169                 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later");
170                 continue;
171             }
172             Transliterator t = null;
173             try {
174                 t = Transliterator.getInstance(ID);
175                 // This is only true for some subclasses
176                 //                // We should get a new instance if we try again
177                 //                Transliterator t2 = Transliterator.getInstance(ID);
178                 //                if (t != t2) {
179                 //                    logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);
180                 //                } else {
181                 //                    errln("FAIL: " + ID + " returned identical instances");
182                 //                    t = null;
183                 //                }
184             } catch (IllegalArgumentException ex) {
185                 errln("FAIL: " + ID);
186                 throw ex;
187             }
188 
189             //            if (t.getFilter() != null) {
190             //                errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter());
191             //            }
192 
193             if (t != null) {
194                 // Now test toRules
195                 String rules = null;
196                 try {
197                     rules = t.toRules(true);
198 
199                     Transliterator.createFromRules("x", rules, Transliterator.FORWARD);
200                 } catch (IllegalArgumentException ex2) {
201                     errln("FAIL: " + ID + ".toRules() => bad rules: " +
202                             rules);
203                     throw ex2;
204                 }
205             }
206         }
207 
208         // Now test the failure path
209         try {
210             ID = "<Not a valid Transliterator ID>";
211             Transliterator t = Transliterator.getInstance(ID);
212             errln("FAIL: " + ID + " returned " + t);
213         } catch (IllegalArgumentException ex) {
214             logln("OK: Bogus ID handled properly");
215         }
216 
217         ms = System.currentTimeMillis() - ms;
218         logln("Elapsed time: " + ms + " ms");
219     }
220 
TestSimpleRules()221     public void TestSimpleRules() {
222         /* Example: rules 1. ab>x|y
223          *                2. yc>z
224          *
225          * []|eabcd  start - no match, copy e to tranlated buffer
226          * [e]|abcd  match rule 1 - copy output & adjust cursor
227          * [ex|y]cd  match rule 2 - copy output & adjust cursor
228          * [exz]|d   no match, copy d to transliterated buffer
229          * [exzd]|   done
230          */
231         expect("ab>x|y;" +
232                 "yc>z",
233                 "eabcd", "exzd");
234 
235         /* Another set of rules:
236          *    1. ab>x|yzacw
237          *    2. za>q
238          *    3. qc>r
239          *    4. cw>n
240          *
241          * []|ab       Rule 1
242          * [x|yzacw]   No match
243          * [xy|zacw]   Rule 2
244          * [xyq|cw]    Rule 4
245          * [xyqn]|     Done
246          */
247         expect("ab>x|yzacw;" +
248                 "za>q;" +
249                 "qc>r;" +
250                 "cw>n",
251                 "ab", "xyqn");
252 
253         /* Test categories
254          */
255         Transliterator t = Transliterator.createFromRules("<ID>",
256                 "$dummy=\uE100;" +
257                 "$vowel=[aeiouAEIOU];" +
258                 "$lu=[:Lu:];" +
259                 "$vowel } $lu > '!';" +
260                 "$vowel > '&';" +
261                 "'!' { $lu > '^';" +
262                 "$lu > '*';" +
263                 "a>ERROR",
264                 Transliterator.FORWARD);
265         expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
266     }
267 
268     /**
269      * Test inline set syntax and set variable syntax.
270      */
TestInlineSet()271     public void TestInlineSet() {
272         expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
273         expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
274 
275         expect("$digit = [0-9];" +
276                 "$alpha = [a-zA-Z];" +
277                 "$alphanumeric = [$digit $alpha];" + // ***
278                 "$special = [^$alphanumeric];" +     // ***
279                 "$alphanumeric > '-';" +
280                 "$special > '*';",
281 
282                 "thx-1138", "---*----");
283     }
284 
285     /**
286      * Create some inverses and confirm that they work.  We have to be
287      * careful how we do this, since the inverses will not be true
288      * inverses -- we can't throw any random string at the composition
289      * of the transliterators and expect the identity function.  F x
290      * F' != I.  However, if we are careful about the input, we will
291      * get the expected results.
292      */
TestRuleBasedInverse()293     public void TestRuleBasedInverse() {
294         String RULES =
295             "abc>zyx;" +
296             "ab>yz;" +
297             "bc>zx;" +
298             "ca>xy;" +
299             "a>x;" +
300             "b>y;" +
301             "c>z;" +
302 
303             "abc<zyx;" +
304             "ab<yz;" +
305             "bc<zx;" +
306             "ca<xy;" +
307             "a<x;" +
308             "b<y;" +
309             "c<z;" +
310 
311             "";
312 
313         String[] DATA = {
314                 // Careful here -- random strings will not work.  If we keep
315                 // the left side to the domain and the right side to the range
316                 // we will be okay though (left, abc; right xyz).
317                 "a", "x",
318                 "abcacab", "zyxxxyy",
319                 "caccb", "xyzzy",
320         };
321 
322         Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
323         Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
324         for (int i=0; i<DATA.length; i+=2) {
325             expect(fwd, DATA[i], DATA[i+1]);
326             expect(rev, DATA[i+1], DATA[i]);
327         }
328     }
329 
330     /**
331      * Basic test of keyboard.
332      */
TestKeyboard()333     public void TestKeyboard() {
334         Transliterator t = Transliterator.createFromRules("<ID>",
335                 "psch>Y;"
336                 +"ps>y;"
337                 +"ch>x;"
338                 +"a>A;", Transliterator.FORWARD);
339         String DATA[] = {
340                 // insertion, buffer
341                 "a", "A",
342                 "p", "Ap",
343                 "s", "Aps",
344                 "c", "Apsc",
345                 "a", "AycA",
346                 "psch", "AycAY",
347                 null, "AycAY", // null means finishKeyboardTransliteration
348         };
349 
350         keyboardAux(t, DATA);
351     }
352 
353     /**
354      * Basic test of keyboard with cursor.
355      */
TestKeyboard2()356     public void TestKeyboard2() {
357         Transliterator t = Transliterator.createFromRules("<ID>",
358                 "ych>Y;"
359                 +"ps>|y;"
360                 +"ch>x;"
361                 +"a>A;", Transliterator.FORWARD);
362         String DATA[] = {
363                 // insertion, buffer
364                 "a", "A",
365                 "p", "Ap",
366                 "s", "Aps", // modified for rollback - "Ay",
367                 "c", "Apsc", // modified for rollback - "Ayc",
368                 "a", "AycA",
369                 "p", "AycAp",
370                 "s", "AycAps", // modified for rollback - "AycAy",
371                 "c", "AycApsc", // modified for rollback - "AycAyc",
372                 "h", "AycAY",
373                 null, "AycAY", // null means finishKeyboardTransliteration
374         };
375 
376         keyboardAux(t, DATA);
377     }
378 
379     /**
380      * Test keyboard transliteration with back-replacement.
381      */
TestKeyboard3()382     public void TestKeyboard3() {
383         // We want th>z but t>y.  Furthermore, during keyboard
384         // transliteration we want t>y then yh>z if t, then h are
385         // typed.
386         String RULES =
387             "t>|y;" +
388             "yh>z;" +
389             "";
390 
391         String[] DATA = {
392                 // Column 1: characters to add to buffer (as if typed)
393                 // Column 2: expected appearance of buffer after
394                 //           keyboard xliteration.
395                 "a", "a",
396                 "b", "ab",
397                 "t", "abt", // modified for rollback - "aby",
398                 "c", "abyc",
399                 "t", "abyct", // modified for rollback - "abycy",
400                 "h", "abycz",
401                 null, "abycz", // null means finishKeyboardTransliteration
402         };
403 
404         Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
405         keyboardAux(t, DATA);
406     }
407 
keyboardAux(Transliterator t, String[] DATA)408     private void keyboardAux(Transliterator t, String[] DATA) {
409         Transliterator.Position index = new Transliterator.Position();
410         ReplaceableString s = new ReplaceableString();
411         for (int i=0; i<DATA.length; i+=2) {
412             StringBuffer log;
413             if (DATA[i] != null) {
414                 log = new StringBuffer(s.toString() + " + "
415                         + DATA[i]
416                                + " -> ");
417                 t.transliterate(s, index, DATA[i]);
418             } else {
419                 log = new StringBuffer(s.toString() + " => ");
420                 t.finishTransliteration(s, index);
421             }
422             UtilityExtensions.formatInput(log, s, index);
423             if (s.toString().equals(DATA[i+1])) {
424                 logln(log.toString());
425             } else {
426                 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
427             }
428         }
429     }
430 
431     // Latin-Arabic has been temporarily removed until it can be
432     // done correctly.
433 
434     //  public void TestArabic() {
435     //      String DATA[] = {
436     //          "Arabic",
437     //              "\u062a\u062a\u0645\u062a\u0639 "+
438     //              "\u0627\u0644\u0644\u063a\u0629 "+
439     //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
440     //              "\u0628\u0628\u0646\u0638\u0645 "+
441     //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
442     //              "\u062c\u0645\u064a\u0644\u0629"
443     //      };
444 
445     //      Transliterator t = Transliterator.getInstance("Latin-Arabic");
446     //      for (int i=0; i<DATA.length; i+=2) {
447     //          expect(t, DATA[i], DATA[i+1]);
448     //      }
449     //  }
450 
451     /**
452      * Compose the Kana transliterator forward and reverse and try
453      * some strings that should come out unchanged.
454      */
TestCompoundKana()455     public void TestCompoundKana() {
456         Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
457         expect(t, "aaaaa", "aaaaa");
458     }
459 
460     /**
461      * Compose the hex transliterators forward and reverse.
462      */
TestCompoundHex()463     public void TestCompoundHex() {
464         Transliterator a = Transliterator.getInstance("Any-Hex");
465         Transliterator b = Transliterator.getInstance("Hex-Any");
466         // Transliterator[] trans = { a, b };
467         // Transliterator ab = Transliterator.getInstance(trans);
468         Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
469 
470         // Do some basic tests of b
471         expect(b, "\\u0030\\u0031", "01");
472 
473         String s = "abcde";
474         expect(ab, s, s);
475 
476         // trans = new Transliterator[] { b, a };
477         // Transliterator ba = Transliterator.getInstance(trans);
478         Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
479         ReplaceableString str = new ReplaceableString(s);
480         a.transliterate(str);
481         expect(ba, str.toString(), str.toString());
482     }
483 
484     /**
485      * Do some basic tests of filtering.
486      */
TestFiltering()487     public void TestFiltering() {
488 
489         Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
490         tempTrans.setFilter(new UnicodeSet("[a]"));
491         String tempResult = tempTrans.transform("xa");
492         assertEquals("context should not be filtered ", "xb", tempResult);
493 
494         tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
495         tempResult = tempTrans.transform("xa");
496         assertEquals("context should not be filtered ", "xb", tempResult);
497 
498         Transliterator hex = Transliterator.getInstance("Any-Hex");
499         hex.setFilter(new UnicodeFilter() {
500             public boolean contains(int c) {
501                 return c != 'c';
502             }
503             public String toPattern(boolean escapeUnprintable) {
504                 return "";
505             }
506             public boolean matchesIndexValue(int v) {
507                 return false;
508             }
509             public void addMatchSetTo(UnicodeSet toUnionTo) {}
510         });
511         String s = "abcde";
512         String out = hex.transliterate(s);
513         String exp = "\\u0061\\u0062c\\u0064\\u0065";
514         if (out.equals(exp)) {
515             logln("Ok:   \"" + exp + "\"");
516         } else {
517             logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
518         }
519     }
520 
521     /**
522      * Test anchors
523      */
TestAnchors()524     public void TestAnchors() {
525         expect("^ab  > 01 ;" +
526                 " ab  > |8 ;" +
527                 "  b  > k ;" +
528                 " 8x$ > 45 ;" +
529                 " 8x  > 77 ;",
530 
531                 "ababbabxabx",
532         "018k7745");
533         expect("$s = [z$] ;" +
534                 "$s{ab    > 01 ;" +
535                 "   ab    > |8 ;" +
536                 "    b    > k ;" +
537                 "   8x}$s > 45 ;" +
538                 "   8x    > 77 ;",
539 
540                 "abzababbabxzabxabx",
541         "01z018k45z01x45");
542     }
543 
544     /**
545      * Test pattern quoting and escape mechanisms.
546      */
TestPatternQuoting()547     public void TestPatternQuoting() {
548         // Array of 3n items
549         // Each item is <rules>, <input>, <expected output>
550         String[] DATA = {
551                 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
552         };
553 
554         for (int i=0; i<DATA.length; i+=3) {
555             logln("Pattern: " + Utility.escape(DATA[i]));
556             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
557             expect(t, DATA[i+1], DATA[i+2]);
558         }
559     }
560 
TestVariableNames()561     public void TestVariableNames() {
562         Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
563         if (gl == null) {
564             errln("FAIL: null Transliterator returned.");
565         }
566     }
567 
568     /**
569      * Regression test for bugs found in Greek transliteration.
570      */
TestJ277()571     public void TestJ277() {
572         Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
573 
574         char sigma = (char)0x3C3;
575         char upsilon = (char)0x3C5;
576         char nu = (char)0x3BD;
577         // not used char PHI = (char)0x3A6;
578         char alpha = (char)0x3B1;
579         // not used char omega = (char)0x3C9;
580         // not used char omicron = (char)0x3BF;
581         // not used char epsilon = (char)0x3B5;
582 
583         // sigma upsilon nu -> syn
584         StringBuffer buf = new StringBuffer();
585         buf.append(sigma).append(upsilon).append(nu);
586         String syn = buf.toString();
587         expect(gl, syn, "syn");
588 
589         // sigma alpha upsilon nu -> saun
590         buf.setLength(0);
591         buf.append(sigma).append(alpha).append(upsilon).append(nu);
592         String sayn = buf.toString();
593         expect(gl, sayn, "saun");
594 
595         // Again, using a smaller rule set
596         String rules =
597             "$alpha   = \u03B1;" +
598             "$nu      = \u03BD;" +
599             "$sigma   = \u03C3;" +
600             "$ypsilon = \u03C5;" +
601             "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +
602             "s <>           $sigma;" +
603             "a <>           $alpha;" +
604             "u <>  $vowel { $ypsilon;" +
605             "y <>           $ypsilon;" +
606             "n <>           $nu;";
607         Transliterator mini = Transliterator.createFromRules
608         ("mini", rules, Transliterator.REVERSE);
609         expect(mini, syn, "syn");
610         expect(mini, sayn, "saun");
611 
612         //|    // Transliterate the Greek locale data
613         //|    Locale el("el");
614         //|    DateFormatSymbols syms(el, status);
615         //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
616         //|    int32_t i, count;
617         //|    const UnicodeString* data = syms.getMonths(count);
618         //|    for (i=0; i<count; ++i) {
619         //|        if (data[i].length() == 0) {
620         //|            continue;
621         //|        }
622         //|        UnicodeString out(data[i]);
623         //|        gl->transliterate(out);
624         //|        bool_t ok = TRUE;
625         //|        if (data[i].length() >= 2 && out.length() >= 2 &&
626         //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
627         //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
628         //|                ok = FALSE;
629         //|            }
630         //|        }
631         //|        if (ok) {
632         //|            logln(prettify(data[i] + " -> " + out));
633         //|        } else {
634         //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
635         //|        }
636         //|    }
637     }
638 
639     //    /**
640     //     * Prefix, suffix support in hex transliterators
641     //     */
642     //    public void TestJ243() {
643     //        // Test default Hex-Any, which should handle
644     //        // \\u, \\U, u+, and U+
645     //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
646     //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
647     //
648     //        // Try a custom Hex-Any
649     //        // \\uXXXX and &#xXXXX;
650     //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
651     //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
652     //               "abcd5fx012&#x00033;");
653     //
654     //        // Try custom Any-Hex (default is tested elsewhere)
655     //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
656     //        expect(hex3, "012", "&#x30;&#x31;&#x32;");
657     //    }
658 
TestJ329()659     public void TestJ329() {
660 
661         Object[] DATA = {
662                 Boolean.FALSE, "a > b; c > d",
663                 Boolean.TRUE,  "a > b; no operator; c > d",
664         };
665 
666         for (int i=0; i<DATA.length; i+=2) {
667             String err = null;
668             try {
669                 Transliterator.createFromRules("<ID>",
670                         (String) DATA[i+1],
671                         Transliterator.FORWARD);
672             } catch (IllegalArgumentException e) {
673                 err = e.getMessage();
674             }
675             boolean gotError = (err != null);
676             String desc = (String) DATA[i+1] +
677             (gotError ? (" -> error: " + err) : " -> no error");
678             if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
679                 logln("Ok:   " + desc);
680             } else {
681                 errln("FAIL: " + desc);
682             }
683         }
684     }
685 
686     /**
687      * Test segments and segment references.
688      */
TestSegments()689     public void TestSegments() {
690         // Array of 3n items
691         // Each item is <rules>, <input>, <expected output>
692         String[] DATA = {
693                 "([a-z]) '.' ([0-9]) > $2 '-' $1",
694                 "abc.123.xyz.456",
695                 "ab1-c23.xy4-z56",
696         };
697 
698         for (int i=0; i<DATA.length; i+=3) {
699             logln("Pattern: " + Utility.escape(DATA[i]));
700             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
701             expect(t, DATA[i+1], DATA[i+2]);
702         }
703     }
704 
705     /**
706      * Test cursor positioning outside of the key
707      */
TestCursorOffset()708     public void TestCursorOffset() {
709         // Array of 3n items
710         // Each item is <rules>, <input>, <expected output>
711         String[] DATA = {
712                 "pre {alpha} post > | @ ALPHA ;" +
713                 "eALPHA > beta ;" +
714                 "pre {beta} post > BETA @@ | ;" +
715                 "post > xyz",
716 
717                 "prealphapost prebetapost",
718                 "prbetaxyz preBETApost",
719         };
720 
721         for (int i=0; i<DATA.length; i+=3) {
722             logln("Pattern: " + Utility.escape(DATA[i]));
723             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
724             expect(t, DATA[i+1], DATA[i+2]);
725         }
726     }
727 
728     /**
729      * Test zero length and > 1 char length variable values.  Test
730      * use of variable refs in UnicodeSets.
731      */
TestArbitraryVariableValues()732     public void TestArbitraryVariableValues() {
733         // Array of 3n items
734         // Each item is <rules>, <input>, <expected output>
735         String[] DATA = {
736                 "$abe = ab;" +
737                 "$pat = x[yY]z;" +
738                 "$ll  = 'a-z';" +
739                 "$llZ = [$ll];" +
740                 "$llY = [$ll$pat];" +
741                 "$emp = ;" +
742 
743                 "$abe > ABE;" +
744                 "$pat > END;" +
745                 "$llZ > 1;" +
746                 "$llY > 2;" +
747                 "7$emp 8 > 9;" +
748                 "",
749 
750                 "ab xYzxyz stY78",
751                 "ABE ENDEND 1129",
752         };
753 
754         for (int i=0; i<DATA.length; i+=3) {
755             logln("Pattern: " + Utility.escape(DATA[i]));
756             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
757             expect(t, DATA[i+1], DATA[i+2]);
758         }
759     }
760 
761     /**
762      * Confirm that the contextStart, contextLimit, start, and limit
763      * behave correctly.
764      */
TestPositionHandling()765     public void TestPositionHandling() {
766         // Array of 3n items
767         // Each item is <rules>, <input>, <expected output>
768         String[] DATA = {
769                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
770                 "xtat txtb", // pos 0,9,0,9
771                 "xTTaSS TTxUUb",
772 
773                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
774                 "xtat txtb", // pos 2,9,3,8
775                 "xtaSS TTxUUb",
776 
777                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
778                 "xtat txtb", // pos 3,8,3,8
779                 "xtaTT TTxTTb",
780         };
781 
782         // Array of 4n positions -- these go with the DATA array
783         // They are: contextStart, contextLimit, start, limit
784         int[] POS = {
785                 0, 9, 0, 9,
786                 2, 9, 3, 8,
787                 3, 8, 3, 8,
788         };
789 
790         int n = DATA.length/3;
791         for (int i=0; i<n; i++) {
792             Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
793             Transliterator.Position pos = new Transliterator.Position(
794                     POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
795             ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
796             t.transliterate(rsource, pos);
797             t.finishTransliteration(rsource, pos);
798             String result = rsource.toString();
799             String exp = DATA[3*i+2];
800             expectAux(Utility.escape(DATA[3*i]),
801                     DATA[3*i+1],
802                     result,
803                     result.equals(exp),
804                     exp);
805         }
806     }
807 
808     /**
809      * Test the Hiragana-Katakana transliterator.
810      */
TestHiraganaKatakana()811     public void TestHiraganaKatakana() {
812         Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
813         Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
814 
815         // Array of 3n items
816         // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
817         String[] DATA = {
818                 "both",
819                 "\u3042\u3090\u3099\u3092\u3050",
820                 "\u30A2\u30F8\u30F2\u30B0",
821 
822                 "kh",
823                 "\u307C\u3051\u3060\u3042\u3093\u30FC",
824                 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
825         };
826 
827         for (int i=0; i<DATA.length; i+=3) {
828             switch (DATA[i].charAt(0)) {
829             case 'h': // Hiragana-Katakana
830                 expect(hk, DATA[i+1], DATA[i+2]);
831                 break;
832             case 'k': // Katakana-Hiragana
833                 expect(kh, DATA[i+2], DATA[i+1]);
834                 break;
835             case 'b': // both
836                 expect(hk, DATA[i+1], DATA[i+2]);
837                 expect(kh, DATA[i+2], DATA[i+1]);
838                 break;
839             }
840         }
841 
842     }
843 
    /**
     * Intentionally empty on Java.  The corresponding test exercises a
     * copy constructor and exists only on C++.
     * NOTE(review): presumably kept so the Java and C++ test lists stay
     * parallel -- confirm before removing.
     */
    public void TestCopyJ476() {
        // This is a C++-only copy constructor test
    }
847 
848     /**
849      * Test inter-Indic transliterators.  These are composed.
850      */
TestInterIndic()851     public void TestInterIndic() {
852         String ID = "Devanagari-Gujarati";
853         Transliterator dg = Transliterator.getInstance(ID);
854         if (dg == null) {
855             errln("FAIL: getInstance(" + ID + ") returned null");
856             return;
857         }
858         String id = dg.getID();
859         if (!id.equals(ID)) {
860             errln("FAIL: getInstance(" + ID + ").getID() => " + id);
861         }
862         String dev = "\u0901\u090B\u0925";
863         String guj = "\u0A81\u0A8B\u0AA5";
864         expect(dg, dev, guj);
865     }
866 
867     /**
868      * Test filter syntax in IDs. (J23)
869      */
TestFilterIDs()870     public void TestFilterIDs() {
871         String[] DATA = {
872                 "[aeiou]Any-Hex", // ID
873                 "[aeiou]Hex-Any", // expected inverse ID
874                 "quizzical",      // src
875                 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
876 
877                 "[aeiou]Any-Hex;[^5]Hex-Any",
878                 "[^5]Any-Hex;[aeiou]Hex-Any",
879                 "quizzical",
880                 "q\\u0075izzical",
881 
882                 "[abc]Null",
883                 "[abc]Null",
884                 "xyz",
885                 "xyz",
886         };
887 
888         for (int i=0; i<DATA.length; i+=4) {
889             String ID = DATA[i];
890             Transliterator t = Transliterator.getInstance(ID);
891             expect(t, DATA[i+2], DATA[i+3]);
892 
893             // Check the ID
894             if (!ID.equals(t.getID())) {
895                 errln("FAIL: getInstance(" + ID + ").getID() => " +
896                         t.getID());
897             }
898 
899             // Check the inverse
900             String uID = DATA[i+1];
901             Transliterator u = t.getInverse();
902             if (u == null) {
903                 errln("FAIL: " + ID + ".getInverse() returned NULL");
904             } else if (!u.getID().equals(uID)) {
905                 errln("FAIL: " + ID + ".getInverse().getID() => " +
906                         u.getID() + ", expected " + uID);
907             }
908         }
909     }
910 
911     /**
912      * Test the case mapping transliterators.
913      */
TestCaseMap()914     public void TestCaseMap() {
915         Transliterator toUpper =
916             Transliterator.getInstance("Any-Upper[^xyzXYZ]");
917         Transliterator toLower =
918             Transliterator.getInstance("Any-Lower[^xyzXYZ]");
919         Transliterator toTitle =
920             Transliterator.getInstance("Any-Title[^xyzXYZ]");
921 
922         expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
923         "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
924         expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
925         "the quick brown foX jumped over the lazY dogs.");
926         expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
927         "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
928     }
929 
930     /**
931      * Test the name mapping transliterators.
932      */
TestNameMap()933     public void TestNameMap() {
934         Transliterator uni2name =
935             Transliterator.getInstance("Any-Name[^abc]");
936         Transliterator name2uni =
937             Transliterator.getInstance("Name-Any");
938 
939         expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
940         "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
941         expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
942         "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
943 
944         // round trip
945         Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
946 
947         String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
948         expect(t, s, s);
949     }
950 
951     /**
952      * Test liberalized ID syntax.  1006c
953      */
TestLiberalizedID()954     public void TestLiberalizedID() {
955         // Some test cases have an expected getID() value of NULL.  This
956         // means I have disabled the test case for now.  This stuff is
957         // still under development, and I haven't decided whether to make
958         // getID() return canonical case yet.  It will all get rewritten
959         // with the move to Source-Target/Variant IDs anyway. [aliu]
960         String DATA[] = {
961                 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
962                 "  Null  ", "Null", "whitespace",
963                 " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
964                 "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",
965         };
966 
967         for (int i=0; i<DATA.length; i+=3) {
968             try {
969                 Transliterator t = Transliterator.getInstance(DATA[i]);
970                 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
971                     logln("Ok: " + DATA[i+2] +
972                             " create ID \"" + DATA[i] + "\" => \"" +
973                             t.getID() + "\"");
974                 } else {
975                     errln("FAIL: " + DATA[i+2] +
976                             " create ID \"" + DATA[i] + "\" => \"" +
977                             t.getID() + "\", exp \"" + DATA[i+1] + "\"");
978                 }
979             } catch (IllegalArgumentException e) {
980                 errln("FAIL: " + DATA[i+2] +
981                         " create ID \"" + DATA[i] + "\"");
982             }
983         }
984     }
985 
TestCreateInstance()986     public void TestCreateInstance() {
987         String FORWARD = "F";
988         String REVERSE = "R";
989         String DATA[] = {
990                 // Column 1: id
991                 // Column 2: direction
992                 // Column 3: expected ID, or "" if expect failure
993                 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
994 
995                 // JB#2689: bad compound causes crash
996                 "InvalidSource-InvalidTarget", FORWARD, "",
997                 "InvalidSource-InvalidTarget", REVERSE, "",
998                 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
999                 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1000                 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1001                 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1002 
1003                 null
1004         };
1005 
1006         for (int i=0; DATA[i]!=null; i+=3) {
1007             String id=DATA[i];
1008             int dir = (DATA[i+1]==FORWARD)?
1009                     Transliterator.FORWARD:Transliterator.REVERSE;
1010             String expID=DATA[i+2];
1011             Exception e = null;
1012             Transliterator t;
1013             try {
1014                 t = Transliterator.getInstance(id,dir);
1015             } catch (Exception e1) {
1016                 e = e1;
1017                 t = null;
1018             }
1019             String newID = (t!=null)?t.getID():"";
1020             boolean ok = (newID.equals(expID));
1021             if (t==null) {
1022                 newID = e.getMessage();
1023             }
1024             if (ok) {
1025                 logln("Ok: createInstance(" +
1026                         id + "," + DATA[i+1] + ") => " + newID);
1027             } else {
1028                 errln("FAIL: createInstance(" +
1029                         id + "," + DATA[i+1] + ") => " + newID +
1030                         ", expected " + expID);
1031             }
1032         }
1033     }
1034 
1035     /**
1036      * Test the normalization transliterator.
1037      */
TestNormalizationTransliterator()1038     public void TestNormalizationTransliterator() {
1039         // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
1040         // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1041         String[][] CANON = {
1042                 // Input               Decomposed            Composed
1043                 {"cat",                "cat",                "cat"               },
1044                 {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },
1045 
1046                 {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above
1047                 {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above
1048 
1049                 {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above
1050                 {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below
1051                 {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above
1052 
1053                 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1054                 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1055 
1056                 {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave
1057                 {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave
1058                 {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron
1059 
1060                 {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign
1061                 {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring
1062 
1063                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0
1064                 {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0
1065 
1066                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1067                 {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },
1068 
1069                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1070                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1071                 {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten
1072                 {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten
1073                 {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten
1074 
1075                 {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },
1076         };
1077 
1078         String[][] COMPAT = {
1079                 // Input               Decomposed            Composed
1080                 {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed
1081 
1082                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0
1083                 {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i
1084 
1085                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1086                 {"Henry \u2163",       "Henry IV",           "Henry IV"          },
1087 
1088                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1089                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1090 
1091                 {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten
1092         };
1093 
1094         Transliterator NFD = Transliterator.getInstance("NFD");
1095         Transliterator NFC = Transliterator.getInstance("NFC");
1096         for (int i=0; i<CANON.length; ++i) {
1097             String in = CANON[i][0];
1098             String expd = CANON[i][1];
1099             String expc = CANON[i][2];
1100             expect(NFD, in, expd);
1101             expect(NFC, in, expc);
1102         }
1103 
1104         Transliterator NFKD = Transliterator.getInstance("NFKD");
1105         Transliterator NFKC = Transliterator.getInstance("NFKC");
1106         for (int i=0; i<COMPAT.length; ++i) {
1107             String in = COMPAT[i][0];
1108             String expkd = COMPAT[i][1];
1109             String expkc = COMPAT[i][2];
1110             expect(NFKD, in, expkd);
1111             expect(NFKC, in, expkc);
1112         }
1113 
1114         Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1115         expect(t, "\u010dx", "c\u030C");
1116     }
1117 
1118     /**
1119      * Test compound RBT rules.
1120      */
TestCompoundRBT()1121     public void TestCompoundRBT() {
1122         // Careful with spacing and ';' here:  Phrase this exactly
1123         // as toRules() is going to return it.  If toRules() changes
1124         // with regard to spacing or ';', then adjust this string.
1125         String rule = "::Hex-Any;\n" +
1126         "::Any-Lower;\n" +
1127         "a > '.A.';\n" +
1128         "b > '.B.';\n" +
1129         "::[^t]Any-Upper;";
1130         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1131         if (t == null) {
1132             errln("FAIL: createFromRules failed");
1133             return;
1134         }
1135         expect(t, "\u0043at in the hat, bat on the mat",
1136         "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1137         String r = t.toRules(true);
1138         if (r.equals(rule)) {
1139             logln("OK: toRules() => " + r);
1140         } else {
1141             errln("FAIL: toRules() => " + r +
1142                     ", expected " + rule);
1143         }
1144 
1145         // Now test toRules
1146         t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1147         if (t == null) {
1148             errln("FAIL: createInstance failed");
1149             return;
1150         }
1151         String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1152         r = t.toRules(true);
1153         if (!r.equals(exp)) {
1154             errln("FAIL: toRules() => " + r +
1155                     ", expected " + exp);
1156         } else {
1157             logln("OK: toRules() => " + r);
1158         }
1159 
1160         // Round trip the result of toRules
1161         t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1162         if (t == null) {
1163             errln("FAIL: createFromRules #2 failed");
1164             return;
1165         } else {
1166             logln("OK: createFromRules(" + r + ") succeeded");
1167         }
1168 
1169         // Test toRules again
1170         r = t.toRules(true);
1171         if (!r.equals(exp)) {
1172             errln("FAIL: toRules() => " + r +
1173                     ", expected " + exp);
1174         } else {
1175             logln("OK: toRules() => " + r);
1176         }
1177 
1178         // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1179         // to what the regenerated ID will look like.
1180         String id = "Upper(Lower);(NFKC)";
1181         t = Transliterator.getInstance(id, Transliterator.FORWARD);
1182         if (t == null) {
1183             errln("FAIL: createInstance #2 failed");
1184             return;
1185         }
1186         if (t.getID().equals(id)) {
1187             logln("OK: created " + id);
1188         } else {
1189             errln("FAIL: createInstance(" + id +
1190                     ").getID() => " + t.getID());
1191         }
1192 
1193         Transliterator u = t.getInverse();
1194         if (u == null) {
1195             errln("FAIL: createInverse failed");
1196             return;
1197         }
1198         exp = "NFKC();Lower(Upper)";
1199         if (u.getID().equals(exp)) {
1200             logln("OK: createInverse(" + id + ") => " +
1201                     u.getID());
1202         } else {
1203             errln("FAIL: createInverse(" + id + ") => " +
1204                     u.getID());
1205         }
1206     }
1207 
1208     /**
1209      * Compound filter semantics were orginially not implemented
1210      * correctly.  Originally, each component filter f(i) is replaced by
1211      * f'(i) = f(i) && g, where g is the filter for the compound
1212      * transliterator.
1213      *
1214      * From Mark:
1215      *
1216      * Suppose and I have a transliterator X. Internally X is
1217      * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1218      *
1219      * The compound should convert all greek characters (through latin) to
1220      * cyrillic, then lowercase the result. The filter should say "don't
1221      * touch 'A' in the original". But because an intermediate result
1222      * happens to go through "A", the Greek Alpha gets hung up.
1223      */
TestCompoundFilter()1224     public void TestCompoundFilter() {
1225         Transliterator t = Transliterator.getInstance
1226         ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
1227         t.setFilter(new UnicodeSet("[^A]"));
1228 
1229         // Only the 'A' at index 1 should remain unchanged
1230         expect(t,
1231                 CharsToUnicodeString("BA\\u039A\\u0391"),
1232                 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1233     }
1234 
1235     /**
1236      * Test the "Remove" transliterator.
1237      */
TestRemove()1238     public void TestRemove() {
1239         Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1240         expect(t, "The quick brown fox.",
1241         "Th qck brwn fx.");
1242     }
1243 
TestToRules()1244     public void TestToRules() {
1245         String RBT = "rbt";
1246         String SET = "set";
1247         String[] DATA = {
1248                 RBT,
1249                 "$a=\\u4E61; [$a] > A;",
1250                 "[\\u4E61] > A;",
1251 
1252                 RBT,
1253                 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1254                 "[[:Zs:][:Zl:]]{a} > A;",
1255 
1256                 SET,
1257                 "[[:Zs:][:Zl:]]",
1258                 "[[:Zs:][:Zl:]]",
1259 
1260                 SET,
1261                 "[:Ps:]",
1262                 "[:Ps:]",
1263 
1264                 SET,
1265                 "[:L:]",
1266                 "[:L:]",
1267 
1268                 SET,
1269                 "[[:L:]-[A]]",
1270                 "[[:L:]-[A]]",
1271 
1272                 SET,
1273                 "[~[:Lu:][:Ll:]]",
1274                 "[~[:Lu:][:Ll:]]",
1275 
1276                 SET,
1277                 "[~[a-z]]",
1278                 "[~[a-z]]",
1279 
1280                 RBT,
1281                 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1282                 "[^[:Zs:]]{a} > A;",
1283 
1284                 RBT,
1285                 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1286                 "[[a-z]-[:Zs:]]{a} > A;",
1287 
1288                 RBT,
1289                 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1290                 "[[:Zs:]&[a-z]]{a} > A;",
1291 
1292                 RBT,
1293                 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1294                 "[x[:Zs:]]{a} > A;",
1295 
1296                 RBT,
1297                 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1298                 "$macron = \\u0304 ;"+
1299                 "$evowel = [aeiouyAEIOUY] ;"+
1300                 "$iotasub = \\u0345 ;"+
1301                 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1302                 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1303 
1304                 RBT,
1305                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1306                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1307         };
1308 
1309         for (int d=0; d < DATA.length; d+=3) {
1310             if (DATA[d] == RBT) {
1311                 // Transliterator test
1312                 Transliterator t = Transliterator.createFromRules("ID",
1313                         DATA[d+1], Transliterator.FORWARD);
1314                 if (t == null) {
1315                     errln("FAIL: createFromRules failed");
1316                     return;
1317                 }
1318                 String rules, escapedRules;
1319                 rules = t.toRules(false);
1320                 escapedRules = t.toRules(true);
1321                 String expRules = Utility.unescape(DATA[d+2]);
1322                 String expEscapedRules = DATA[d+2];
1323                 if (rules.equals(expRules)) {
1324                     logln("Ok: " + DATA[d+1] +
1325                             " => " + Utility.escape(rules));
1326                 } else {
1327                     errln("FAIL: " + DATA[d+1] +
1328                             " => " + Utility.escape(rules + ", exp " + expRules));
1329                 }
1330                 if (escapedRules.equals(expEscapedRules)) {
1331                     logln("Ok: " + DATA[d+1] +
1332                             " => " + escapedRules);
1333                 } else {
1334                     errln("FAIL: " + DATA[d+1] +
1335                             " => " + escapedRules + ", exp " + expEscapedRules);
1336                 }
1337 
1338             } else {
1339                 // UnicodeSet test
1340                 String pat = DATA[d+1];
1341                 String expToPat = DATA[d+2];
1342                 UnicodeSet set = new UnicodeSet(pat);
1343 
1344                 // Adjust spacing etc. as necessary.
1345                 String toPat;
1346                 toPat = set.toPattern(true);
1347                 if (expToPat.equals(toPat)) {
1348                     logln("Ok: " + pat +
1349                             " => " + toPat);
1350                 } else {
1351                     errln("FAIL: " + pat +
1352                             " => " + Utility.escape(toPat) +
1353                             ", exp " + Utility.escape(pat));
1354                 }
1355             }
1356         }
1357     }
1358 
TestContext()1359     public void TestContext() {
1360         Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
1361 
1362         expect("de > x; {d}e > y;",
1363                 "de",
1364                 "ye",
1365                 pos);
1366 
1367         expect("ab{c} > z;",
1368                 "xadabdabcy",
1369         "xadabdabzy");
1370     }
1371 
CharsToUnicodeString(String s)1372     static final String CharsToUnicodeString(String s) {
1373         return Utility.unescape(s);
1374     }
1375 
TestSupplemental()1376     public void TestSupplemental() {
1377 
1378         expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1379         "a > $a; $s > i;"),
1380         CharsToUnicodeString("ab\\U0001030Fx"),
1381         CharsToUnicodeString("\\U00010300bix"));
1382 
1383         expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1384                 "$b=[A-Z\\U00010400-\\U0001044D];" +
1385         "($a)($b) > $2 $1;"),
1386         CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1387         CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1388 
1389         // k|ax\\U00010300xm
1390 
1391         // k|a\\U00010400\\U00010300xm
1392         // ky|\\U00010400\\U00010300xm
1393         // ky\\U00010400|\\U00010300xm
1394 
1395         // ky\\U00010400|\\U00010300\\U00010400m
1396         // ky\\U00010400y|\\U00010400m
1397         expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1398                 "$a {x} > | @ \\U00010400;" +
1399         "{$a} [^\\u0000-\\uFFFF] > y;"),
1400         CharsToUnicodeString("kax\\U00010300xm"),
1401         CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1402 
1403         expect(Transliterator.getInstance("Any-Name"),
1404                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1405         "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1406 
1407         expect(Transliterator.getInstance("Name-Any"),
1408                 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1409                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1410 
1411         expect(Transliterator.getInstance("Any-Hex/Unicode"),
1412                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1413         "U+10330U+10FF00U+E0061U+00A0");
1414 
1415         expect(Transliterator.getInstance("Any-Hex/C"),
1416                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1417         "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1418 
1419         expect(Transliterator.getInstance("Any-Hex/Perl"),
1420                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1421         "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1422 
1423         expect(Transliterator.getInstance("Any-Hex/Java"),
1424                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1425         "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1426 
1427         expect(Transliterator.getInstance("Any-Hex/XML"),
1428                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1429         "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1430 
1431         expect(Transliterator.getInstance("Any-Hex/XML10"),
1432                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1433         "&#66352;&#1113856;&#917601;&#160;");
1434 
1435         expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1436                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1437                 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1438     }
1439 
TestQuantifier()1440     public void TestQuantifier() {
1441 
1442         // Make sure @ in a quantified anteContext works
1443         expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1444                 "AAAAAb",
1445         "aaa(aac)");
1446 
1447         // Make sure @ in a quantified postContext works
1448         expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1449                 "baaaaa",
1450         "caa(aaa)");
1451 
1452         // Make sure @ in a quantified postContext with seg ref works
1453         expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1454                 "baaaaa",
1455         "baa(aaa)");
1456 
1457         // Make sure @ past ante context doesn't enter ante context
1458         Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1459         expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1460                 "xxxab",
1461                 "xxx(ac)",
1462                 pos);
1463 
1464         // Make sure @ past post context doesn't pass limit
1465         Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1466         expect("{b} a+ > c @@ |; x > y; a > A;",
1467                 "baxx",
1468                 "caxx",
1469                 pos2);
1470 
1471         // Make sure @ past post context doesn't enter post context
1472         expect("{b} a+ > c @@ |; x > y; a > A;",
1473                 "baxx",
1474         "cayy");
1475 
1476         expect("(ab)? c > d;",
1477                 "c abc ababc",
1478         "d d abd");
1479 
1480         // NOTE: The (ab)+ when referenced just yields a single "ab",
1481         // not the full sequence of them.  This accords with perl behavior.
1482         expect("(ab)+ {x} > '(' $1 ')';",
1483                 "x abx ababxy",
1484         "x ab(ab) abab(ab)y");
1485 
1486         expect("b+ > x;",
1487                 "ac abc abbc abbbc",
1488         "ac axc axc axc");
1489 
1490         expect("[abc]+ > x;",
1491                 "qac abrc abbcs abtbbc",
1492         "qx xrx xs xtx");
1493 
1494         expect("q{(ab)+} > x;",
1495                 "qa qab qaba qababc qaba",
1496         "qa qx qxa qxc qxa");
1497 
1498         expect("q(ab)* > x;",
1499                 "qa qab qaba qababc",
1500         "xa x xa xc");
1501 
1502         // NOTE: The (ab)+ when referenced just yields a single "ab",
1503         // not the full sequence of them.  This accords with perl behavior.
1504         expect("q(ab)* > '(' $1 ')';",
1505                 "qa qab qaba qababc",
1506         "()a (ab) (ab)a (ab)c");
1507 
1508         // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1509         // quoted string
1510         expect("'ab'+ > x;",
1511                 "bb ab ababb",
1512         "bb x xb");
1513 
1514         // $foo+ and $foo* -- the quantifier should apply to the entire
1515         // variable reference
1516         expect("$var = ab; $var+ > x;",
1517                 "bb ab ababb",
1518         "bb x xb");
1519     }
1520 
1521     static class TestFact implements Transliterator.Factory {
1522         static class NameableNullTrans extends Transliterator {
NameableNullTrans(String id)1523             public NameableNullTrans(String id) {
1524                 super(id, null);
1525             }
handleTransliterate(Replaceable text, Position offsets, boolean incremental)1526             protected void handleTransliterate(Replaceable text,
1527                     Position offsets, boolean incremental) {
1528                 offsets.start = offsets.limit;
1529             }
1530         }
1531         String id;
TestFact(String theID)1532         public TestFact(String theID) {
1533             id = theID;
1534         }
getInstance(String ignoredID)1535         public Transliterator getInstance(String ignoredID) {
1536             return new NameableNullTrans(id);
1537         }
1538     }
1539 
TestSTV()1540     public void TestSTV() {
1541         Enumeration es = Transliterator.getAvailableSources();
1542         for (int i=0; es.hasMoreElements(); ++i) {
1543             String source = (String) es.nextElement();
1544             logln("" + i + ": " + source);
1545             if (source.length() == 0) {
1546                 errln("FAIL: empty source");
1547                 continue;
1548             }
1549             Enumeration et = Transliterator.getAvailableTargets(source);
1550             for (int j=0; et.hasMoreElements(); ++j) {
1551                 String target = (String) et.nextElement();
1552                 logln(" " + j + ": " + target);
1553                 if (target.length() == 0) {
1554                     errln("FAIL: empty target");
1555                     continue;
1556                 }
1557                 Enumeration ev = Transliterator.getAvailableVariants(source, target);
1558                 for (int k=0; ev.hasMoreElements(); ++k) {
1559                     String variant = (String) ev.nextElement();
1560                     if (variant.length() == 0) {
1561                         logln("  " + k + ": <empty>");
1562                     } else {
1563                         logln("  " + k + ": " + variant);
1564                     }
1565                 }
1566             }
1567         }
1568 
1569         // Test registration
1570         String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1571         String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1572         String[] SOURCES = { null, "Seoridf", "Oewoir" };
1573         for (int i=0; i<3; ++i) {
1574             Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1575             try {
1576                 Transliterator t = Transliterator.getInstance(IDS[i]);
1577                 if (t.getID().equals(IDS[i])) {
1578                     logln("Ok: Registration/creation succeeded for ID " +
1579                             IDS[i]);
1580                 } else {
1581                     errln("FAIL: Registration of ID " +
1582                             IDS[i] + " creates ID " + t.getID());
1583                 }
1584                 Transliterator.unregister(IDS[i]);
1585                 try {
1586                     t = Transliterator.getInstance(IDS[i]);
1587                     errln("FAIL: Unregistration failed for ID " +
1588                             IDS[i] + "; still receiving ID " + t.getID());
1589                 } catch (IllegalArgumentException e2) {
1590                     // Good; this is what we expect
1591                     logln("Ok; Unregistered " + IDS[i]);
1592                 }
1593             } catch (IllegalArgumentException e) {
1594                 errln("FAIL: Registration/creation failed for ID " +
1595                         IDS[i]);
1596             } finally {
1597                 Transliterator.unregister(IDS[i]);
1598             }
1599         }
1600 
1601         // Make sure getAvailable API reflects removal
1602         for (Enumeration e = Transliterator.getAvailableIDs();
1603         e.hasMoreElements(); ) {
1604             String id = (String) e.nextElement();
1605             for (int i=0; i<3; ++i) {
1606                 if (id.equals(FULL_IDS[i])) {
1607                     errln("FAIL: unregister(" + id + ") failed");
1608                 }
1609             }
1610         }
1611         for (Enumeration e = Transliterator.getAvailableTargets("Any");
1612         e.hasMoreElements(); ) {
1613             String t = (String) e.nextElement();
1614             if (t.equals(IDS[0])) {
1615                 errln("FAIL: unregister(Any-" + t + ") failed");
1616             }
1617         }
1618         for (Enumeration e = Transliterator.getAvailableSources();
1619         e.hasMoreElements(); ) {
1620             String s = (String) e.nextElement();
1621             for (int i=0; i<3; ++i) {
1622                 if (SOURCES[i] == null) continue;
1623                 if (s.equals(SOURCES[i])) {
1624                     errln("FAIL: unregister(" + s + "-*) failed");
1625                 }
1626             }
1627         }
1628     }
1629 
1630     /**
1631      * Test inverse of Greek-Latin; Title()
1632      */
TestCompoundInverse()1633     public void TestCompoundInverse() {
1634         Transliterator t = Transliterator.getInstance
1635         ("Greek-Latin; Title()", Transliterator.REVERSE);
1636         if (t == null) {
1637             errln("FAIL: createInstance");
1638             return;
1639         }
1640         String exp = "(Title);Latin-Greek";
1641         if (t.getID().equals(exp)) {
1642             logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1643                     t.getID());
1644         } else {
1645             errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1646                     t.getID() + "\", expected \"" + exp + "\"");
1647         }
1648     }
1649 
1650     /**
1651      * Test NFD chaining with RBT
1652      */
TestNFDChainRBT()1653     public void TestNFDChainRBT() {
1654         Transliterator t = Transliterator.createFromRules(
1655                 "TEST", "::NFD; aa > Q; a > q;",
1656                 Transliterator.FORWARD);
1657         logln(t.toRules(true));
1658         expect(t, "aa", "Q");
1659     }
1660 
1661     /**
1662      * Inverse of "Null" should be "Null". (J21)
1663      */
TestNullInverse()1664     public void TestNullInverse() {
1665         Transliterator t = Transliterator.getInstance("Null");
1666         Transliterator u = t.getInverse();
1667         if (!u.getID().equals("Null")) {
1668             errln("FAIL: Inverse of Null should be Null");
1669         }
1670     }
1671 
1672     /**
1673      * Check ID of inverse of alias. (J22)
1674      */
TestAliasInverseID()1675     public void TestAliasInverseID() {
1676         String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1677         Transliterator t = Transliterator.getInstance(ID);
1678         Transliterator u = t.getInverse();
1679         String exp = "Hangul-Latin";
1680         String got = u.getID();
1681         if (!got.equals(exp)) {
1682             errln("FAIL: Inverse of " + ID + " is " + got +
1683                     ", expected " + exp);
1684         }
1685     }
1686 
1687     /**
1688      * Test IDs of inverses of compound transliterators. (J20)
1689      */
TestCompoundInverseID()1690     public void TestCompoundInverseID() {
1691         String ID = "Latin-Jamo;NFC(NFD)";
1692         Transliterator t = Transliterator.getInstance(ID);
1693         Transliterator u = t.getInverse();
1694         String exp = "NFD(NFC);Jamo-Latin";
1695         String got = u.getID();
1696         if (!got.equals(exp)) {
1697             errln("FAIL: Inverse of " + ID + " is " + got +
1698                     ", expected " + exp);
1699         }
1700     }
1701 
1702     /**
1703      * Test undefined variable.
1704      */
TestUndefinedVariable()1705     public void TestUndefinedVariable() {
1706         String rule = "$initial } a <> \u1161;";
1707         try {
1708             Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1709         } catch (IllegalArgumentException e) {
1710             logln("OK: Got exception for " + rule + ", as expected: " +
1711                     e.getMessage());
1712             return;
1713         }
1714         errln("Fail: bogus rule " + rule + " compiled without error");
1715     }
1716 
1717     /**
1718      * Test empty context.
1719      */
TestEmptyContext()1720     public void TestEmptyContext() {
1721         expect(" { a } > b;", "xay a ", "xby b ");
1722     }
1723 
1724     /**
1725      * Test compound filter ID syntax
1726      */
TestCompoundFilterID()1727     public void TestCompoundFilterID() {
1728         String[] DATA = {
1729                 // Col. 1 = ID or rule set (latter must start with #)
1730 
1731                 // = columns > 1 are null if expect col. 1 to be illegal =
1732 
1733                 // Col. 2 = direction, "F..." or "R..."
1734                 // Col. 3 = source string
1735                 // Col. 4 = exp result
1736 
1737                 "[abc]; [abc]", null, null, null, // multiple filters
1738                 "Latin-Greek; [abc];", null, null, null, // misplaced filter
1739                 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1740                 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1741                 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1742                 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1743         };
1744 
1745         for (int i=0; i<DATA.length; i+=4) {
1746             String id = DATA[i];
1747             int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1748                     Transliterator.REVERSE : Transliterator.FORWARD;
1749             String source = DATA[i+2];
1750             String exp = DATA[i+3];
1751             boolean expOk = (DATA[i+1] != null);
1752             Transliterator t = null;
1753             IllegalArgumentException e = null;
1754             try {
1755                 if (id.charAt(0) == '#') {
1756                     t = Transliterator.createFromRules("ID", id, direction);
1757                 } else {
1758                     t = Transliterator.getInstance(id, direction);
1759                 }
1760             } catch (IllegalArgumentException ee) {
1761                 e = ee;
1762             }
1763             boolean ok = (t != null && e == null);
1764             if (ok == expOk) {
1765                 logln("Ok: " + id + " => " + t +
1766                         (e != null ? (", " + e.getMessage()) : ""));
1767                 if (source != null) {
1768                     expect(t, source, exp);
1769                 }
1770             } else {
1771                 errln("FAIL: " + id + " => " + t +
1772                         (e != null ? (", " + e.getMessage()) : ""));
1773             }
1774         }
1775     }
1776 
1777     /**
1778      * Test new property set syntax
1779      */
TestPropertySet()1780     public void TestPropertySet() {
1781         expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1782         expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1783         "[ a stitch ]\n[ in time ]\r[ saves 9]");
1784     }
1785 
1786     /**
1787      * Test various failure points of the new 2.0 engine.
1788      */
TestNewEngine()1789     public void TestNewEngine() {
1790         Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1791         // Katakana should be untouched
1792         expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1793 
1794         if (true) {
1795             // This test will only work if Transliterator.ROLLBACK is
1796             // true.  Otherwise, this test will fail, revealing a
1797             // limitation of global filters in incremental mode.
1798 
1799             Transliterator a =
1800                 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1801             Transliterator A =
1802                 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1803 
1804             //Transliterator array[] = new Transliterator[] {
1805             //    a,
1806             //    Transliterator.getInstance("NFD"),
1807             //    A };
1808             //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1809 
1810             try {
1811                 Transliterator.registerInstance(a);
1812                 Transliterator.registerInstance(A);
1813 
1814                 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1815                 expect(t, "aAaA", "bAbA");
1816 
1817                 Transliterator[] u = t.getElements();
1818                 assertTrue("getElements().length", u.length == 3);
1819                 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1820                 assertEquals("getElements()[1]", u[1].getID(), "NFD");
1821                 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1822 
1823                 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1824                 t.setFilter(new UnicodeSet("[:Ll:]"));
1825                 expect(t, "aAaA", "bAbA");
1826             } finally {
1827                 Transliterator.unregister("a_to_A");
1828                 Transliterator.unregister("A_to_b");
1829             }
1830         }
1831 
1832         expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1833                 "a",
1834         "ax");
1835 
1836         String gr =
1837             "$ddot = \u0308 ;" +
1838             "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1839             "$rough = \u0314 ;" +
1840             "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
1841             "\u03b1 <> a ;" +
1842             "$rough <> h ;";
1843 
1844         expect(gr, "\u03B1\u0314", "ha");
1845     }
1846 
1847     /**
1848      * Test quantified segment behavior.  We want:
1849      * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1850      */
TestQuantifiedSegment()1851     public void TestQuantifiedSegment() {
1852         // The normal case
1853         expect("([abc]+) > x $1 x;", "cba", "xcbax");
1854 
1855         // The tricky case; the quantifier is around the segment
1856         expect("([abc])+ > x $1 x;", "cba", "xax");
1857 
1858         // Tricky case in reverse direction
1859         expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1860 
1861         // Check post-context segment
1862         expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1863 
1864         // Test toRule/toPattern for non-quantified segment.
1865         // Careful with spacing here.
1866         String r = "([a-c]){q} > x $1 x;";
1867         Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1868         String rr = t.toRules(true);
1869         if (!r.equals(rr)) {
1870             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1871         } else {
1872             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1873         }
1874 
1875         // Test toRule/toPattern for quantified segment.
1876         // Careful with spacing here.
1877         r = "([a-c])+{q} > x $1 x;";
1878         t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1879         rr = t.toRules(true);
1880         if (!r.equals(rr)) {
1881             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1882         } else {
1883             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1884         }
1885     }
1886 
1887     //======================================================================
1888     // Ram's tests
1889     //======================================================================
    /* This test exercises the transliteration rules of ISO 15919. */
TestDevanagariLatinRT()1891     public void  TestDevanagariLatinRT(){
1892         String[]  source = {
1893                 "bh\u0101rata",
1894                 "kra",
1895                 "k\u1E63a",
1896                 "khra",
1897                 "gra",
1898                 "\u1E45ra",
1899                 "cra",
1900                 "chra",
1901                 "j\u00F1a",
1902                 "jhra",
1903                 "\u00F1ra",
1904                 "\u1E6Dya",
1905                 "\u1E6Dhra",
1906                 "\u1E0Dya",
1907                 //"r\u0323ya", // \u095c is not valid in Devanagari
1908                 "\u1E0Dhya",
1909                 "\u1E5Bhra",
1910                 "\u1E47ra",
1911                 "tta",
1912                 "thra",
1913                 "dda",
1914                 "dhra",
1915                 "nna",
1916                 "pra",
1917                 "phra",
1918                 "bra",
1919                 "bhra",
1920                 "mra",
1921                 "\u1E49ra",
1922                 //"l\u0331ra",
1923                 "yra",
1924                 "\u1E8Fra",
1925                 //"l-",
1926                 "vra",
1927                 "\u015Bra",
1928                 "\u1E63ra",
1929                 "sra",
1930                 "hma",
1931                 "\u1E6D\u1E6Da",
1932                 "\u1E6D\u1E6Dha",
1933                 "\u1E6Dh\u1E6Dha",
1934                 "\u1E0D\u1E0Da",
1935                 "\u1E0D\u1E0Dha",
1936                 "\u1E6Dya",
1937                 "\u1E6Dhya",
1938                 "\u1E0Dya",
1939                 "\u1E0Dhya",
1940                 // Not roundtrippable --
1941                 // \u0939\u094d\u094d\u092E  - hma
1942                 // \u0939\u094d\u092E         - hma
1943                 // CharsToUnicodeString("hma"),
1944                 "hya",
1945                 "\u015Br\u0325",
1946                 "\u015Bca",
1947                 "\u0115",
1948                 "san\u0304j\u012Bb s\u0113nagupta",
1949                 "\u0101nand vaddir\u0101ju",
1950         };
1951         String[]  expected = {
1952                 "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */
1953                 "\u0915\u094D\u0930",          /* kra         */
1954                 "\u0915\u094D\u0937",          /* ks\u0323a  */
1955                 "\u0916\u094D\u0930",          /* khra        */
1956                 "\u0917\u094D\u0930",          /* gra         */
1957                 "\u0919\u094D\u0930",          /* n\u0307ra  */
1958                 "\u091A\u094D\u0930",          /* cra         */
1959                 "\u091B\u094D\u0930",          /* chra        */
1960                 "\u091C\u094D\u091E",          /* jn\u0303a  */
1961                 "\u091D\u094D\u0930",          /* jhra        */
1962                 "\u091E\u094D\u0930",          /* n\u0303ra  */
1963                 "\u091F\u094D\u092F",          /* t\u0323ya  */
1964                 "\u0920\u094D\u0930",          /* t\u0323hra */
1965                 "\u0921\u094D\u092F",          /* d\u0323ya  */
1966                 //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari
1967                 "\u0922\u094D\u092F",          /* d\u0323hya */
1968                 "\u0922\u093C\u094D\u0930",    /* r\u0323hra */
1969                 "\u0923\u094D\u0930",          /* n\u0323ra  */
1970                 "\u0924\u094D\u0924",          /* tta         */
1971                 "\u0925\u094D\u0930",          /* thra        */
1972                 "\u0926\u094D\u0926",          /* dda         */
1973                 "\u0927\u094D\u0930",          /* dhra        */
1974                 "\u0928\u094D\u0928",          /* nna         */
1975                 "\u092A\u094D\u0930",          /* pra         */
1976                 "\u092B\u094D\u0930",          /* phra        */
1977                 "\u092C\u094D\u0930",          /* bra         */
1978                 "\u092D\u094D\u0930",          /* bhra        */
1979                 "\u092E\u094D\u0930",          /* mra         */
1980                 "\u0929\u094D\u0930",          /* n\u0331ra  */
1981                 //"\u0934\u094D\u0930",          /* l\u0331ra  */
1982                 "\u092F\u094D\u0930",          /* yra         */
1983                 "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */
1984                 //"l-",
1985                 "\u0935\u094D\u0930",          /* vra         */
1986                 "\u0936\u094D\u0930",          /* s\u0301ra  */
1987                 "\u0937\u094D\u0930",          /* s\u0323ra  */
1988                 "\u0938\u094D\u0930",          /* sra         */
1989                 "\u0939\u094d\u092E",          /* hma         */
1990                 "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */
1991                 "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */
1992                 "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/
1993                 "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */
1994                 "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */
1995                 "\u091F\u094D\u092F",          /* t\u0323ya  */
1996                 "\u0920\u094D\u092F",          /* t\u0323hya */
1997                 "\u0921\u094D\u092F",          /* d\u0323ya  */
1998                 "\u0922\u094D\u092F",          /* d\u0323hya */
1999                 // "hma",                         /* hma         */
2000                 "\u0939\u094D\u092F",          /* hya         */
2001                 "\u0936\u0943",                /* s\u0301r\u0325a  */
2002                 "\u0936\u094D\u091A",          /* s\u0301ca  */
2003                 "\u090d",                      /* e\u0306    */
2004                 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
2005                 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
2006         };
2007 
2008         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
2009         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2010 
2011         for(int i= 0; i<source.length; i++){
2012             expect(latinToDev,(source[i]),(expected[i]));
2013             expect(devToLatin,(expected[i]),(source[i]));
2014         }
2015 
2016     }
TestTeluguLatinRT()2017     public void  TestTeluguLatinRT(){
2018         String[]  source = {
2019                 "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */
2020                 "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */
2021                 "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */
2022                 "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */
2023                 "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */
2024                 "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */
2025                 "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */
2026                 "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */
2027                 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */
2028                 "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */
2029         };
2030 
2031         String[]  expected = {
2032                 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2033                 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2034                 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2035                 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2036                 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2037                 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2038                 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2039                 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2040                 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2041                 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2042         };
2043 
2044 
2045         Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2046         Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2047 
2048         for(int i= 0; i<source.length; i++){
2049             expect(latinToDev,(source[i]),(expected[i]));
2050             expect(devToLatin,(expected[i]),(source[i]));
2051         }
2052     }
2053 
TestSanskritLatinRT()2054     public void  TestSanskritLatinRT(){
2055         int MAX_LEN =15;
2056         String[]  source = {
2057                 "rmk\u1E63\u0113t",
2058                 "\u015Br\u012Bmad",
2059                 "bhagavadg\u012Bt\u0101",
2060                 "adhy\u0101ya",
2061                 "arjuna",
2062                 "vi\u1E63\u0101da",
2063                 "y\u014Dga",
2064                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2065                 "uv\u0101cr\u0325",
2066                 "dharmak\u1E63\u0113tr\u0113",
2067                 "kuruk\u1E63\u0113tr\u0113",
2068                 "samav\u0113t\u0101",
2069                 "yuyutsava\u1E25",
2070                 "m\u0101mak\u0101\u1E25",
2071                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2072                 "kimakurvata",
2073                 "san\u0304java",
2074         };
2075         String[]  expected = {
2076                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2077                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2078                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2079                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2080                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2081                 "\u0935\u093f\u0937\u093e\u0926",
2082                 "\u092f\u094b\u0917",
2083                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2084                 "\u0909\u0935\u093E\u091A\u0943",
2085                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2086                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2087                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2088                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2089                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2090                 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2091                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2092                 "\u0938\u0902\u091c\u0935",
2093         };
2094 
2095         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2096         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2097         for(int i= 0; i<MAX_LEN; i++){
2098             expect(latinToDev,(source[i]),(expected[i]));
2099             expect(devToLatin,(expected[i]),(source[i]));
2100         }
2101     }
2102 
TestCompoundLatinRT()2103     public void  TestCompoundLatinRT(){
2104         int MAX_LEN =15;
2105         String[]  source = {
2106                 "rmk\u1E63\u0113t",
2107                 "\u015Br\u012Bmad",
2108                 "bhagavadg\u012Bt\u0101",
2109                 "adhy\u0101ya",
2110                 "arjuna",
2111                 "vi\u1E63\u0101da",
2112                 "y\u014Dga",
2113                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2114                 "uv\u0101cr\u0325",
2115                 "dharmak\u1E63\u0113tr\u0113",
2116                 "kuruk\u1E63\u0113tr\u0113",
2117                 "samav\u0113t\u0101",
2118                 "yuyutsava\u1E25",
2119                 "m\u0101mak\u0101\u1E25",
2120                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2121                 "kimakurvata",
2122                 "san\u0304java"
2123         };
2124         String[]  expected = {
2125                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2126                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2127                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2128                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2129                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2130                 "\u0935\u093f\u0937\u093e\u0926",
2131                 "\u092f\u094b\u0917",
2132                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2133                 "\u0909\u0935\u093E\u091A\u0943",
2134                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2135                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2136                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2137                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2138                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2139                 //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2140                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2141                 "\u0938\u0902\u091c\u0935"
2142         };
2143 
2144         Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2145         Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
2146         for(int i= 0; i<MAX_LEN; i++){
2147             expect(latinToDevToLatin,(source[i]),(source[i]));
2148             expect(devToLatinToDev,(expected[i]),(expected[i]));
2149         }
2150     }
2151     /**
2152      * Test Gurmukhi-Devanagari Tippi and Bindi
2153      */
TestGurmukhiDevanagari()2154     public void TestGurmukhiDevanagari(){
2155         // the rule says:
2156         // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2157         // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2158 
2159         UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2160         UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2161 
2162         UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2163         UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2164         Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
2165         StringBuffer src = new StringBuffer(" \u0902");
2166         StringBuffer expect = new StringBuffer(" \u0A02");
2167         while(vIter.next()){
2168             src.setCharAt(0,(char) vIter.codepoint);
2169             expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2170             expect(trans,src.toString(),expect.toString());
2171         }
2172 
2173         expect.setCharAt(1,'\u0A70');
2174         while(nvIter.next()){
2175             //src.setCharAt(0,(char) nvIter.codepoint);
2176             src.setCharAt(0,(char)nvIter.codepoint);
2177             expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2178             expect(trans,src.toString(),expect.toString());
2179         }
2180     }
2181     /**
2182      * Test instantiation from a locale.
2183      */
TestLocaleInstantiation()2184     public void TestLocaleInstantiation() {
2185         Transliterator t;
2186         try{
2187             t = Transliterator.getInstance("te_IN-Latin");
2188             //expect(t, "\u0430", "a");
2189         }catch(IllegalArgumentException ex){
2190             warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
2191         }
2192         try{
2193             t = Transliterator.getInstance("ru_RU-Latin");
2194             expect(t, "\u0430", "a");
2195         }catch(IllegalArgumentException ex){
2196             warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
2197         }
2198         try{
2199             t = Transliterator.getInstance("en-el");
2200             expect(t, "a", "\u03B1");
2201         }catch(IllegalArgumentException ex){
2202             warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2203         }
2204     }
2205 
2206     /**
2207      * Test title case handling of accent (should ignore accents)
2208      */
TestTitleAccents()2209     public void TestTitleAccents() {
2210         Transliterator t = Transliterator.getInstance("Title");
2211         expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2212     }
2213 
2214     /**
2215      * Basic test of a locale resource based rule.
2216      */
TestLocaleResource()2217     public void TestLocaleResource() {
2218         String DATA[] = {
2219                 // id                    from             to
2220                 "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",
2221                 "Latin-el",              "b",             "\u03bc\u03c0",
2222                 "Latin-Greek",           "b",             "\u03B2",
2223                 "Greek-Latin/UNGEGN",    "\u03B2",        "v",
2224                 "el-Latin",              "\u03B2",        "v",
2225                 "Greek-Latin",           "\u03B2",        "b",
2226         };
2227         for (int i=0; i<DATA.length; i+=3) {
2228             Transliterator t = Transliterator.getInstance(DATA[i]);
2229             expect(t, DATA[i+1], DATA[i+2]);
2230         }
2231     }
2232 
2233     /**
2234      * Make sure parse errors reference the right line.
2235      */
TestParseError()2236     public void TestParseError() {
2237         String rule =
2238             "a > b;\n" +
2239             "# more stuff\n" +
2240             "d << b;";
2241         try {
2242             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2243             if(t!=null){
2244                 errln("FAIL: Did not get expected exception");
2245             }
2246         } catch (IllegalArgumentException e) {
2247             String err = e.getMessage();
2248             if (err.indexOf("d << b") >= 0) {
2249                 logln("Ok: " + err);
2250             } else {
2251                 errln("FAIL: " + err);
2252             }
2253             return;
2254         }
2255         errln("FAIL: no syntax error");
2256     }
2257 
2258     /**
2259      * Make sure sets on output are disallowed.
2260      */
TestOutputSet()2261     public void TestOutputSet() {
2262         String rule = "$set = [a-cm-n]; b > $set;";
2263         Transliterator t = null;
2264         try {
2265             t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2266             if(t!=null){
2267                 errln("FAIL: Did not get the expected exception");
2268             }
2269         } catch (IllegalArgumentException e) {
2270             logln("Ok: " + e.getMessage());
2271             return;
2272         }
2273         errln("FAIL: No syntax error");
2274     }
2275 
2276     /**
2277      * Test the use variable range pragma, making sure that use of
2278      * variable range characters is detected and flagged as an error.
2279      */
TestVariableRange()2280     public void TestVariableRange() {
2281         String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2282         try {
2283             Transliterator t =
2284                 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2285             if(t!=null){
2286                 errln("FAIL: Did not get the expected exception");
2287             }
2288         } catch (IllegalArgumentException e) {
2289             logln("Ok: " + e.getMessage());
2290             return;
2291         }
2292         errln("FAIL: No syntax error");
2293     }
2294 
2295     /**
2296      * Test invalid post context error handling
2297      */
TestInvalidPostContext()2298     public void TestInvalidPostContext() {
2299         try {
2300             Transliterator t =
2301                 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2302             if(t!=null){
2303                 errln("FAIL: Did not get the expected exception");
2304             }
2305         } catch (IllegalArgumentException e) {
2306             String msg = e.getMessage();
2307             if (msg.indexOf("a}b{c") >= 0) {
2308                 logln("Ok: " + msg);
2309             } else {
2310                 errln("FAIL: " + msg);
2311             }
2312             return;
2313         }
2314         errln("FAIL: No syntax error");
2315     }
2316 
2317     /**
2318      * Test ID form variants
2319      */
TestIDForms()2320     public void TestIDForms() {
2321         String DATA[] = {
2322                 "NFC", null, "NFD",
2323                 "nfd", null, "NFC", // make sure case is ignored
2324                 "Any-NFKD", null, "Any-NFKC",
2325                 "Null", null, "Null",
2326                 "-nfkc", "nfkc", "NFKD",
2327                 "-nfkc/", "nfkc", "NFKD",
2328                 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2329                 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2330                 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2331                 "Source-", null, null,
2332                 "Source/Variant-", null, null,
2333                 "Source-/Variant", null, null,
2334                 "/Variant", null, null,
2335                 "/Variant-", null, null,
2336                 "-/Variant", null, null,
2337                 "-/", null, null,
2338                 "-", null, null,
2339                 "/", null, null,
2340         };
2341 
2342         for (int i=0; i<DATA.length; i+=3) {
2343             String ID = DATA[i];
2344             String expID = DATA[i+1];
2345             String expInvID = DATA[i+2];
2346             boolean expValid = (expInvID != null);
2347             if (expID == null) {
2348                 expID = ID;
2349             }
2350             try {
2351                 Transliterator t =
2352                     Transliterator.getInstance(ID);
2353                 Transliterator u = t.getInverse();
2354                 if (t.getID().equals(expID) &&
2355                         u.getID().equals(expInvID)) {
2356                     logln("Ok: " + ID + ".getInverse() => " + expInvID);
2357                 } else {
2358                     errln("FAIL: getInstance(" + ID + ") => " +
2359                             t.getID() + " x getInverse() => " + u.getID() +
2360                             ", expected " + expInvID);
2361                 }
2362             } catch (IllegalArgumentException e) {
2363                 if (!expValid) {
2364                     logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2365                 } else {
2366                     errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
2367                 }
2368             }
2369         }
2370     }
2371 
checkRules(String label, Transliterator t2, String testRulesForward)2372     void checkRules(String label, Transliterator t2, String testRulesForward) {
2373         String rules2 = t2.toRules(true);
2374         //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2375         rules2 = TestUtility.replace(rules2, " ", "");
2376         rules2 = TestUtility.replace(rules2, "\n", "");
2377         rules2 = TestUtility.replace(rules2, "\r", "");
2378         testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2379 
2380         if (!rules2.equals(testRulesForward)) {
2381             errln(label);
2382             logln("GENERATED RULES: " + rules2);
2383             logln("SHOULD BE:       " + testRulesForward);
2384         }
2385     }
2386 
2387     /**
2388      * Mark's toRules test.
2389      */
TestToRulesMark()2390     public void TestToRulesMark() {
2391 
2392         String testRules =
2393             "::[[:Latin:][:Mark:]];"
2394             + "::NFKD (NFC);"
2395             + "::Lower (Lower);"
2396             + "a <> \\u03B1;" // alpha
2397             + "::NFKC (NFD);"
2398             + "::Upper (Lower);"
2399             + "::Lower ();"
2400             + "::([[:Greek:][:Mark:]]);"
2401             ;
2402         String testRulesForward =
2403             "::[[:Latin:][:Mark:]];"
2404             + "::NFKD(NFC);"
2405             + "::Lower(Lower);"
2406             + "a > \\u03B1;"
2407             + "::NFKC(NFD);"
2408             + "::Upper (Lower);"
2409             + "::Lower ();"
2410             ;
2411         String testRulesBackward =
2412             "::[[:Greek:][:Mark:]];"
2413             + "::Lower (Upper);"
2414             + "::NFD(NFKC);"
2415             + "\\u03B1 > a;"
2416             + "::Lower(Lower);"
2417             + "::NFC(NFKD);"
2418             ;
2419         String source = "\u00E1"; // a-acute
2420         String target = "\u03AC"; // alpha-acute
2421 
2422         Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2423         Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
2424 
2425         expect(t2, source, target);
2426         expect(t3, target, source);
2427 
2428         checkRules("Failed toRules FORWARD", t2, testRulesForward);
2429         checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2430     }
2431 
2432     /**
2433      * Test Escape and Unescape transliterators.
2434      */
TestEscape()2435     public void TestEscape() {
2436         expect(Transliterator.getInstance("Hex-Any"),
2437                 "\\x{40}\\U00000031&#x32;&#81;",
2438         "@12Q");
2439         expect(Transliterator.getInstance("Any-Hex/C"),
2440                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2441         "\\u0041\\U0010BEEF\\uFEED");
2442         expect(Transliterator.getInstance("Any-Hex/Java"),
2443                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2444         "\\u0041\\uDBEF\\uDEEF\\uFEED");
2445         expect(Transliterator.getInstance("Any-Hex/Perl"),
2446                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2447         "\\x{41}\\x{10BEEF}\\x{FEED}");
2448     }
2449 
2450     /**
2451      * Make sure display names of variants look reasonable.
2452      */
TestDisplayName()2453     public void TestDisplayName() {
2454         String DATA[] = {
2455                 // ID, forward name, reverse name
2456                 // Update the text as necessary -- the important thing is
2457                 // not the text itself, but how various cases are handled.
2458 
2459                 // Basic test
2460                 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2461 
2462                 // Variants
2463                 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2464 
2465                 // Target-only IDs
2466                 "NFC", "Any to NFC", "Any to NFD",
2467         };
2468 
2469         Locale US = Locale.US;
2470 
2471         for (int i=0; i<DATA.length; i+=3) {
2472             String name = Transliterator.getDisplayName(DATA[i], US);
2473             if (!name.equals(DATA[i+1])) {
2474                 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2475                         name + ", expected " + DATA[i+1]);
2476             } else {
2477                 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2478             }
2479             Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2480             name = Transliterator.getDisplayName(t.getID(), US);
2481             if (!name.equals(DATA[i+2])) {
2482                 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2483                         name + ", expected " + DATA[i+2]);
2484             } else {
2485                 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2486             }
2487 
2488             // Cover getDisplayName(String)
2489             ULocale save = ULocale.getDefault();
2490             ULocale.setDefault(ULocale.US);
2491             String name2 = Transliterator.getDisplayName(t.getID());
2492             if (!name.equals(name2))
2493                 errln("FAIL: getDisplayName with default locale failed");
2494             ULocale.setDefault(save);
2495         }
2496     }
2497 
2498     /**
2499      * Test anchor masking
2500      */
TestAnchorMasking()2501     public void TestAnchorMasking() {
2502         String rule = "^a > Q; a > q;";
2503         try {
2504             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2505             if(t==null){
2506                 errln("FAIL: Did not get the expected exception");
2507             }
2508         } catch (IllegalArgumentException e) {
2509             errln("FAIL: " + rule + " => " + e);
2510         }
2511     }
2512 
2513     /**
2514      * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2515      * during ICU4J modularization to remove dependency of tests on Transliterator.
2516      */
TestScriptAllCodepoints()2517     public void TestScriptAllCodepoints(){
2518         int code;
2519         HashSet  scriptIdsChecked   = new HashSet();
2520         HashSet  scriptAbbrsChecked = new HashSet();
2521         for( int i =0; i <= 0x10ffff; i++){
2522             code = UScript.getScript(i);
2523             if(code==UScript.INVALID_CODE){
2524                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2525             }
2526             String id =UScript.getName(code);
2527             String abbr = UScript.getShortName(code);
2528             if (!scriptIdsChecked.contains(id)) {
2529                 scriptIdsChecked.add(id);
2530                 String newId ="[:"+id+":];NFD";
2531                 try{
2532                     Transliterator t = Transliterator.getInstance(newId);
2533                     if(t==null){
2534                         errln("Failed to create transliterator for "+hex(i)+
2535                                 " script code: " +id);
2536                     }
2537                 }catch(Exception e){
2538                     errln("Failed to create transliterator for "+hex(i)
2539                             +" script code: " +id
2540                             + " Exception: "+e.getMessage());
2541                 }
2542             }
2543             if (!scriptAbbrsChecked.contains(abbr)) {
2544                 scriptAbbrsChecked.add(abbr);
2545                 String newAbbrId ="[:"+abbr+":];NFD";
2546                 try{
2547                     Transliterator t = Transliterator.getInstance(newAbbrId);
2548                     if(t==null){
2549                         errln("Failed to create transliterator for "+hex(i)+
2550                                 " script code: " +abbr);
2551                     }
2552                 }catch(Exception e){
2553                     errln("Failed to create transliterator for "+hex(i)
2554                             +" script code: " +abbr
2555                             + " Exception: "+e.getMessage());
2556                 }
2557             }
2558         }
2559     }
2560 
2561 
    /**
     * Rule-based transliterators used by TestSpecialCases. Each row is
     * {ID, rules}; TestSpecialCases builds a transliterator from the rules,
     * registers it under the ID through DummyFactory before running
     * testCases, and unregisters the ID afterwards.
     */
    static final String[][] registerRules = {
        {"Any-Dev1", "x > X; y > Y;"},
        {"Any-Dev2", "XY > Z"},
        {"Greek-Latin/FAKE",
            "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
            "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
            "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
            "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
        },
    };
2572 
    // Strings holding the supplementary code points U+10414 and U+1043C.
    // The casing tests in this file treat them as an upper/lower case pair
    // to check that case mapping handles characters above U+FFFF.
    static final String DESERET_DEE = UTF16.valueOf(0x10414);
    static final String DESERET_dee = UTF16.valueOf(0x1043C);
2575 
    /**
     * Data for TestSpecialCases. Each row is {ID, source} or
     * {ID, source, expected}. When the expected value is omitted,
     * TestSpecialCases derives it from the ID (NFD/NFC/NFKD/NFKC via
     * Normalizer, Lower/Upper via UCharacter case mapping).
     */
    static final String[][] testCases = {

        // NORMALIZATION
        // should add more test cases
        {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
        {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
        {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
        {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},

        // mp -> b BUG
        {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
        {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},

        // check for devanagari bug
        // (Dev1 and Dev2 are the Any-Dev1 / Any-Dev2 entries of registerRules)
        {"nfd;Dev1;Dev2;nfc", "xy", "Z"},

        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
        {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
            "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
            //TODO: enable this test once Titlecase works right
            //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
            //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},

            {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
                "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
                {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
                    "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},

                    // Same sources again, with the expected value computed by TestSpecialCases
                    {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
                    {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},

                    // FORMS OF S
                    {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
                    {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
                    {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
                    {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},

                    // Tatiana bug
                    // Upper: TAT\u02B9\u00C2NA
                    // Lower: tat\u02B9\u00E2na
                    // Title: Tat\u02B9\u00E2na
                    {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
                    {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
                    {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
    };
2621 
TestSpecialCases()2622     public void TestSpecialCases() {
2623 
2624         for (int i = 0; i < registerRules.length; ++i) {
2625             Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2626                     registerRules[i][1], Transliterator.FORWARD);
2627             DummyFactory.add(registerRules[i][0], t);
2628         }
2629         for (int i = 0; i < testCases.length; ++i) {
2630             String name = testCases[i][0];
2631             Transliterator t = Transliterator.getInstance(name);
2632             String id = t.getID();
2633             String source = testCases[i][1];
2634             String target = null;
2635 
2636             // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2637 
2638             if (testCases[i].length > 2)    target = testCases[i][2];
2639             else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
2640             else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
2641             else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
2642             else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
2643             else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
2644             else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);
2645 
2646             expect(t, source, target);
2647         }
2648         for (int i = 0; i < registerRules.length; ++i) {
2649             Transliterator.unregister(registerRules[i][0]);
2650         }
2651     }
2652 
2653     // seems like there should be an easier way to just register an instance of a transliterator
2654 
2655     static class DummyFactory implements Transliterator.Factory {
2656         static DummyFactory singleton = new DummyFactory();
2657         static HashMap m = new HashMap();
2658 
2659         // Since Transliterators are immutable, we don't have to clone on set & get
add(String ID, Transliterator t)2660         static void add(String ID, Transliterator t) {
2661             m.put(ID, t);
2662             //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2663             Transliterator.registerFactory(ID, singleton);
2664         }
getInstance(String ID)2665         public Transliterator getInstance(String ID) {
2666             return (Transliterator) m.get(ID);
2667         }
2668     }
2669 
TestCasing()2670     public void TestCasing() {
2671         Transliterator toLower = Transliterator.getInstance("lower");
2672         Transliterator toCasefold = Transliterator.getInstance("casefold");
2673         Transliterator toUpper = Transliterator.getInstance("upper");
2674         Transliterator toTitle = Transliterator.getInstance("title");
2675         for (int i = 0; i < 0x600; ++i) {
2676             String s = UTF16.valueOf(i);
2677 
2678             String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2679             assertEquals("Lowercase", lower, toLower.transform(s));
2680 
2681             String casefold = UCharacter.foldCase(s, true);
2682             assertEquals("Casefold", casefold, toCasefold.transform(s));
2683 
2684             String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2685             assertEquals("Title", title, toTitle.transform(s));
2686 
2687             String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2688             assertEquals("Upper", upper, toUpper.transform(s));
2689         }
2690     }
2691 
TestSurrogateCasing()2692     public void TestSurrogateCasing () {
2693         // check that casing handles surrogates
2694         // titlecase is currently defective
2695         int dee = UTF16.charAt(DESERET_dee,0);
2696         int DEE = UCharacter.toTitleCase(dee);
2697         if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2698             errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2699         }
2700 
2701         if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2702             errln("Fails uppercase of surrogates");
2703         }
2704 
2705         if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2706             errln("Fails lowercase of surrogates");
2707         }
2708     }
2709 
2710     // Check to see that incremental gets at least part way through a reasonable string.
2711 
TestIncrementalProgress()2712     public void TestIncrementalProgress() {
2713         String latinTest = "The Quick Brown Fox.";
2714         String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest);
2715         String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest);
2716         String[][] tests = {
2717                 {"Any", latinTest},
2718                 {"Latin", latinTest},
2719                 {"Halfwidth", latinTest},
2720                 {"Devanagari", devaTest},
2721                 {"Katakana", kataTest},
2722         };
2723 
2724         Enumeration sources = Transliterator.getAvailableSources();
2725         while(sources.hasMoreElements()) {
2726             String source = (String) sources.nextElement();
2727             String test = findMatch(source, tests);
2728             if (test == null) {
2729                 logln("Skipping " + source + "-X");
2730                 continue;
2731             }
2732             Enumeration targets = Transliterator.getAvailableTargets(source);
2733             while(targets.hasMoreElements()) {
2734                 String target = (String) targets.nextElement();
2735                 Enumeration variants = Transliterator.getAvailableVariants(source, target);
2736                 while(variants.hasMoreElements()) {
2737                     String variant = (String) variants.nextElement();
2738                     String id = source + "-" + target + "/" + variant;
2739                     logln("id: " + id);
2740 
2741                     String filter = getTranslitTestFilter();
2742                     if (filter != null && id.indexOf(filter) < 0) continue;
2743 
2744                     Transliterator t = Transliterator.getInstance(id);
2745                     CheckIncrementalAux(t, test);
2746 
2747                     String rev = t.transliterate(test);
2748                     Transliterator inv = t.getInverse();
2749                     CheckIncrementalAux(inv, rev);
2750                 }
2751             }
2752         }
2753     }
2754 
findMatch(String source, String[][] pairs)2755     public String findMatch (String source, String[][] pairs) {
2756         for (int i = 0; i < pairs.length; ++i) {
2757             if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1];
2758         }
2759         return null;
2760     }
2761 
CheckIncrementalAux(Transliterator t, String input)2762     public void CheckIncrementalAux(Transliterator t, String input) {
2763 
2764         Replaceable test = new ReplaceableString(input);
2765         Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length());
2766         t.transliterate(test, pos);
2767         boolean gotError = false;
2768 
2769         // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
2770 
2771         if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) {
2772             errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2773             gotError = true;
2774         } else {
2775             logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2776         }
2777         t.finishTransliteration(test, pos);
2778         if (pos.start != pos.limit) {
2779             errln("Incomplete, " + t.getID() + ":  " + UtilityExtensions.formatInput(test, pos));
2780             gotError = true;
2781         }
2782         if(!gotError){
2783             //errln("FAIL: Did not get expected error");
2784         }
2785     }
2786 
TestFunction()2787     public void TestFunction() {
2788         // Careful with spacing and ';' here:  Phrase this exactly
2789         // as toRules() is going to return it.  If toRules() changes
2790         // with regard to spacing or ';', then adjust this string.
2791         String rule =
2792             "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2793 
2794         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2795         if (t == null) {
2796             errln("FAIL: createFromRules failed");
2797             return;
2798         }
2799 
2800         String r = t.toRules(true);
2801         if (r.equals(rule)) {
2802             logln("OK: toRules() => " + r);
2803         } else {
2804             errln("FAIL: toRules() => " + r +
2805                     ", expected " + rule);
2806         }
2807 
2808         expect(t, "The Quick Brown Fox",
2809         "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2810         rule =
2811             "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2812 
2813         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2814         if (t == null) {
2815             errln("FAIL: createFromRules failed");
2816             return;
2817         }
2818 
2819         r = t.toRules(true);
2820         if (r.equals(rule)) {
2821             logln("OK: toRules() => " + r);
2822         } else {
2823             errln("FAIL: toRules() => " + r +
2824                     ", expected " + rule);
2825         }
2826 
2827         expect(t, "\u0301",
2828         "U+0301 \\N{COMBINING ACUTE ACCENT}");
2829     }
2830 
TestInvalidBackRef()2831     public void TestInvalidBackRef() {
2832         String rule =  ". > $1;";
2833         String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2834         try {
2835             Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2836             if (t != null) {
2837                 errln("FAIL: createFromRules should have returned NULL");
2838             }
2839             errln("FAIL: Ok: . > $1; => no error");
2840             Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2841             if (t2 != null) {
2842                 errln("FAIL: createFromRules should have returned NULL");
2843             }
2844             errln("FAIL: Ok: . > $1; => no error");
2845         } catch (IllegalArgumentException e) {
2846             logln("Ok: . > $1; => " + e.getMessage());
2847         }
2848     }
2849 
TestMulticharStringSet()2850     public void TestMulticharStringSet() {
2851         // Basic testing
2852         String rule =
2853             "       [{aa}]       > x;" +
2854             "         a          > y;" +
2855             "       [b{bc}]      > z;" +
2856             "[{gd}] { e          > q;" +
2857             "         e } [{fg}] > r;" ;
2858 
2859         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2860         if (t == null) {
2861             errln("FAIL: createFromRules failed");
2862             return;
2863         }
2864 
2865         expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2866         "y x yz z d gd de gdq gdqfg ddrfg");
2867 
2868         // Overlapped string test.  Make sure that when multiple
2869         // strings can match that the longest one is matched.
2870         rule =
2871             "    [a {ab} {abc}]    > x;" +
2872             "           b          > y;" +
2873             "           c          > z;" +
2874             " q [t {st} {rst}] { e > p;" ;
2875 
2876         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2877         if (t == null) {
2878             errln("FAIL: createFromRules failed");
2879             return;
2880         }
2881 
2882         expect(t, "a ab abc qte qste qrste",
2883         "x x x qtp qstp qrstp");
2884     }
2885 
2886     /**
2887      * Test that user-registered transliterators can be used under function
2888      * syntax.
2889      */
TestUserFunction()2890     public void TestUserFunction() {
2891         Transliterator t;
2892 
2893         // There's no need to register inverses if we don't use them
2894         TestUserFunctionFactory.add("Any-gif",
2895                 Transliterator.createFromRules("gif",
2896                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2897                         Transliterator.FORWARD));
2898         //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2899 
2900         TestUserFunctionFactory.add("Any-RemoveCurly",
2901                 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2902         //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2903 
2904         logln("Trying &hex");
2905         t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2906         logln("Registering");
2907         TestUserFunctionFactory.add("Any-hex2", t);
2908         t = Transliterator.getInstance("Any-hex2");
2909         expect(t, "abc", "\\u0061\\u0062\\u0063");
2910 
2911         logln("Trying &gif");
2912         t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2913         logln("Registering");
2914         TestUserFunctionFactory.add("Any-gif2", t);
2915         t = Transliterator.getInstance("Any-gif2");
2916         expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2917         "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2918 
2919         // Test that filters are allowed after &
2920         t = Transliterator.createFromRules("test",
2921                 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2922         expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2923 
2924         // Unregister our test stuff
2925         TestUserFunctionFactory.unregister();
2926     }
2927 
2928     static class TestUserFunctionFactory implements Transliterator.Factory {
2929         static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2930         static HashMap m = new HashMap();
2931 
add(String ID, Transliterator t)2932         static void add(String ID, Transliterator t) {
2933             m.put(new CaseInsensitiveString(ID), t);
2934             Transliterator.registerFactory(ID, singleton);
2935         }
2936 
getInstance(String ID)2937         public Transliterator getInstance(String ID) {
2938             return (Transliterator) m.get(new CaseInsensitiveString(ID));
2939         }
2940 
unregister()2941         static void unregister() {
2942             Iterator ids = m.keySet().iterator();
2943             while (ids.hasNext()) {
2944                 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
2945                 Transliterator.unregister(id.getString());
2946                 ids.remove(); // removes pair from m
2947             }
2948         }
2949     }
2950 
2951     /**
2952      * Test the Any-X transliterators.
2953      */
TestAnyX()2954     public void TestAnyX() {
2955         Transliterator anyLatin =
2956             Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
2957 
2958         expect(anyLatin,
2959                 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2960         "greek:abkABK hiragana:abuku cyrillic:abc");
2961     }
2962 
2963     /**
2964      * Test Any-X transliterators with sample letters from all scripts.
2965      */
TestAny()2966     public void TestAny() {
2967         UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
2968         StringBuffer testString = new StringBuffer();
2969         for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
2970             UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
2971             int count = 5;
2972             for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
2973                 testString.append(it.getString());
2974                 if (--count < 0) break;
2975             }
2976         }
2977         logln("Sample set for Any-Latin: " + testString);
2978         Transliterator anyLatin = Transliterator.getInstance("any-Latn");
2979         String result = anyLatin.transliterate(testString.toString());
2980         logln("Sample result for Any-Latin: " + result);
2981     }
2982 
2983 
2984     /**
2985      * Test the source and target set API.  These are only implemented
2986      * for RBT and CompoundTransliterator at this time.
2987      */
TestSourceTargetSet()2988     public void TestSourceTargetSet() {
2989         // Rules
2990         String r =
2991             "a > b; " +
2992             "r [x{lu}] > q;";
2993 
2994         // Expected source
2995         UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
2996 
2997         // Expected target
2998         UnicodeSet expTrg = new UnicodeSet("[bq]");
2999 
3000         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3001         UnicodeSet src = t.getSourceSet();
3002         UnicodeSet trg = t.getTargetSet();
3003 
3004         if (src.equals(expSrc) && trg.equals(expTrg)) {
3005             logln("Ok: " + r + " => source = " + src.toPattern(true) +
3006                     ", target = " + trg.toPattern(true));
3007         } else {
3008             errln("FAIL: " + r + " => source = " + src.toPattern(true) +
3009                     ", expected " + expSrc.toPattern(true) +
3010                     "; target = " + trg.toPattern(true) +
3011                     ", expected " + expTrg.toPattern(true));
3012         }
3013     }
3014 
TestSourceTargetSet2()3015     public void TestSourceTargetSet2() {
3016 
3017 
3018         Normalizer2 nfc = Normalizer2.getNFCInstance();
3019         Normalizer2 nfd = Normalizer2.getNFDInstance();
3020 
3021         //        Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
3022         //        UnicodeSet nfkdSource = new UnicodeSet();
3023         //        UnicodeSet nfkdTarget = new UnicodeSet();
3024         //        for (int i = 0; i <= 0x10FFFF; ++i) {
3025         //            if (nfkd.isInert(i)) {
3026         //                continue;
3027         //            }
3028         //            nfkdSource.add(i);
3029         //            String t = nfkd.getDecomposition(i);
3030         //            if (t != null) {
3031         //                nfkdTarget.addAll(t);
3032         //            } else {
3033         //                nfkdTarget.add(i);
3034         //            }
3035         //        }
3036         //        nfkdSource.freeze();
3037         //        nfkdTarget.freeze();
3038         //        logln("NFKD Source: " + nfkdSource.toPattern(false));
3039         //        logln("NFKD Target: " + nfkdTarget.toPattern(false));
3040 
3041         UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
3042         UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
3043         UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
3044         CanonicalIterator can = new CanonicalIterator("");
3045 
3046         UnicodeSet disorderedMarks = new UnicodeSet();
3047 
3048         for (int i = 0; i <= 0x10FFFF; ++i) {
3049             String s = nfd.getDecomposition(i);
3050             if (s == null) {
3051                 continue;
3052             }
3053 
3054             can.setSource(s);
3055             for (String t = can.next(); t != null; t = can.next()) {
3056                 disorderedMarks.add(t);
3057             }
3058 
3059             // if s has two code points, (or more), add the lead/trail information
3060             int first = s.codePointAt(0);
3061             int firstCount = Character.charCount(first);
3062             if (s.length() == firstCount) continue;
3063             String trailString = s.substring(firstCount);
3064 
3065             // add all the trail characters
3066             if (!nonStarters.containsSome(trailString)) {
3067                 continue;
3068             }
3069             UnicodeSet trailSet = leadToTrail.get(first);
3070             if (trailSet == null) {
3071                 leadToTrail.put(first, trailSet = new UnicodeSet());
3072             }
3073             trailSet.addAll(trailString); // add remaining trails
3074 
3075             // add the sources
3076             UnicodeSet sourcesSet = leadToSources.get(first);
3077             if (sourcesSet == null) {
3078                 leadToSources.put(first, sourcesSet = new UnicodeSet());
3079             }
3080             sourcesSet.add(i);
3081         }
3082 
3083 
3084         for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
3085             String lead = x.getKey();
3086             UnicodeSet sources = x.getValue();
3087             UnicodeSet trailSet = leadToTrail.get(lead);
3088             for (String source : sources) {
3089                 for (String trail : trailSet) {
3090                     can.setSource(source + trail);
3091                     for (String t = can.next(); t != null; t = can.next()) {
3092                         if (t.endsWith(trail)) continue;
3093                         disorderedMarks.add(t);
3094                     }
3095                 }
3096             }
3097         }
3098 
3099 
3100         for (String s : nonStarters) {
3101             disorderedMarks.add("\u0345" + s);
3102             disorderedMarks.add(s+"\u0323");
3103             String xx = nfc.normalize("\u01EC" + s);
3104             if (!xx.startsWith("\u01EC")) {
3105                 logln("??");
3106             }
3107         }
3108 
3109         //        for (int i = 0; i <= 0x10FFFF; ++i) {
3110         //            String s = nfkd.getDecomposition(i);
3111         //            if (s != null) {
3112         //                disorderedMarks.add(s);
3113         //                disorderedMarks.add(nfc.normalize(s));
3114         //                addDerivedStrings(nfc, disorderedMarks, s);
3115         //            }
3116         //            s = nfd.getDecomposition(i);
3117         //            if (s != null) {
3118         //                disorderedMarks.add(s);
3119         //            }
3120         //            if (!nfc.isInert(i)) {
3121         //                if (i == 0x00C0) {
3122         //                    logln("\u00C0");
3123         //                }
3124         //                can.setSource(s+"\u0334");
3125         //                for (String t = can.next(); t != null; t = can.next()) {
3126         //                    addDerivedStrings(nfc, disorderedMarks, t);
3127         //                }
3128         //                can.setSource(s+"\u0345");
3129         //                for (String t = can.next(); t != null; t = can.next()) {
3130         //                    addDerivedStrings(nfc, disorderedMarks, t);
3131         //                }
3132         //                can.setSource(s+"\u0323");
3133         //                for (String t = can.next(); t != null; t = can.next()) {
3134         //                    addDerivedStrings(nfc, disorderedMarks, t);
3135         //                }
3136         //            }
3137         //        }
3138         logln("Test cases: " + disorderedMarks.size());
3139         disorderedMarks.addAll(0,0x10FFFF).freeze();
3140         logln("isInert \u0104 " + nfc.isInert('\u0104'));
3141 
3142         Object[][] rules = {
3143                 {":: [:sc=COMMON:] any-name;", null},
3144 
3145                 {":: [:Greek:] hex-any/C;", null},
3146                 {":: [:Greek:] any-hex/C;", null},
3147 
3148                 {":: [[:Mn:][:Me:]] remove;", null},
3149                 {":: [[:Mn:][:Me:]] null;", null},
3150 
3151 
3152                 {":: lower;", null},
3153                 {":: upper;", null},
3154                 {":: title;", null},
3155                 {":: CaseFold;", null},
3156 
3157                 {":: NFD;", null},
3158                 {":: NFC;", null},
3159                 {":: NFKD;", null},
3160                 {":: NFKC;", null},
3161 
3162                 {":: [[:Mn:][:Me:]] NFKD;", null},
3163                 {":: Latin-Greek;", null},
3164                 {":: [:Latin:] NFKD;", null},
3165                 {":: NFKD;", null},
3166                 {":: NFKD;\n" +
3167                     ":: [[:Mn:][:Me:]] remove;\n" +
3168                     ":: NFC;", null},
3169         };
3170         for (Object[] rulex : rules) {
3171             String rule = (String) rulex[0];
3172             Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
3173             UnicodeSet actualSource = trans.getSourceSet();
3174             UnicodeSet actualTarget = trans.getTargetSet();
3175             UnicodeSet empiricalSource = new UnicodeSet();
3176             UnicodeSet empiricalTarget = new UnicodeSet();
3177             String ruleDisplay = rule.replace("\n", "\t\t");
3178             UnicodeSet toTest = disorderedMarks;
3179             //            if (rulex[1] != null) {
3180             //                toTest = new UnicodeSet(disorderedMarks);
3181             //                toTest.addAll((UnicodeSet) rulex[1]);
3182             //            }
3183 
3184             String test = nfd.normalize("\u0104");
3185             boolean DEBUG = true;
3186             @SuppressWarnings("unused")
3187             int count = 0; // for debugging
3188             for (String s : toTest) {
3189                 if (s.equals(test)) {
3190                     logln(test);
3191                 }
3192                 String t = trans.transform(s);
3193                 if (!s.equals(t)) {
3194                     if (!isAtomic(s, t, trans)) {
3195                         isAtomic(s, t, trans);
3196                         continue;
3197                     }
3198 
3199                     // only keep the part that changed; so skip the front and end.
3200                     //                    int start = findSharedStartLength(s,t);
3201                     //                    int end = findSharedEndLength(s,t);
3202                     //                    if (start != 0 || end != 0) {
3203                     //                        s = s.substring(start, s.length() - end);
3204                     //                        t = t.substring(start, t.length() - end);
3205                     //                    }
3206                     if (DEBUG) {
3207                         if (!actualSource.containsAll(s)) {
3208                             count++;
3209                         }
3210                         if (!actualTarget.containsAll(t)) {
3211                             count++;
3212                         }
3213                     }
3214                     addSourceTarget(s, empiricalSource, t, empiricalTarget);
3215                 }
3216             }
3217             assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
3218             assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
3219         }
3220     }
3221 
TestSourceTargetSetFilter()3222     public void TestSourceTargetSetFilter() {
3223         String[][] tests = {
3224                 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
3225                 {"[] Latin-Greek", null, "[\']"},
3226                 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
3227                 {"[] Any-Latin"},
3228                 {"[] casefold"},
3229                 {"[] NFKD;"},
3230                 {"[] NFKC;"},
3231                 {"[] hex"},
3232                 {"[] lower"},
3233                 {"[] null"},
3234                 {"[] remove"},
3235                 {"[] title"},
3236                 {"[] upper"},
3237         };
3238         UnicodeSet expectedSource = UnicodeSet.EMPTY;
3239         for (String[] testPair : tests) {
3240             String test = testPair[0];
3241             Transliterator t0;
3242             try {
3243                 t0 = Transliterator.getInstance(test);
3244             } catch (Exception e) {
3245                 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
3246             }
3247             Transliterator t1;
3248             try {
3249                 t1 = t0.getInverse();
3250             } catch (Exception e) {
3251                 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
3252             }
3253             int targetIndex = 0;
3254             for (Transliterator t : new Transliterator[]{t0, t1}) {
3255                 boolean ok;
3256                 UnicodeSet source = t.getSourceSet();
3257                 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3258                 targetIndex++;
3259                 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3260                         : testPair[targetIndex] == null ? expectedSource
3261                                 : testPair[targetIndex].length() == 0 ? expectedSource
3262                                         : new UnicodeSet(testPair[targetIndex]);
3263                 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
3264                 if (!ok) { // for debugging
3265                     source = t.getSourceSet();
3266                 }
3267                 UnicodeSet target = t.getTargetSet();
3268                 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3269                 if (!ok) { // for debugging
3270                     target = t.getTargetSet();
3271                 }
3272             }
3273         }
3274     }
3275 
isAtomic(String s, String t, Transliterator trans)3276     private boolean isAtomic(String s, String t, Transliterator trans) {
3277         for (int i = 1; i < s.length(); ++i) {
3278             if (!CharSequences.onCharacterBoundary(s, i)) {
3279                 continue;
3280             }
3281             String q = trans.transform(s.substring(0,i));
3282             if (t.startsWith(q)) {
3283                 String r = trans.transform(s.substring(i));
3284                 if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3285                     return false;
3286                 }
3287             }
3288         }
3289         return true;
3290         //        // make sure that every part is different
3291         //        if (s.codePointCount(0, s.length()) > 1) {
3292         //            int[] codePoints = It.codePoints(s);
3293         //            for (int k = 0; k < codePoints.length; ++k) {
3294         //                int pos = indexOf(t,codePoints[k]);
3295         //                if (pos >= 0) {
3296         //                    int x;
3297         //                }
3298         //            }
3299         //            if (s.contains("\u00C0")) {
3300         //                logln("\u00C0");
3301         //            }
3302         //        }
3303     }
3304 
addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3305     private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3306         expectedSource.addAll(s);
3307         if (t.length() > 0) {
3308             expectedTarget.addAll(t);
3309         }
3310     }
3311 
3312 //    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3313 //        disorderedMarks.add(s);
3314 //        for (int j = 1; j < s.length(); ++j) {
3315 //            if (CharSequences.onCharacterBoundary(s, j)) {
3316 //                String shorter = s.substring(0,j);
3317 //                disorderedMarks.add(shorter);
3318 //                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
3319 //            }
3320 //        }
3321 //    }
3322 
TestCharUtils()3323     public void TestCharUtils() {
3324         String[][] startTests = {
3325                 {"1", "a", "ab"},
3326                 {"0", "a", "xb"},
3327                 {"0", "\uD800", "\uD800\uDC01"},
3328                 {"1", "\uD800a", "\uD800b"},
3329                 {"0", "\uD800\uDC00", "\uD800\uDC01"},
3330         };
3331         for (String[] row : startTests) {
3332             int actual = findSharedStartLength(row[1], row[2]);
3333             assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3334                     Integer.parseInt(row[0]),
3335                     actual);
3336         }
3337         String[][] endTests = {
3338                 {"0", "\uDC00", "\uD801\uDC00"},
3339                 {"1", "a", "ba"},
3340                 {"0", "a", "bx"},
3341                 {"1", "a\uDC00", "b\uDC00"},
3342                 {"0", "\uD800\uDC00", "\uD801\uDC00"},
3343         };
3344         for (String[] row : endTests) {
3345             int actual = findSharedEndLength(row[1], row[2]);
3346             assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3347                     Integer.parseInt(row[0]),
3348                     actual);
3349         }
3350     }
3351 
3352     /**
3353      * @param s
3354      * @param t
3355      * @return
3356      */
3357     // TODO make generally available
findSharedStartLength(CharSequence s, CharSequence t)3358     private static int findSharedStartLength(CharSequence s, CharSequence t) {
3359         int min = Math.min(s.length(), t.length());
3360         int i;
3361         char sch, tch;
3362         for (i = 0; i < min; ++i) {
3363             sch = s.charAt(i);
3364             tch = t.charAt(i);
3365             if (sch != tch) {
3366                 break;
3367             }
3368         }
3369         return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3370     }
3371 
3372     /**
3373      * @param s
3374      * @param t
3375      * @return
3376      */
3377     // TODO make generally available
findSharedEndLength(CharSequence s, CharSequence t)3378     private static int findSharedEndLength(CharSequence s, CharSequence t) {
3379         int slength = s.length();
3380         int tlength = t.length();
3381         int min = Math.min(slength, tlength);
3382         int i;
3383         char sch, tch;
3384         // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3385         for (i = 0; i < min; ++i) {
3386             sch = s.charAt(slength - i - 1);
3387             tch = t.charAt(tlength - i - 1);
3388             if (sch != tch) {
3389                 break;
3390             }
3391         }
3392         return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
3393     }
3394 
3395     enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK}
3396 
assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3397     void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3398         boolean haveError = false;
3399         if (!actual.containsAll(empirical)) {
3400             UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3401             errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3402             haveError = true;
3403         }
3404         if (!empirical.containsAll(actual)) {
3405             UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3406             logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3407             haveError = true;
3408         }
3409         if (!haveError) {
3410             logln("OK " + message + ' ' + toPattern(empirical));
3411         }
3412     }
3413 
toPattern(UnicodeSet missing)3414     private String toPattern(UnicodeSet missing) {
3415         String result = missing.toPattern(false);
3416         if (result.length() < 200) {
3417             return result;
3418         }
3419         return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3420     }
3421 
3422 
3423     /**
3424      * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3425      */
TestPatternWhitespace()3426     public void TestPatternWhitespace() {
3427         // Rules
3428         String r = "a > \u200E b;";
3429 
3430         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3431 
3432         expect(t, "a", "b");
3433 
3434         // UnicodeSet
3435         UnicodeSet set = new UnicodeSet("[a \u200E]");
3436 
3437         if (set.contains(0x200E)) {
3438             errln("FAIL: U+200E not being ignored by UnicodeSet");
3439         }
3440     }
3441 
TestAlternateSyntax()3442     public void TestAlternateSyntax() {
3443         // U+2206 == &
3444         // U+2190 == <
3445         // U+2192 == >
3446         // U+2194 == <>
3447         expect("a \u2192 x; b \u2190 y; c \u2194 z",
3448                 "abc",
3449         "xbz");
3450         expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3451                 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3452         "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3453     }
3454 
TestPositionAPI()3455     public void TestPositionAPI() {
3456         Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3457         Transliterator.Position b = new Transliterator.Position(a);
3458         Transliterator.Position c = new Transliterator.Position();
3459         c.set(a);
3460         // Call the toString() API:
3461         if (a.equals(b) && a.equals(c)) {
3462             logln("Ok: " + a + " == " + b + " == " + c);
3463         } else {
3464             errln("FAIL: " + a + " != " + b + " != " + c);
3465         }
3466     }
3467 
3468     //======================================================================
3469     // New tests for the ::BEGIN/::END syntax
3470     //======================================================================
3471 
    /**
     * Rule sets referenced by index from BEGIN_END_TEST_CASES.
     *
     * The entries that used the ::BEGIN/::END syntax are commented out and
     * replaced by an empty string, so that the remaining entries keep the
     * index given in the "// [n]" comment above them.  Do not add or remove
     * entries without adjusting every index that refers into this array.
     */
    private static final String[] BEGIN_END_RULES = new String[] {
        // [0]
        "abc > xy;"
        + "aba > z;",

        // [1]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [2]
        /*
        "abc > xy;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [3]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "aba > z;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [4]
        // same two rules as [0], separated by ::Null;
        "abc > xy;"
        + "::Null;"
        + "aba > z;",

        // [5]
        "::Upper;"
        + "ABC > xy;"
        + "AB > x;"
        + "C > z;"
        + "::Upper;"
        + "XYZ > p;"
        + "XY > q;"
        + "Z > r;"
        + "::Upper;",

        // [6]
        // [6]-[9] share the same variables and rules; they differ only in
        // whether and where a ::Null; line is inserted.
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [7]
        "::Null;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [8]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;",

        // [9]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [10]
        /*
        "::BEGIN;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::END;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [11]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [12]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;"
        + "::BEGIN;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::END;"
        + "::BEGIN;"
        + "'a-a' > a\\%|a;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [13]
        // the rules of the commented-out [12], with ::Null; where [12] has ::BEGIN;/::END;
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::Null;"
        + "'a-a' > a\\%|a;",

        // [14]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > yz;"
        + "::END;"
        + "::Upper;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [15]
        "::[abc];"
        + "abc > xy;"
        + "::Null;"
        + "aba > yz;"
        + "::Upper;",

        // [16]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc <> xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba <> yz;"
        + "::END;"
        + "::Upper(Lower);"
        + "::([XYZ]);",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [17]
        "::[abc];"
        + "abc <> xy;"
        + "::Null;"
        + "aba <> yz;"
        + "::Upper(Lower);"
        + "::([XYZ]);"
    };
3645 
3646     /*
3647 (This entire test is commented out below and will need some heavy revision when we re-add
3648 the ::BEGIN/::END stuff)
3649     private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3650         // [7]
3651         "::BEGIN;"
3652         + "abc > xy;"
3653         + "::BEGIN;"
3654         + "aba > z;"
3655         + "::END;"
3656         + "::END;",
3657 
3658         // [8]
3659         "abc > xy;"
3660         + " aba > z;"
3661         + "::END;",
3662 
3663         // [9]
3664         "::BEGIN;"
3665         + "::Upper;"
3666         + "::END;"
3667     };
3668      */
3669 
    // Test data shared by TestBeginEnd() and TestBeginEndToRules().
    //
    // The array is a flat list of triples, and both tests walk it in steps of three:
    //   [i]     rule set (an entry of BEGIN_END_RULES)
    //   [i + 1] input text
    //   [i + 2] expected output
    //
    // Triples that are commented out refer to BEGIN_END_RULES slots that are currently
    // disabled (the slots for [12], [14] and [16] are empty-string placeholders kept only
    // to preserve the indexes; presumably the other skipped indexes are as well).
    private static final String[] BEGIN_END_TEST_CASES = new String[] {
        BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
        //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
        //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
        //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
        BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
        BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

        BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
        //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
        // Rule set [13] is exercised with three different inputs.
        BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
        BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

        //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
    };
3696 
TestBeginEnd()3697     public void TestBeginEnd() {
3698         // run through the list of test cases above
3699         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3700             expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3701         }
3702 
3703         // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3704         Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3705                 Transliterator.REVERSE);
3706         expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3707 
3708         // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3709         // that all of them cause errors
3710         /*
3711 (commented out until we have the real ::BEGIN/::END stuff in place
3712         for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3713             try {
3714                 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3715                         Transliterator.FORWARD);
3716                 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3717             }
3718             catch (IllegalArgumentException e) {
3719                 // this is supposed to happen; do nothing here
3720             }
3721         }
3722          */
3723     }
3724 
TestBeginEndToRules()3725     public void TestBeginEndToRules() {
3726         // run through the same list of test cases we used above, but this time, instead of just
3727         // instantiating a Transliterator from the rules and running the test against it, we instantiate
3728         // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3729         // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3730         // to (i.e., does the same thing as) the original rule set
3731         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3732             Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3733                     Transliterator.FORWARD);
3734             String rules = t.toRules(false);
3735             Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3736             expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3737         }
3738 
3739         // do the same thing for the reversible test case
3740         Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3741                 Transliterator.REVERSE);
3742         String rules = reversed.toRules(false);
3743         Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3744         expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3745     }
3746 
TestRegisterAlias()3747     public void TestRegisterAlias() {
3748         String longID = "Lower;[aeiou]Upper";
3749         String shortID = "Any-CapVowels";
3750         String reallyShortID = "CapVowels";
3751 
3752         Transliterator.registerAlias(shortID, longID);
3753 
3754         Transliterator t1 = Transliterator.getInstance(longID);
3755         Transliterator t2 = Transliterator.getInstance(reallyShortID);
3756 
3757         if (!t1.getID().equals(longID))
3758             errln("Transliterator instantiated with long ID doesn't have long ID");
3759         if (!t2.getID().equals(reallyShortID))
3760             errln("Transliterator instantiated with short ID doesn't have short ID");
3761 
3762         if (!t1.toRules(true).equals(t2.toRules(true)))
3763             errln("Alias transliterators aren't the same");
3764 
3765         Transliterator.unregister(shortID);
3766 
3767         try {
3768             t1 = Transliterator.getInstance(shortID);
3769             errln("Instantiation with short ID succeeded after short ID was unregistered");
3770         }
3771         catch (IllegalArgumentException e) {
3772         }
3773 
3774         // try the same thing again, but this time with something other than
3775         // an instance of CompoundTransliterator
3776         String realID = "Latin-Greek";
3777         String fakeID = "Latin-dlgkjdflkjdl";
3778         Transliterator.registerAlias(fakeID, realID);
3779 
3780         t1 = Transliterator.getInstance(realID);
3781         t2 = Transliterator.getInstance(fakeID);
3782 
3783         if (!t1.toRules(true).equals(t2.toRules(true)))
3784             errln("Alias transliterators aren't the same");
3785 
3786         Transliterator.unregister(fakeID);
3787     }
3788 
3789     /**
3790      * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3791      */
TestHalfwidthFullwidth()3792     public void TestHalfwidthFullwidth() {
3793         Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3794         Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3795 
3796         // Array of 3n items
3797         // Each item is
3798         //   "hf"|"fh"|"both",
3799         //   <Halfwidth>,
3800         //   <Fullwidth>
3801         String[] DATA = {
3802                 "both",
3803                 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3804                 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3805         };
3806 
3807         for (int i=0; i<DATA.length; i+=3) {
3808             switch (DATA[i].charAt(0)) {
3809             case 'h': // Halfwidth-Fullwidth only
3810                 expect(hf, DATA[i+1], DATA[i+2]);
3811                 break;
3812             case 'f': // Fullwidth-Halfwidth only
3813                 expect(fh, DATA[i+2], DATA[i+1]);
3814                 break;
3815             case 'b': // both directions
3816                 expect(hf, DATA[i+1], DATA[i+2]);
3817                 expect(fh, DATA[i+2], DATA[i+1]);
3818                 break;
3819             }
3820         }
3821 
3822     }
3823 
3824     /**
3825      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3826      *              TODO: confirm that the expected results are correct.
3827      *              For now, test just confirms that C++ and Java give identical results.
3828      */
TestThai()3829     public void TestThai() {
3830         Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3831         String thaiText =
3832             "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3833             "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3834             "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3835             "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3836             "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3837             "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3838             "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3839             "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3840             "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3841             "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3842             "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3843             "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3844             "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3845             "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3846             "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3847             "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3848             "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3849             "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3850             "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3851             "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3852             "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3853             "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3854             "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3855             "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3856             " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3857             "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3858             "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3859             " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3860             "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3861             "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3862 
3863         String latinText =
3864             "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3865             "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3866             "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3867             "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3868             "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3869             " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3870             "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3871             "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3872             "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3873             "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3874             "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3875             "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3876             " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3877             "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3878             " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3879             "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3880             "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3881             "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3882 
3883         expect(tr, thaiText, latinText);
3884     }
3885 
3886 
3887     //======================================================================
3888     // These tests are not mirrored (yet) in icu4c at
3889     // source/test/intltest/transtst.cpp
3890     //======================================================================
3891 
3892     /**
3893      * Improve code coverage.
3894      */
TestCoverage()3895     public void TestCoverage() {
3896         // NullTransliterator
3897         Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3898         expect(t, "a", "a");
3899 
3900         // Source, target set
3901         t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3902         t.setFilter(new UnicodeSet("[A-Z]"));
3903         logln("source = " + t.getSourceSet());
3904         logln("target = " + t.getTargetSet());
3905 
3906         t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3907         logln("source = " + t.getSourceSet());
3908         logln("target = " + t.getTargetSet());
3909     }
3910     /*
3911      * Test case for threading problem in NormalizationTransliterator
3912      * reported by ticket#5160
3913      */
TestT5160()3914     public void TestT5160() {
3915         final String[] testData = {
3916                 "a",
3917                 "b",
3918                 "\u09BE",
3919                 "A\u0301",
3920         };
3921         final String[] expected = {
3922                 "a",
3923                 "b",
3924                 "\u09BE",
3925                 "\u00C1",
3926         };
3927         Transliterator translit = Transliterator.getInstance("NFC");
3928         NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
3929         for (int i = 0; i < tasks.length; i++) {
3930             tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
3931         }
3932         TestUtil.runUntilDone(tasks);
3933 
3934         for (int i = 0; i < tasks.length; i++) {
3935             if (tasks[i].getErrorMessage() != null) {
3936                 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
3937                 break;
3938             }
3939         }
3940     }
3941 
3942     static class NormTranslitTask implements Runnable {
3943         Transliterator translit;
3944         String testData;
3945         String expectedData;
3946         String errorMsg;
3947 
NormTranslitTask(Transliterator translit, String testData, String expectedData)3948         NormTranslitTask(Transliterator translit, String testData, String expectedData) {
3949             this.translit = translit;
3950             this.testData = testData;
3951             this.expectedData = expectedData;
3952         }
3953 
run()3954         public void run() {
3955             errorMsg = null;
3956             StringBuffer inBuf = new StringBuffer(testData);
3957             StringBuffer expectedBuf = new StringBuffer(expectedData);
3958 
3959             for(int i = 0; i < 1000; i++) {
3960                 String in = inBuf.toString();
3961                 String out = translit.transliterate(in);
3962                 String expected = expectedBuf.toString();
3963                 if (!out.equals(expected)) {
3964                     errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
3965                     break;
3966                 }
3967                 inBuf.append(testData);
3968                 expectedBuf.append(expectedData);
3969             }
3970         }
3971 
getErrorMessage()3972         public String getErrorMessage() {
3973             return errorMsg;
3974         }
3975     }
3976 
3977     //======================================================================
3978     // Support methods
3979     //======================================================================
expect(String rules, String source, String expectedResult, Transliterator.Position pos)3980     void expect(String rules,
3981             String source,
3982             String expectedResult,
3983             Transliterator.Position pos) {
3984         Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
3985         expect(t, source, expectedResult, pos);
3986     }
3987 
expect(String rules, String source, String expectedResult)3988     void expect(String rules, String source, String expectedResult) {
3989         expect(rules, source, expectedResult, null);
3990     }
3991 
expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3992     void expect(Transliterator t, String source, String expectedResult,
3993             Transliterator reverseTransliterator) {
3994         expect(t, source, expectedResult);
3995         if (reverseTransliterator != null) {
3996             expect(reverseTransliterator, expectedResult, source);
3997         }
3998     }
3999 
expect(Transliterator t, String source, String expectedResult)4000     void expect(Transliterator t, String source, String expectedResult) {
4001         expect(t, source, expectedResult, (Transliterator.Position) null);
4002     }
4003 
expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)4004     void expect(Transliterator t, String source, String expectedResult,
4005             Transliterator.Position pos) {
4006         if (pos == null) {
4007             String result = t.transliterate(source);
4008             if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
4009         }
4010 
4011         Transliterator.Position index = null;
4012         if (pos == null) {
4013             index = new Transliterator.Position(0, source.length(), 0, source.length());
4014         } else {
4015             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4016                     pos.start, pos.limit);
4017         }
4018 
4019         ReplaceableString rsource = new ReplaceableString(source);
4020 
4021         t.finishTransliteration(rsource, index);
4022         // Do it all at once -- below we do it incrementally
4023 
4024         if (index.start != index.limit) {
4025             expectAux(t.getID() + ":UNFINISHED", source,
4026                     "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
4027             return;
4028         }
4029         String result = rsource.toString();
4030         if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
4031 
4032 
4033         if (pos == null) {
4034             index = new Transliterator.Position();
4035         } else {
4036             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4037                     pos.start, pos.limit);
4038         }
4039 
4040         // Test incremental transliteration -- this result
4041         // must be the same after we finalize (see below).
4042         List<String> v = new ArrayList<String>();
4043         v.add(source);
4044         rsource.replace(0, rsource.length(), "");
4045         if (pos != null) {
4046             rsource.replace(0, 0, source);
4047             v.add(UtilityExtensions.formatInput(rsource, index));
4048             t.transliterate(rsource, index);
4049             v.add(UtilityExtensions.formatInput(rsource, index));
4050         } else {
4051             for (int i=0; i<source.length(); ++i) {
4052                 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
4053                 //log.append(source.charAt(i)).append(" -> "));
4054                 t.transliterate(rsource, index, source.charAt(i));
4055                 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
4056                 v.add(UtilityExtensions.formatInput(rsource, index) +
4057                         ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
4058             }
4059         }
4060 
4061         // As a final step in keyboard transliteration, we must call
4062         // transliterate to finish off any pending partial matches that
4063         // were waiting for more input.
4064         t.finishTransliteration(rsource, index);
4065         result = rsource.toString();
4066         //log.append(" => ").append(rsource.toString());
4067         v.add(result);
4068 
4069         String[] results = new String[v.size()];
4070         v.toArray(results);
4071         expectAux(t.getID() + ":Incremental", results,
4072                 result.equals(expectedResult),
4073                 expectedResult);
4074     }
4075 
4076     boolean expectAux(String tag, String source,
4077             String result, String expectedResult) {
4078         return expectAux(tag, new String[] {source, result},
4079                 result.equals(expectedResult),
4080                 expectedResult);
4081     }
4082 
4083     boolean expectAux(String tag, String source,
4084             String result, boolean pass,
4085             String expectedResult) {
4086         return expectAux(tag, new String[] {source, result},
4087                 pass,
4088                 expectedResult);
4089     }
4090 
4091     boolean expectAux(String tag, String source,
4092             boolean pass,
4093             String expectedResult) {
4094         return expectAux(tag, new String[] {source},
4095                 pass,
4096                 expectedResult);
4097     }
4098 
4099     boolean expectAux(String tag, String[] results, boolean pass,
4100             String expectedResult) {
4101         msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
4102 
4103         for (int i = 0; i < results.length; ++i) {
4104             String label;
4105             if (i == 0) {
4106                 label = "source:   ";
4107             } else if (i == results.length - 1) {
4108                 label = "result:   ";
4109             } else {
4110                 if (!isVerbose() && pass) continue;
4111                 label = "interm" + i + ":  ";
4112             }
4113             msg("    " + label + results[i], pass ? LOG : ERR, false, true);
4114         }
4115 
4116         if (!pass) {
4117             msg(  "    expected: " + expectedResult, ERR, false, true);
4118         }
4119 
4120         return pass;
4121     }
4122 
4123     private void assertTransform(String message, String expected, StringTransform t, String source) {
4124         assertEquals(message + " " + source, expected, t.transform(source));
4125     }
4126 
4127 
4128     private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
4129         assertEquals(message + " " +source, expected, t.transform(source));
4130         assertEquals(message + " " +source2, expected, t.transform(source2));
4131         assertEquals(message + " " + expected, source, back.transform(expected));
4132     }
4133 
4134     /*
4135      * Tests the method public Enumeration<String> getAvailableTargets(String source)
4136      */
4137     public void TestGetAvailableTargets() {
4138         try {
4139             // Tests when if (targets == null) is true
4140             Transliterator.getAvailableTargets("");
4141         } catch (Exception e) {
4142             errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
4143         }
4144     }
4145 
4146     /*
4147      * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
4148      */
4149     public void TestGetAvailableVariants() {
4150         try {
4151             // Tests when if (targets == null) is true
4152             Transliterator.getAvailableVariants("", "");
4153         } catch (Exception e) {
4154             errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
4155         }
4156     }
4157 
4158     /*
     * Tests the method String nextLine() in RuleBody
4160      */
4161     public void TestNextLine() {
4162         // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
4163         try{
4164             Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
4165         } catch(Exception e){
4166             errln("TransliteratorParser.nextLine() was not suppose to return an " +
4167             "exception for a rule of '\\'");
4168         }
4169     }
4170 }
4171