1 /*
2 **********************************************************************
3 *   Copyright (C) 1999-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/10/99    aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_TRANSLITERATION
14 
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "unicode/utf16.h"
27 #include "cpdtrans.h"
28 #include "nultrans.h"
29 #include "rbt.h"
30 #include "rbt_pars.h"
31 #include "anytrans.h"
32 #include "esctrn.h"
33 #include "name2uni.h"
34 #include "nortrans.h"
35 #include "remtrans.h"
36 #include "titletrn.h"
37 #include "tolowtrn.h"
38 #include "toupptrn.h"
39 #include "unesctrn.h"
40 #include "uni2name.h"
41 #include "cstring.h"
42 #include "cmemory.h"
43 #include <stdio.h>
44 
45 /***********************************************************************
46 
47                      HOW TO USE THIS TEST FILE
48                                -or-
49                   How I developed on two platforms
50                 without losing (too much of) my mind
51 
52 
53 1. Add new tests by copying/pasting/changing existing tests.  On Java,
54    any public void method named Test...() taking no parameters becomes
55    a test.  On C++, you need to modify the header and add a line to
56    the runIndexedTest() dispatch method.
57 
58 2. Make liberal use of the expect() method; it is your friend.
59 
60 3. The tests in this file exactly match those in a sister file on the
61    other side.  The two files are:
62 
63    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
64    icu4c:  source/test/intltest/transtst.cpp
65 
66                   ==> THIS IS THE IMPORTANT PART <==
67 
68    When you add a test in this file, add it in TransliteratorTest.java
69    too.  Give it the same name and put it in the same relative place.
70    This makes maintenance a lot simpler for any poor soul who ends up
71    trying to synchronize the tests between icu4j and icu4c.
72 
73 4. If you MUST enter a test that is NOT paralleled in the sister file,
74    then add it in the special non-mirrored section.  These are
75    labeled
76 
77      "icu4j ONLY"
78 
79    or
80 
81      "icu4c ONLY"
82 
83    Make sure you document the reason the test is here and not there.
84 
85 
86 Thank you.
87 The Management
88 ***********************************************************************/
89 
90 // Define character constants thusly to be EBCDIC-friendly
91 enum {
92     LEFT_BRACE=((UChar)0x007B), /*{*/
93     PIPE      =((UChar)0x007C), /*|*/
94     ZERO      =((UChar)0x0030), /*0*/
95     UPPER_A   =((UChar)0x0041)  /*A*/
96 };
97 
TransliteratorTest()98 TransliteratorTest::TransliteratorTest()
99 :   DESERET_DEE((UChar32)0x10414),
100     DESERET_dee((UChar32)0x1043C)
101 {
102 }
103 
~TransliteratorTest()104 TransliteratorTest::~TransliteratorTest() {}
105 
106 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
108                                    const char* &name, char* /*par*/) {
109     switch (index) {
110         TESTCASE(0,TestInstantiation);
111         TESTCASE(1,TestSimpleRules);
112         TESTCASE(2,TestRuleBasedInverse);
113         TESTCASE(3,TestKeyboard);
114         TESTCASE(4,TestKeyboard2);
115         TESTCASE(5,TestKeyboard3);
116         TESTCASE(6,TestArabic);
117         TESTCASE(7,TestCompoundKana);
118         TESTCASE(8,TestCompoundHex);
119         TESTCASE(9,TestFiltering);
120         TESTCASE(10,TestInlineSet);
121         TESTCASE(11,TestPatternQuoting);
122         TESTCASE(12,TestJ277);
123         TESTCASE(13,TestJ243);
124         TESTCASE(14,TestJ329);
125         TESTCASE(15,TestSegments);
126         TESTCASE(16,TestCursorOffset);
127         TESTCASE(17,TestArbitraryVariableValues);
128         TESTCASE(18,TestPositionHandling);
129         TESTCASE(19,TestHiraganaKatakana);
130         TESTCASE(20,TestCopyJ476);
131         TESTCASE(21,TestAnchors);
132         TESTCASE(22,TestInterIndic);
133         TESTCASE(23,TestFilterIDs);
134         TESTCASE(24,TestCaseMap);
135         TESTCASE(25,TestNameMap);
136         TESTCASE(26,TestLiberalizedID);
137         TESTCASE(27,TestCreateInstance);
138         TESTCASE(28,TestNormalizationTransliterator);
139         TESTCASE(29,TestCompoundRBT);
140         TESTCASE(30,TestCompoundFilter);
141         TESTCASE(31,TestRemove);
142         TESTCASE(32,TestToRules);
143         TESTCASE(33,TestContext);
144         TESTCASE(34,TestSupplemental);
145         TESTCASE(35,TestQuantifier);
146         TESTCASE(36,TestSTV);
147         TESTCASE(37,TestCompoundInverse);
148         TESTCASE(38,TestNFDChainRBT);
149         TESTCASE(39,TestNullInverse);
150         TESTCASE(40,TestAliasInverseID);
151         TESTCASE(41,TestCompoundInverseID);
152         TESTCASE(42,TestUndefinedVariable);
153         TESTCASE(43,TestEmptyContext);
154         TESTCASE(44,TestCompoundFilterID);
155         TESTCASE(45,TestPropertySet);
156         TESTCASE(46,TestNewEngine);
157         TESTCASE(47,TestQuantifiedSegment);
158         TESTCASE(48,TestDevanagariLatinRT);
159         TESTCASE(49,TestTeluguLatinRT);
160         TESTCASE(50,TestCompoundLatinRT);
161         TESTCASE(51,TestSanskritLatinRT);
162         TESTCASE(52,TestLocaleInstantiation);
163         TESTCASE(53,TestTitleAccents);
164         TESTCASE(54,TestLocaleResource);
165         TESTCASE(55,TestParseError);
166         TESTCASE(56,TestOutputSet);
167         TESTCASE(57,TestVariableRange);
168         TESTCASE(58,TestInvalidPostContext);
169         TESTCASE(59,TestIDForms);
170         TESTCASE(60,TestToRulesMark);
171         TESTCASE(61,TestEscape);
172         TESTCASE(62,TestAnchorMasking);
173         TESTCASE(63,TestDisplayName);
174         TESTCASE(64,TestSpecialCases);
175 #if !UCONFIG_NO_FILE_IO
176         TESTCASE(65,TestIncrementalProgress);
177 #endif
178         TESTCASE(66,TestSurrogateCasing);
179         TESTCASE(67,TestFunction);
180         TESTCASE(68,TestInvalidBackRef);
181         TESTCASE(69,TestMulticharStringSet);
182         TESTCASE(70,TestUserFunction);
183         TESTCASE(71,TestAnyX);
184         TESTCASE(72,TestSourceTargetSet);
185         TESTCASE(73,TestGurmukhiDevanagari);
186         TESTCASE(74,TestPatternWhiteSpace);
187         TESTCASE(75,TestAllCodepoints);
188         TESTCASE(76,TestBoilerplate);
189         TESTCASE(77,TestAlternateSyntax);
190         TESTCASE(78,TestBeginEnd);
191         TESTCASE(79,TestBeginEndToRules);
192         TESTCASE(80,TestRegisterAlias);
193         TESTCASE(81,TestRuleStripping);
194         TESTCASE(82,TestHalfwidthFullwidth);
195         TESTCASE(83,TestThai);
196         TESTCASE(84,TestAny);
197         default: name = ""; break;
198     }
199 }
200 
201 /**
202  * Make sure every system transliterator can be instantiated.
203  *
204  * ALSO test that the result of toRules() for each rule is a valid
205  * rule.  Do this here so we don't have to have another test that
206  * instantiates everything as well.
207  */
TestInstantiation()208 void TransliteratorTest::TestInstantiation() {
209     UErrorCode ec = U_ZERO_ERROR;
210     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
211     assertSuccess("getAvailableIDs()", ec);
212     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
213     int32_t n = Transliterator::countAvailableIDs();
214     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
215                avail->count(ec) == n);
216     assertSuccess("count()", ec);
217     UnicodeString name;
218     for (int32_t i=0; i<n; ++i) {
219         const UnicodeString& id = *avail->snext(ec);
220         if (!assertSuccess("snext()", ec) ||
221             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
222             break;
223         }
224         UnicodeString id2 = Transliterator::getAvailableID(i);
225         if (id.length() < 1) {
226             errln(UnicodeString("FAIL: getAvailableID(") +
227                   i + ") returned empty string");
228             continue;
229         }
230         if (id != id2) {
231             errln(UnicodeString("FAIL: getAvailableID(") +
232                   i + ") != getAvailableIDs().snext()");
233             continue;
234         }
235         UParseError parseError;
236         UErrorCode status = U_ZERO_ERROR;
237         Transliterator* t = Transliterator::createInstance(id,
238                               UTRANS_FORWARD, parseError,status);
239         name.truncate(0);
240         Transliterator::getDisplayName(id, name);
241         if (t == 0) {
242 #if UCONFIG_NO_BREAK_ITERATION
243             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
244             if (id.compare((UnicodeString)"Thai-Latin") != 0)
245 #endif
246                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
247                       /*", parse error " + parseError.code +*/
248                       ", line " + parseError.line +
249                       ", offset " + parseError.offset +
250                       ", pre-context " + prettify(parseError.preContext, TRUE) +
251                       ", post-context " +prettify(parseError.postContext,TRUE) +
252                       ", Error: " + u_errorName(status));
253                 // When createInstance fails, it deletes the failing
254                 // entry from the available ID list.  We detect this
255                 // here by looking for a change in countAvailableIDs.
256             int32_t nn = Transliterator::countAvailableIDs();
257             if (nn == (n - 1)) {
258                 n = nn;
259                 --i; // Compensate for deleted entry
260             }
261         } else {
262             logln(UnicodeString("OK: ") + name + " (" + id + ")");
263 
264             // Now test toRules
265             UnicodeString rules;
266             t->toRules(rules, TRUE);
267             Transliterator *u = Transliterator::createFromRules("x",
268                                     rules, UTRANS_FORWARD, parseError,status);
269             if (u == 0) {
270                 errln(UnicodeString("FAIL: ") + id +
271                       ".createFromRules() => bad rules" +
272                       /*", parse error " + parseError.code +*/
273                       ", line " + parseError.line +
274                       ", offset " + parseError.offset +
275                       ", context " + prettify(parseError.preContext, TRUE) +
276                       ", rules: " + prettify(rules, TRUE));
277             } else {
278                 delete u;
279             }
280             delete t;
281         }
282     }
283     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
284     assertSuccess("snext()", ec);
285     delete avail;
286 
287     // Now test the failure path
288     UParseError parseError;
289     UErrorCode status = U_ZERO_ERROR;
290     UnicodeString id("<Not a valid Transliterator ID>");
291     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
292     if (t != 0) {
293         errln("FAIL: " + id + " returned a transliterator");
294         delete t;
295     } else {
296         logln("OK: Bogus ID handled properly");
297     }
298 }
299 
TestSimpleRules(void)300 void TransliteratorTest::TestSimpleRules(void) {
301     /* Example: rules 1. ab>x|y
302      *                2. yc>z
303      *
304      * []|eabcd  start - no match, copy e to tranlated buffer
305      * [e]|abcd  match rule 1 - copy output & adjust cursor
306      * [ex|y]cd  match rule 2 - copy output & adjust cursor
307      * [exz]|d   no match, copy d to transliterated buffer
308      * [exzd]|   done
309      */
310     expect(UnicodeString("ab>x|y;", "") +
311            "yc>z",
312            "eabcd", "exzd");
313 
314     /* Another set of rules:
315      *    1. ab>x|yzacw
316      *    2. za>q
317      *    3. qc>r
318      *    4. cw>n
319      *
320      * []|ab       Rule 1
321      * [x|yzacw]   No match
322      * [xy|zacw]   Rule 2
323      * [xyq|cw]    Rule 4
324      * [xyqn]|     Done
325      */
326     expect(UnicodeString("ab>x|yzacw;") +
327            "za>q;" +
328            "qc>r;" +
329            "cw>n",
330            "ab", "xyqn");
331 
332     /* Test categories
333      */
334     UErrorCode status = U_ZERO_ERROR;
335     UParseError parseError;
336     Transliterator *t = Transliterator::createFromRules(
337         "<ID>",
338         UnicodeString("$dummy=").append((UChar)0xE100) +
339         UnicodeString(";"
340                       "$vowel=[aeiouAEIOU];"
341                       "$lu=[:Lu:];"
342                       "$vowel } $lu > '!';"
343                       "$vowel > '&';"
344                       "'!' { $lu > '^';"
345                       "$lu > '*';"
346                       "a > ERROR", ""),
347         UTRANS_FORWARD, parseError,
348         status);
349     if (U_FAILURE(status)) {
350         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
351         return;
352     }
353     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
354     delete t;
355 }
356 
357 /**
358  * Test inline set syntax and set variable syntax.
359  */
TestInlineSet(void)360 void TransliteratorTest::TestInlineSet(void) {
361     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
362     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
363 
364     expect(UnicodeString(
365            "$digit = [0-9];"
366            "$alpha = [a-zA-Z];"
367            "$alphanumeric = [$digit $alpha];" // ***
368            "$special = [^$alphanumeric];"     // ***
369            "$alphanumeric > '-';"
370            "$special > '*';", ""),
371 
372            "thx-1138", "---*----");
373 }
374 
375 /**
376  * Create some inverses and confirm that they work.  We have to be
377  * careful how we do this, since the inverses will not be true
378  * inverses -- we can't throw any random string at the composition
379  * of the transliterators and expect the identity function.  F x
380  * F' != I.  However, if we are careful about the input, we will
381  * get the expected results.
382  */
TestRuleBasedInverse(void)383 void TransliteratorTest::TestRuleBasedInverse(void) {
384     UnicodeString RULES =
385         UnicodeString("abc>zyx;") +
386         "ab>yz;" +
387         "bc>zx;" +
388         "ca>xy;" +
389         "a>x;" +
390         "b>y;" +
391         "c>z;" +
392 
393         "abc<zyx;" +
394         "ab<yz;" +
395         "bc<zx;" +
396         "ca<xy;" +
397         "a<x;" +
398         "b<y;" +
399         "c<z;" +
400 
401         "";
402 
403     const char* DATA[] = {
404         // Careful here -- random strings will not work.  If we keep
405         // the left side to the domain and the right side to the range
406         // we will be okay though (left, abc; right xyz).
407         "a", "x",
408         "abcacab", "zyxxxyy",
409         "caccb", "xyzzy",
410     };
411 
412     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
413 
414     UErrorCode status = U_ZERO_ERROR;
415     UParseError parseError;
416     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
417                                 UTRANS_FORWARD, parseError, status);
418     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
419                                 UTRANS_REVERSE, parseError, status);
420     if (U_FAILURE(status)) {
421         errln("FAIL: RBT constructor failed");
422         return;
423     }
424     for (int32_t i=0; i<DATA_length; i+=2) {
425         expect(*fwd, DATA[i], DATA[i+1]);
426         expect(*rev, DATA[i+1], DATA[i]);
427     }
428     delete fwd;
429     delete rev;
430 }
431 
432 /**
433  * Basic test of keyboard.
434  */
TestKeyboard(void)435 void TransliteratorTest::TestKeyboard(void) {
436     UParseError parseError;
437     UErrorCode status = U_ZERO_ERROR;
438     Transliterator *t = Transliterator::createFromRules("<ID>",
439                               UnicodeString("psch>Y;")
440                               +"ps>y;"
441                               +"ch>x;"
442                               +"a>A;",
443                               UTRANS_FORWARD, parseError,
444                               status);
445     if (U_FAILURE(status)) {
446         errln("FAIL: RBT constructor failed");
447         return;
448     }
449     const char* DATA[] = {
450         // insertion, buffer
451         "a", "A",
452         "p", "Ap",
453         "s", "Aps",
454         "c", "Apsc",
455         "a", "AycA",
456         "psch", "AycAY",
457         0, "AycAY", // null means finishKeyboardTransliteration
458     };
459 
460     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
461     delete t;
462 }
463 
464 /**
465  * Basic test of keyboard with cursor.
466  */
TestKeyboard2(void)467 void TransliteratorTest::TestKeyboard2(void) {
468     UParseError parseError;
469     UErrorCode status = U_ZERO_ERROR;
470     Transliterator *t = Transliterator::createFromRules("<ID>",
471                               UnicodeString("ych>Y;")
472                               +"ps>|y;"
473                               +"ch>x;"
474                               +"a>A;",
475                               UTRANS_FORWARD, parseError,
476                               status);
477     if (U_FAILURE(status)) {
478         errln("FAIL: RBT constructor failed");
479         return;
480     }
481     const char* DATA[] = {
482         // insertion, buffer
483         "a", "A",
484         "p", "Ap",
485         "s", "Aps", // modified for rollback - "Ay",
486         "c", "Apsc", // modified for rollback - "Ayc",
487         "a", "AycA",
488         "p", "AycAp",
489         "s", "AycAps", // modified for rollback - "AycAy",
490         "c", "AycApsc", // modified for rollback - "AycAyc",
491         "h", "AycAY",
492         0, "AycAY", // null means finishKeyboardTransliteration
493     };
494 
495     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
496     delete t;
497 }
498 
499 /**
500  * Test keyboard transliteration with back-replacement.
501  */
TestKeyboard3(void)502 void TransliteratorTest::TestKeyboard3(void) {
503     // We want th>z but t>y.  Furthermore, during keyboard
504     // transliteration we want t>y then yh>z if t, then h are
505     // typed.
506     UnicodeString RULES("t>|y;"
507                         "yh>z;");
508 
509     const char* DATA[] = {
510         // Column 1: characters to add to buffer (as if typed)
511         // Column 2: expected appearance of buffer after
512         //           keyboard xliteration.
513         "a", "a",
514         "b", "ab",
515         "t", "abt", // modified for rollback - "aby",
516         "c", "abyc",
517         "t", "abyct", // modified for rollback - "abycy",
518         "h", "abycz",
519         0, "abycz", // null means finishKeyboardTransliteration
520     };
521 
522     UParseError parseError;
523     UErrorCode status = U_ZERO_ERROR;
524     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
525     if (U_FAILURE(status)) {
526         errln("FAIL: RBT constructor failed");
527         return;
528     }
529     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
530     delete t;
531 }
532 
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)533 void TransliteratorTest::keyboardAux(const Transliterator& t,
534                                      const char* DATA[], int32_t DATA_length) {
535     UErrorCode status = U_ZERO_ERROR;
536     UTransPosition index={0, 0, 0, 0};
537     UnicodeString s;
538     for (int32_t i=0; i<DATA_length; i+=2) {
539         UnicodeString log;
540         if (DATA[i] != 0) {
541             log = s + " + "
542                 + DATA[i]
543                 + " -> ";
544             t.transliterate(s, index, DATA[i], status);
545         } else {
546             log = s + " => ";
547             t.finishTransliteration(s, index);
548         }
549         // Show the start index '{' and the cursor '|'
550         UnicodeString a, b, c;
551         s.extractBetween(0, index.contextStart, a);
552         s.extractBetween(index.contextStart, index.start, b);
553         s.extractBetween(index.start, s.length(), c);
554         log.append(a).
555             append((UChar)LEFT_BRACE).
556             append(b).
557             append((UChar)PIPE).
558             append(c);
559         if (s == DATA[i+1] && U_SUCCESS(status)) {
560             logln(log);
561         } else {
562             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
563         }
564     }
565 }
566 
TestArabic(void)567 void TransliteratorTest::TestArabic(void) {
568 // Test disabled for 2.0 until new Arabic transliterator can be written.
569 //    /*
570 //    const char* DATA[] = {
571 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
572 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
573 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
574 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
575 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
576 //                  "\u062c\u0645\u064a\u0644\u0629",
577 //    };
578 //    */
579 //
580 //    UChar ar_raw[] = {
581 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
582 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
583 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
584 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
585 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
586 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
587 //    };
588 //    UnicodeString ar(ar_raw);
589 //    UErrorCode status=U_ZERO_ERROR;
590 //    UParseError parseError;
591 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
592 //    if (t == 0) {
593 //        errln("FAIL: createInstance failed");
594 //        return;
595 //    }
596 //    expect(*t, "Arabic", ar);
597 //    delete t;
598 }
599 
600 /**
601  * Compose the Kana transliterator forward and reverse and try
602  * some strings that should come out unchanged.
603  */
TestCompoundKana(void)604 void TransliteratorTest::TestCompoundKana(void) {
605     UParseError parseError;
606     UErrorCode status = U_ZERO_ERROR;
607     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
608     if (t == 0) {
609         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
610     } else {
611         expect(*t, "aaaaa", "aaaaa");
612         delete t;
613     }
614 }
615 
616 /**
617  * Compose the hex transliterators forward and reverse.
618  */
TestCompoundHex(void)619 void TransliteratorTest::TestCompoundHex(void) {
620     UParseError parseError;
621     UErrorCode status = U_ZERO_ERROR;
622     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
623     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
624     Transliterator* transab[] = { a, b };
625     Transliterator* transba[] = { b, a };
626     if (a == 0 || b == 0) {
627         errln("FAIL: construction failed");
628         delete a;
629         delete b;
630         return;
631     }
632     // Do some basic tests of a
633     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
634     // Do some basic tests of b
635     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
636 
637     Transliterator* ab = new CompoundTransliterator(transab, 2);
638     UnicodeString s("abcde", "");
639     expect(*ab, s, s);
640 
641     UnicodeString str(s);
642     a->transliterate(str);
643     Transliterator* ba = new CompoundTransliterator(transba, 2);
644     expect(*ba, str, str);
645 
646     delete ab;
647     delete ba;
648     delete a;
649     delete b;
650 }
651 
652 int gTestFilterClassID = 0;
653 /**
654  * Used by TestFiltering().
655  */
656 class TestFilter : public UnicodeFilter {
clone() const657     virtual UnicodeFunctor* clone() const {
658         return new TestFilter(*this);
659     }
contains(UChar32 c) const660     virtual UBool contains(UChar32 c) const {
661         return c != (UChar)0x0063 /*c*/;
662     }
663     // Stubs
toPattern(UnicodeString & result,UBool) const664     virtual UnicodeString& toPattern(UnicodeString& result,
665                                      UBool /*escapeUnprintable*/) const {
666         return result;
667     }
matchesIndexValue(uint8_t) const668     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
669         return FALSE;
670     }
addMatchSetTo(UnicodeSet &) const671     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
672 public:
getDynamicClassID() const673     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
674 };
675 
676 /**
677  * Do some basic tests of filtering.
678  */
TestFiltering(void)679 void TransliteratorTest::TestFiltering(void) {
680     UParseError parseError;
681     UErrorCode status = U_ZERO_ERROR;
682     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
683     if (hex == 0) {
684         errln("FAIL: createInstance(Any-Hex) failed");
685         return;
686     }
687     hex->adoptFilter(new TestFilter());
688     UnicodeString s("abcde");
689     hex->transliterate(s);
690     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
691     if (s == exp) {
692         logln(UnicodeString("Ok:   \"") + exp + "\"");
693     } else {
694         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
695     }
696 
697     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
698     UnicodeFilter *f = hex->orphanFilter();
699     if (f == NULL){
700         errln("FAIL: orphanFilter() should get a UnicodeFilter");
701     } else {
702         delete f;
703     }
704     delete hex;
705 }
706 
707 /**
708  * Test anchors
709  */
TestAnchors(void)710 void TransliteratorTest::TestAnchors(void) {
711     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
712            "aaa",
713            "012");
714     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
715            "aaa",
716            "012");
717     expect(UnicodeString("^ab  > 01 ;"
718            " ab  > |8 ;"
719            "  b  > k ;"
720            " 8x$ > 45 ;"
721            " 8x  > 77 ;", ""),
722 
723            "ababbabxabx",
724            "018k7745");
725     expect(UnicodeString("$s = [z$] ;"
726            "$s{ab    > 01 ;"
727            "   ab    > |8 ;"
728            "    b    > k ;"
729            "   8x}$s > 45 ;"
730            "   8x    > 77 ;", ""),
731 
732            "abzababbabxzabxabx",
733            "01z018k45z01x45");
734 }
735 
736 /**
737  * Test pattern quoting and escape mechanisms.
738  */
TestPatternQuoting(void)739 void TransliteratorTest::TestPatternQuoting(void) {
740     // Array of 3n items
741     // Each item is <rules>, <input>, <expected output>
742     const UnicodeString DATA[] = {
743         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
744         UnicodeString(UChar(0x4E01)),
745         "[male adult]"
746     };
747 
748     for (int32_t i=0; i<3; i+=3) {
749         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
750         UParseError parseError;
751         UErrorCode status = U_ZERO_ERROR;
752         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
753         if (U_FAILURE(status)) {
754             errln("RBT constructor failed");
755         } else {
756             expect(*t, DATA[i+1], DATA[i+2]);
757         }
758         delete t;
759     }
760 }
761 
762 /**
763  * Regression test for bugs found in Greek transliteration.
764  */
TestJ277(void)765 void TransliteratorTest::TestJ277(void) {
766     UErrorCode status = U_ZERO_ERROR;
767     UParseError parseError;
768     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
769     if (gl == NULL) {
770         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
771         return;
772     }
773 
774     UChar sigma = 0x3C3;
775     UChar upsilon = 0x3C5;
776     UChar nu = 0x3BD;
777 //    UChar PHI = 0x3A6;
778     UChar alpha = 0x3B1;
779 //    UChar omega = 0x3C9;
780 //    UChar omicron = 0x3BF;
781 //    UChar epsilon = 0x3B5;
782 
783     // sigma upsilon nu -> syn
784     UnicodeString syn;
785     syn.append(sigma).append(upsilon).append(nu);
786     expect(*gl, syn, "syn");
787 
788     // sigma alpha upsilon nu -> saun
789     UnicodeString sayn;
790     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
791     expect(*gl, sayn, "saun");
792 
793     // Again, using a smaller rule set
794     UnicodeString rules(
795                 "$alpha   = \\u03B1;"
796                 "$nu      = \\u03BD;"
797                 "$sigma   = \\u03C3;"
798                 "$ypsilon = \\u03C5;"
799                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
800                 "s <>           $sigma;"
801                 "a <>           $alpha;"
802                 "u <>  $vowel { $ypsilon;"
803                 "y <>           $ypsilon;"
804                 "n <>           $nu;",
805                 "");
806     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
807     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
808     expect(*mini, syn, "syn");
809     expect(*mini, sayn, "saun");
810     delete mini;
811     mini = NULL;
812 
813 #if !UCONFIG_NO_FORMATTING
814     // Transliterate the Greek locale data
815     Locale el("el");
816     DateFormatSymbols syms(el, status);
817     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
818     int32_t i, count;
819     const UnicodeString* data = syms.getMonths(count);
820     for (i=0; i<count; ++i) {
821         if (data[i].length() == 0) {
822             continue;
823         }
824         UnicodeString out(data[i]);
825         gl->transliterate(out);
826         UBool ok = TRUE;
827         if (data[i].length() >= 2 && out.length() >= 2 &&
828             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
829             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
830                 ok = FALSE;
831             }
832         }
833         if (ok) {
834             logln(prettify(data[i] + " -> " + out));
835         } else {
836             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
837         }
838     }
839 #endif
840 
841     delete gl;
842 }
843 
844 /**
845  * Prefix, suffix support in hex transliterators
846  */
TestJ243(void)847 void TransliteratorTest::TestJ243(void) {
848     UErrorCode ec = U_ZERO_ERROR;
849 
850     // Test default Hex-Any, which should handle
851     // \u, \U, u+, and U+
852     Transliterator *hex =
853         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
854     if (assertSuccess("getInstance", ec)) {
855         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
856     }
857     delete hex;
858 
859 //    // Try a custom Hex-Unicode
860 //    // \uXXXX and &#xXXXX;
861 //    ec = U_ZERO_ERROR;
862 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
863 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
864 //           "abcd5fx012&#x00033;");
865 //    // Try custom Any-Hex (default is tested elsewhere)
866 //    ec = U_ZERO_ERROR;
867 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
868 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
869 }
870 
871 /**
872  * Parsers need better syntax error messages.
873  */
TestJ329(void)874 void TransliteratorTest::TestJ329(void) {
875 
876     struct { UBool containsErrors; const char* rule; } DATA[] = {
877         { FALSE, "a > b; c > d" },
878         { TRUE,  "a > b; no operator; c > d" },
879     };
880     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
881 
882     for (int32_t i=0; i<DATA_length; ++i) {
883         UErrorCode status = U_ZERO_ERROR;
884         UParseError parseError;
885         Transliterator *rbt = Transliterator::createFromRules("<ID>",
886                                     DATA[i].rule,
887                                     UTRANS_FORWARD,
888                                     parseError,
889                                     status);
890         UBool gotError = U_FAILURE(status);
891         UnicodeString desc(DATA[i].rule);
892         desc.append(gotError ? " -> error" : " -> no error");
893         if (gotError) {
894             desc = desc + ", ParseError code=" + u_errorName(status) +
895                 " line=" + parseError.line +
896                 " offset=" + parseError.offset +
897                 " context=" + parseError.preContext;
898         }
899         if (gotError == DATA[i].containsErrors) {
900             logln(UnicodeString("Ok:   ") + desc);
901         } else {
902             errln(UnicodeString("FAIL: ") + desc);
903         }
904         delete rbt;
905     }
906 }
907 
908 /**
909  * Test segments and segment references.
910  */
TestSegments(void)911 void TransliteratorTest::TestSegments(void) {
912     // Array of 3n items
913     // Each item is <rules>, <input>, <expected output>
914     UnicodeString DATA[] = {
915         "([a-z]) '.' ([0-9]) > $2 '-' $1",
916         "abc.123.xyz.456",
917         "ab1-c23.xy4-z56",
918 
919         // nested
920         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
921         "a1 b2",
922         "a1.a.1 b2.b.2",
923     };
924     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
925 
926     for (int32_t i=0; i<DATA_length; i+=3) {
927         logln("Pattern: " + prettify(DATA[i]));
928         UParseError parseError;
929         UErrorCode status = U_ZERO_ERROR;
930         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
931         if (U_FAILURE(status)) {
932             errln("FAIL: RBT constructor");
933         } else {
934             expect(*t, DATA[i+1], DATA[i+2]);
935         }
936         delete t;
937     }
938 }
939 
940 /**
941  * Test cursor positioning outside of the key
942  */
TestCursorOffset(void)943 void TransliteratorTest::TestCursorOffset(void) {
944     // Array of 3n items
945     // Each item is <rules>, <input>, <expected output>
946     UnicodeString DATA[] = {
947         "pre {alpha} post > | @ ALPHA ;"
948         "eALPHA > beta ;"
949         "pre {beta} post > BETA @@ | ;"
950         "post > xyz",
951 
952         "prealphapost prebetapost",
953 
954         "prbetaxyz preBETApost",
955     };
956     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
957 
958     for (int32_t i=0; i<DATA_length; i+=3) {
959         logln("Pattern: " + prettify(DATA[i]));
960         UParseError parseError;
961         UErrorCode status = U_ZERO_ERROR;
962         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
963         if (U_FAILURE(status)) {
964             errln("FAIL: RBT constructor");
965         } else {
966             expect(*t, DATA[i+1], DATA[i+2]);
967         }
968         delete t;
969     }
970 }
971 
972 /**
973  * Test zero length and > 1 char length variable values.  Test
974  * use of variable refs in UnicodeSets.
975  */
TestArbitraryVariableValues(void)976 void TransliteratorTest::TestArbitraryVariableValues(void) {
977     // Array of 3n items
978     // Each item is <rules>, <input>, <expected output>
979     UnicodeString DATA[] = {
980         "$abe = ab;"
981         "$pat = x[yY]z;"
982         "$ll  = 'a-z';"
983         "$llZ = [$ll];"
984         "$llY = [$ll$pat];"
985         "$emp = ;"
986 
987         "$abe > ABE;"
988         "$pat > END;"
989         "$llZ > 1;"
990         "$llY > 2;"
991         "7$emp 8 > 9;"
992         "",
993 
994         "ab xYzxyz stY78",
995         "ABE ENDEND 1129",
996     };
997     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
998 
999     for (int32_t i=0; i<DATA_length; i+=3) {
1000         logln("Pattern: " + prettify(DATA[i]));
1001         UParseError parseError;
1002         UErrorCode status = U_ZERO_ERROR;
1003         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1004         if (U_FAILURE(status)) {
1005             errln("FAIL: RBT constructor");
1006         } else {
1007             expect(*t, DATA[i+1], DATA[i+2]);
1008         }
1009         delete t;
1010     }
1011 }
1012 
1013 /**
1014  * Confirm that the contextStart, contextLimit, start, and limit
1015  * behave correctly. J474.
1016  */
TestPositionHandling(void)1017 void TransliteratorTest::TestPositionHandling(void) {
1018     // Array of 3n items
1019     // Each item is <rules>, <input>, <expected output>
1020     const char* DATA[] = {
1021         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1022         "xtat txtb", // pos 0,9,0,9
1023         "xTTaSS TTxUUb",
1024 
1025         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1026         "xtat txtb", // pos 2,9,3,8
1027         "xtaSS TTxUUb",
1028 
1029         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030         "xtat txtb", // pos 3,8,3,8
1031         "xtaTT TTxTTb",
1032     };
1033 
1034     // Array of 4n positions -- these go with the DATA array
1035     // They are: contextStart, contextLimit, start, limit
1036     int32_t POS[] = {
1037         0, 9, 0, 9,
1038         2, 9, 3, 8,
1039         3, 8, 3, 8,
1040     };
1041 
1042     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1043     for (int32_t i=0; i<n; i++) {
1044         UErrorCode status = U_ZERO_ERROR;
1045         UParseError parseError;
1046         Transliterator *t = Transliterator::createFromRules("<ID>",
1047                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1048         if (U_FAILURE(status)) {
1049             delete t;
1050             errln("FAIL: RBT constructor");
1051             return;
1052         }
1053         UTransPosition pos;
1054         pos.contextStart= POS[4*i];
1055         pos.contextLimit = POS[4*i+1];
1056         pos.start = POS[4*i+2];
1057         pos.limit = POS[4*i+3];
1058         UnicodeString rsource(DATA[3*i+1]);
1059         t->transliterate(rsource, pos, status);
1060         if (U_FAILURE(status)) {
1061             delete t;
1062             errln("FAIL: transliterate");
1063             return;
1064         }
1065         t->finishTransliteration(rsource, pos);
1066         expectAux(DATA[3*i],
1067                   DATA[3*i+1],
1068                   rsource,
1069                   DATA[3*i+2]);
1070         delete t;
1071     }
1072 }
1073 
1074 /**
1075  * Test the Hiragana-Katakana transliterator.
1076  */
TestHiraganaKatakana(void)1077 void TransliteratorTest::TestHiraganaKatakana(void) {
1078     UParseError parseError;
1079     UErrorCode status = U_ZERO_ERROR;
1080     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1081     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1082     if (hk == 0 || kh == 0) {
1083         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1084         delete hk;
1085         delete kh;
1086         return;
1087     }
1088 
1089     // Array of 3n items
1090     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1091     const char* DATA[] = {
1092         "both",
1093         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1094         "\\u30A2\\u30F8\\u30F2\\u30B0",
1095 
1096         "kh",
1097         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1098         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1099     };
1100     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1101 
1102     for (int32_t i=0; i<DATA_length; i+=3) {
1103         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1104         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1105         switch (*DATA[i]) {
1106         case 0x68: //'h': // Hiragana-Katakana
1107             expect(*hk, h, k);
1108             break;
1109         case 0x6B: //'k': // Katakana-Hiragana
1110             expect(*kh, k, h);
1111             break;
1112         case 0x62: //'b': // both
1113             expect(*hk, h, k);
1114             expect(*kh, k, h);
1115             break;
1116         }
1117     }
1118     delete hk;
1119     delete kh;
1120 }
1121 
1122 /**
1123  * Test cloning / copy constructor of RBT.
1124  */
TestCopyJ476(void)1125 void TransliteratorTest::TestCopyJ476(void) {
1126     // The real test here is what happens when the destructors are
1127     // called.  So we let one object get destructed, and check to
1128     // see that its copy still works.
1129     Transliterator *t2 = 0;
1130     {
1131         UParseError parseError;
1132         UErrorCode status = U_ZERO_ERROR;
1133         Transliterator *t1 = Transliterator::createFromRules("t1",
1134             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1135         if (U_FAILURE(status)) {
1136             errln("FAIL: RBT constructor");
1137             return;
1138         }
1139         t2 = t1->clone(); // Call copy constructor under the covers.
1140         expect(*t1, "abcfoofoo", "ABcbar");
1141         delete t1;
1142     }
1143     expect(*t2, "abcfoofoo", "ABcbar");
1144     delete t2;
1145 }
1146 
1147 /**
1148  * Test inter-Indic transliterators.  These are composed.
1149  * ICU4C Jitterbug 483.
1150  */
TestInterIndic(void)1151 void TransliteratorTest::TestInterIndic(void) {
1152     UnicodeString ID("Devanagari-Gujarati", "");
1153     UErrorCode status = U_ZERO_ERROR;
1154     UParseError parseError;
1155     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1156     if (dg == 0) {
1157         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1158         return;
1159     }
1160     UnicodeString id = dg->getID();
1161     if (id != ID) {
1162         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1163     }
1164     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1165     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1166     expect(*dg, dev, guj);
1167     delete dg;
1168 }
1169 
1170 /**
1171  * Test filter syntax in IDs. (J918)
1172  */
TestFilterIDs(void)1173 void TransliteratorTest::TestFilterIDs(void) {
1174     // Array of 3n strings:
1175     // <id>, <inverse id>, <input>, <expected output>
1176     const char* DATA[] = {
1177         "[aeiou]Any-Hex", // ID
1178         "[aeiou]Hex-Any", // expected inverse ID
1179         "quizzical",      // src
1180         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1181 
1182         "[aeiou]Any-Hex;[^5]Hex-Any",
1183         "[^5]Any-Hex;[aeiou]Hex-Any",
1184         "quizzical",
1185         "q\\u0075izzical",
1186 
1187         "[abc]Null",
1188         "[abc]Null",
1189         "xyz",
1190         "xyz",
1191     };
1192     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1193 
1194     for (int i=0; i<DATA_length; i+=4) {
1195         UnicodeString ID(DATA[i], "");
1196         UnicodeString uID(DATA[i+1], "");
1197         UnicodeString data2(DATA[i+2], "");
1198         UnicodeString data3(DATA[i+3], "");
1199         UParseError parseError;
1200         UErrorCode status = U_ZERO_ERROR;
1201         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1202         if (t == 0) {
1203             errln("FAIL: createInstance(" + ID + ") returned NULL");
1204             return;
1205         }
1206         expect(*t, data2, data3);
1207 
1208         // Check the ID
1209         if (ID != t->getID()) {
1210             errln("FAIL: createInstance(" + ID + ").getID() => " +
1211                   t->getID());
1212         }
1213 
1214         // Check the inverse
1215         Transliterator *u = t->createInverse(status);
1216         if (u == 0) {
1217             errln("FAIL: " + ID + ".createInverse() returned NULL");
1218         } else if (u->getID() != uID) {
1219             errln("FAIL: " + ID + ".createInverse().getID() => " +
1220                   u->getID() + ", expected " + uID);
1221         }
1222 
1223         delete t;
1224         delete u;
1225     }
1226 }
1227 
1228 /**
1229  * Test the case mapping transliterators.
1230  */
TestCaseMap(void)1231 void TransliteratorTest::TestCaseMap(void) {
1232     UParseError parseError;
1233     UErrorCode status = U_ZERO_ERROR;
1234     Transliterator* toUpper =
1235         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1236     Transliterator* toLower =
1237         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1238     Transliterator* toTitle =
1239         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240     if (toUpper==0 || toLower==0 || toTitle==0) {
1241         errln("FAIL: createInstance returned NULL");
1242         delete toUpper;
1243         delete toLower;
1244         delete toTitle;
1245         return;
1246     }
1247 
1248     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1249            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1250     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1251            "the quick brown foX jumped over the lazY dogs.");
1252     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1253            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1254 
1255     delete toUpper;
1256     delete toLower;
1257     delete toTitle;
1258 }
1259 
1260 /**
1261  * Test the name mapping transliterators.
1262  */
TestNameMap(void)1263 void TransliteratorTest::TestNameMap(void) {
1264     UParseError parseError;
1265     UErrorCode status = U_ZERO_ERROR;
1266     Transliterator* uni2name =
1267         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1268     Transliterator* name2uni =
1269         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1270     if (uni2name==0 || name2uni==0) {
1271         errln("FAIL: createInstance returned NULL");
1272         delete uni2name;
1273         delete name2uni;
1274         return;
1275     }
1276 
1277     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1278     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1279            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1280     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1281            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1282 
1283     delete uni2name;
1284     delete name2uni;
1285 
1286     // round trip
1287     Transliterator* t =
1288         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1289     if (t==0) {
1290         errln("FAIL: createInstance returned NULL");
1291         delete t;
1292         return;
1293     }
1294 
1295     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1296     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1297     expect(*t, s, s);
1298     delete t;
1299 }
1300 
1301 /**
1302  * Test liberalized ID syntax.  1006c
1303  */
TestLiberalizedID(void)1304 void TransliteratorTest::TestLiberalizedID(void) {
1305     // Some test cases have an expected getID() value of NULL.  This
1306     // means I have disabled the test case for now.  This stuff is
1307     // still under development, and I haven't decided whether to make
1308     // getID() return canonical case yet.  It will all get rewritten
1309     // with the move to Source-Target/Variant IDs anyway. [aliu]
1310     const char* DATA[] = {
1311         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1312         "  Null  ", "Null", "whitespace",
1313         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1314         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1315     };
1316     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1317     UParseError parseError;
1318     UErrorCode status= U_ZERO_ERROR;
1319     for (int32_t i=0; i<DATA_length; i+=3) {
1320         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1321         if (t == 0) {
1322             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1323                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1324         } else {
1325             UnicodeString exp;
1326             if (DATA[i+1]) {
1327                 exp = UnicodeString(DATA[i+1], "");
1328             }
1329             // Don't worry about getID() if the expected char*
1330             // is NULL -- see above.
1331             if (exp.length() == 0 || exp == t->getID()) {
1332                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1333                       " create ID \"" + DATA[i] + "\" => \"" +
1334                       exp + "\"");
1335             } else {
1336                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1337                       " create ID \"" + DATA[i] + "\" => \"" +
1338                       t->getID() + "\", exp \"" + exp + "\"");
1339             }
1340             delete t;
1341         }
1342     }
1343 }
1344 
1345 /* test for Jitterbug 912 */
TestCreateInstance()1346 void TransliteratorTest::TestCreateInstance(){
1347     const char* FORWARD = "F";
1348     const char* REVERSE = "R";
1349     const char* DATA[] = {
1350         // Column 1: id
1351         // Column 2: direction
1352         // Column 3: expected ID, or "" if expect failure
1353         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1354 
1355         // JB#2689: bad compound causes crash
1356         "InvalidSource-InvalidTarget", FORWARD, "",
1357         "InvalidSource-InvalidTarget", REVERSE, "",
1358         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1359         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1360         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1361         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1362 
1363         NULL
1364     };
1365 
1366     for (int32_t i=0; DATA[i]; i+=3) {
1367         UParseError err;
1368         UErrorCode ec = U_ZERO_ERROR;
1369         UnicodeString id(DATA[i]);
1370         UTransDirection dir = (DATA[i+1]==FORWARD)?
1371             UTRANS_FORWARD:UTRANS_REVERSE;
1372         UnicodeString expID(DATA[i+2]);
1373         Transliterator* t =
1374             Transliterator::createInstance(id,dir,err,ec);
1375         UnicodeString newID;
1376         if (t) {
1377             newID = t->getID();
1378         }
1379         UBool ok = (newID == expID);
1380         if (!t) {
1381             newID = u_errorName(ec);
1382         }
1383         if (ok) {
1384             logln((UnicodeString)"Ok: createInstance(" +
1385                   id + "," + DATA[i+1] + ") => " + newID);
1386         } else {
1387             dataerrln((UnicodeString)"FAIL: createInstance(" +
1388                   id + "," + DATA[i+1] + ") => " + newID +
1389                   ", expected " + expID);
1390         }
1391         delete t;
1392     }
1393 }
1394 
1395 /**
1396  * Test the normalization transliterator.
1397  */
TestNormalizationTransliterator()1398 void TransliteratorTest::TestNormalizationTransliterator() {
1399     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1400     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1401     const char* CANON[] = {
1402         // Input               Decomposed            Composed
1403         "cat",                "cat",                "cat"               ,
1404         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1405 
1406         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1407         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1408 
1409         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1410         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1411         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1412 
1413         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1414         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1415 
1416         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1417         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1418         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1419 
1420         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1421         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1422 
1423         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1424         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1425 
1426         "Henry IV",           "Henry IV",           "Henry IV"          ,
1427         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1428 
1429         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1430         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1431         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1432         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1433         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1434 
1435         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1436         0 // end
1437     };
1438 
1439     const char* COMPAT[] = {
1440         // Input               Decomposed            Composed
1441         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1442 
1443         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1444         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1445 
1446         "Henry IV",           "Henry IV",           "Henry IV"          ,
1447         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1448 
1449         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1450         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1451 
1452         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1453         0 // end
1454     };
1455 
1456     int32_t i;
1457     UParseError parseError;
1458     UErrorCode status = U_ZERO_ERROR;
1459     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1460     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1461     if (!NFD || !NFC) {
1462         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1463         delete NFD;
1464         delete NFC;
1465         return;
1466     }
1467     for (i=0; CANON[i]; i+=3) {
1468         UnicodeString in = CharsToUnicodeString(CANON[i]);
1469         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1470         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1471         expect(*NFD, in, expd);
1472         expect(*NFC, in, expc);
1473     }
1474     delete NFD;
1475     delete NFC;
1476 
1477     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1478     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1479     if (!NFKD || !NFKC) {
1480         dataerrln("FAIL: createInstance failed");
1481         delete NFKD;
1482         delete NFKC;
1483         return;
1484     }
1485     for (i=0; COMPAT[i]; i+=3) {
1486         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1487         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1488         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1489         expect(*NFKD, in, expkd);
1490         expect(*NFKC, in, expkc);
1491     }
1492     delete NFKD;
1493     delete NFKC;
1494 
1495     UParseError pe;
1496     status = U_ZERO_ERROR;
1497     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1498                                                        UTRANS_FORWARD,
1499                                                        pe, status);
1500     if (t == 0) {
1501         errln("FAIL: createInstance failed");
1502     }
1503     expect(*t, CharsToUnicodeString("\\u010dx"),
1504            CharsToUnicodeString("c\\u030C"));
1505     delete t;
1506 }
1507 
1508 /**
1509  * Test compound RBT rules.
1510  */
TestCompoundRBT(void)1511 void TransliteratorTest::TestCompoundRBT(void) {
1512     // Careful with spacing and ';' here:  Phrase this exactly
1513     // as toRules() is going to return it.  If toRules() changes
1514     // with regard to spacing or ';', then adjust this string.
1515     UnicodeString rule("::Hex-Any;\n"
1516                        "::Any-Lower;\n"
1517                        "a > '.A.';\n"
1518                        "b > '.B.';\n"
1519                        "::[^t]Any-Upper;", "");
1520     UParseError parseError;
1521     UErrorCode status = U_ZERO_ERROR;
1522     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1523     if (t == 0) {
1524         errln("FAIL: createFromRules failed");
1525         return;
1526     }
1527     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1528            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1529     UnicodeString r;
1530     t->toRules(r, TRUE);
1531     if (r == rule) {
1532         logln((UnicodeString)"OK: toRules() => " + r);
1533     } else {
1534         errln((UnicodeString)"FAIL: toRules() => " + r +
1535               ", expected " + rule);
1536     }
1537     delete t;
1538 
1539     // Now test toRules
1540     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1541     if (t == 0) {
1542         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1543         return;
1544     }
1545     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1546     t->toRules(r, TRUE);
1547     if (r != exp) {
1548         errln((UnicodeString)"FAIL: toRules() => " + r +
1549               ", expected " + exp);
1550     } else {
1551         logln((UnicodeString)"OK: toRules() => " + r);
1552     }
1553     delete t;
1554 
1555     // Round trip the result of toRules
1556     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1557     if (t == 0) {
1558         errln("FAIL: createFromRules #2 failed");
1559         return;
1560     } else {
1561         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1562     }
1563 
1564     // Test toRules again
1565     t->toRules(r, TRUE);
1566     if (r != exp) {
1567         errln((UnicodeString)"FAIL: toRules() => " + r +
1568               ", expected " + exp);
1569     } else {
1570         logln((UnicodeString)"OK: toRules() => " + r);
1571     }
1572 
1573     delete t;
1574 
1575     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1576     // to what the regenerated ID will look like.
1577     UnicodeString id("Upper(Lower);(NFKC)", "");
1578     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1579     if (t == 0) {
1580         errln("FAIL: createInstance #2 failed");
1581         return;
1582     }
1583     if (t->getID() == id) {
1584         logln((UnicodeString)"OK: created " + id);
1585     } else {
1586         errln((UnicodeString)"FAIL: createInstance(" + id +
1587               ").getID() => " + t->getID());
1588     }
1589 
1590     Transliterator *u = t->createInverse(status);
1591     if (u == 0) {
1592         errln("FAIL: createInverse failed");
1593         delete t;
1594         return;
1595     }
1596     exp = "NFKC();Lower(Upper)";
1597     if (u->getID() == exp) {
1598         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1599               u->getID());
1600     } else {
1601         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1602               u->getID());
1603     }
1604     delete t;
1605     delete u;
1606 }
1607 
1608 /**
 * Compound filter semantics were originally not implemented
 * correctly.  Originally, each component filter f(i) was replaced by
 * f'(i) = f(i) && g, where g is the filter for the compound
 * transliterator.
1613  *
1614  * From Mark:
1615  *
 * Suppose I have a transliterator X. Internally X is
1617  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1618  *
1619  * The compound should convert all greek characters (through latin) to
1620  * cyrillic, then lowercase the result. The filter should say "don't
1621  * touch 'A' in the original". But because an intermediate result
1622  * happens to go through "A", the Greek Alpha gets hung up.
1623  */
TestCompoundFilter(void)1624 void TransliteratorTest::TestCompoundFilter(void) {
1625     UParseError parseError;
1626     UErrorCode status = U_ZERO_ERROR;
1627     Transliterator *t = Transliterator::createInstance
1628         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1629     if (t == 0) {
1630         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1631         return;
1632     }
1633     t->adoptFilter(new UnicodeSet("[^A]", status));
1634     if (U_FAILURE(status)) {
1635         errln("FAIL: UnicodeSet ct failed");
1636         delete t;
1637         return;
1638     }
1639 
1640     // Only the 'A' at index 1 should remain unchanged
1641     expect(*t,
1642            CharsToUnicodeString("BA\\u039A\\u0391"),
1643            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1644     delete t;
1645 }
1646 
TestRemove(void)1647 void TransliteratorTest::TestRemove(void) {
1648     UParseError parseError;
1649     UErrorCode status = U_ZERO_ERROR;
1650     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1651     if (t == 0) {
1652         errln("FAIL: createInstance failed");
1653         return;
1654     }
1655 
1656     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1657 
1658     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1659     // duplicating the filter
1660     Transliterator* t2 = t->clone();
1661     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1662 
1663     delete t;
1664     delete t2;
1665 }
1666 
TestToRules(void)1667 void TransliteratorTest::TestToRules(void) {
1668     const char* RBT = "rbt";
1669     const char* SET = "set";
1670     static const char* DATA[] = {
1671         RBT,
1672         "$a=\\u4E61; [$a] > A;",
1673         "[\\u4E61] > A;",
1674 
1675         RBT,
1676         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1677         "[[:Zs:][:Zl:]]{a} > A;",
1678 
1679         SET,
1680         "[[:Zs:][:Zl:]]",
1681         "[[:Zs:][:Zl:]]",
1682 
1683         SET,
1684         "[:Ps:]",
1685         "[:Ps:]",
1686 
1687         SET,
1688         "[:L:]",
1689         "[:L:]",
1690 
1691         SET,
1692         "[[:L:]-[A]]",
1693         "[[:L:]-[A]]",
1694 
1695         SET,
1696         "[~[:Lu:][:Ll:]]",
1697         "[~[:Lu:][:Ll:]]",
1698 
1699         SET,
1700         "[~[a-z]]",
1701         "[~[a-z]]",
1702 
1703         RBT,
1704         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1705         "[^[:Zs:]]{a} > A;",
1706 
1707         RBT,
1708         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1709         "[[a-z]-[:Zs:]]{a} > A;",
1710 
1711         RBT,
1712         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1713         "[[:Zs:]&[a-z]]{a} > A;",
1714 
1715         RBT,
1716         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1717         "[x[:Zs:]]{a} > A;",
1718 
1719         RBT,
1720         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1721         "$macron = \\u0304 ;"
1722         "$evowel = [aeiouyAEIOUY] ;"
1723         "$iotasub = \\u0345 ;"
1724         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1725         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1726 
1727         RBT,
1728         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1729         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730     };
1731     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1732 
1733     for (int32_t d=0; d < DATA_length; d+=3) {
1734         if (DATA[d] == RBT) {
1735             // Transliterator test
1736             UParseError parseError;
1737             UErrorCode status = U_ZERO_ERROR;
1738             Transliterator *t = Transliterator::createFromRules("ID",
1739                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1740             if (t == 0) {
1741                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1742                 return;
1743             }
1744             UnicodeString rules, escapedRules;
1745             t->toRules(rules, FALSE);
1746             t->toRules(escapedRules, TRUE);
1747             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1748             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1749             if (rules == expRules) {
1750                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1751                       " => " + rules);
1752             } else {
1753                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                       " => " + rules + ", exp " + expRules);
1755             }
1756             if (escapedRules == expEscapedRules) {
1757                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1758                       " => " + escapedRules);
1759             } else {
1760                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                       " => " + escapedRules + ", exp " + expEscapedRules);
1762             }
1763             delete t;
1764 
1765         } else {
1766             // UnicodeSet test
1767             UErrorCode status = U_ZERO_ERROR;
1768             UnicodeString pat(DATA[d+1], -1, US_INV);
1769             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1770             UnicodeSet set(pat, status);
1771             if (U_FAILURE(status)) {
1772                 errln("FAIL: UnicodeSet ct failed");
1773                 return;
1774             }
1775             // Adjust spacing etc. as necessary.
1776             UnicodeString toPat;
1777             set.toPattern(toPat);
1778             if (expToPat == toPat) {
1779                 logln((UnicodeString)"Ok: " + pat +
1780                       " => " + toPat);
1781             } else {
1782                 errln((UnicodeString)"FAIL: " + pat +
1783                       " => " + prettify(toPat, TRUE) +
1784                       ", exp " + prettify(pat, TRUE));
1785             }
1786         }
1787     }
1788 }
1789 
TestContext()1790 void TransliteratorTest::TestContext() {
1791     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1792     expect("de > x; {d}e > y;",
1793            "de",
1794            "ye",
1795            &pos);
1796 
1797     expect("ab{c} > z;",
1798            "xadabdabcy",
1799            "xadabdabzy");
1800 }
1801 
TestSupplemental()1802 void TransliteratorTest::TestSupplemental() {
1803 
1804     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1805                                 "a > $a; $s > i;"),
1806            CharsToUnicodeString("ab\\U0001030Fx"),
1807            CharsToUnicodeString("\\U00010300bix"));
1808 
1809     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1810                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1811                                 "($a)($b) > $2 $1;"),
1812            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1813            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1814 
1815     // k|ax\\U00010300xm
1816 
1817     // k|a\\U00010400\\U00010300xm
1818     // ky|\\U00010400\\U00010300xm
1819     // ky\\U00010400|\\U00010300xm
1820 
1821     // ky\\U00010400|\\U00010300\\U00010400m
1822     // ky\\U00010400y|\\U00010400m
1823     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1824                                 "$a {x} > | @ \\U00010400;"
1825                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1826            CharsToUnicodeString("kax\\U00010300xm"),
1827            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1828 
1829     expectT("Any-Name",
1830            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1831            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1832 
1833     expectT("Any-Hex/Unicode",
1834            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1835            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1836 
1837     expectT("Any-Hex/C",
1838            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1839            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1840 
1841     expectT("Any-Hex/Perl",
1842            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1843            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1844 
1845     expectT("Any-Hex/Java",
1846            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1847            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1848 
1849     expectT("Any-Hex/XML",
1850            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1851            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1852 
1853     expectT("Any-Hex/XML10",
1854            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855            "&#66352;&#1113856;&#917601;&#160;");
1856 
1857     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1858            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1860 }
1861 
TestQuantifier()1862 void TransliteratorTest::TestQuantifier() {
1863 
1864     // Make sure @ in a quantified anteContext works
1865     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1866            "AAAAAb",
1867            "aaa(aac)");
1868 
1869     // Make sure @ in a quantified postContext works
1870     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1871            "baaaaa",
1872            "caa(aaa)");
1873 
1874     // Make sure @ in a quantified postContext with seg ref works
1875     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1876            "baaaaa",
1877            "baa(aaa)");
1878 
1879     // Make sure @ past ante context doesn't enter ante context
1880     UTransPosition pos = {0, 5, 3, 5};
1881     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1882            "xxxab",
1883            "xxx(ac)",
1884            &pos);
1885 
1886     // Make sure @ past post context doesn't pass limit
1887     UTransPosition pos2 = {0, 4, 0, 2};
1888     expect("{b} a+ > c @@ |; x > y; a > A;",
1889            "baxx",
1890            "caxx",
1891            &pos2);
1892 
1893     // Make sure @ past post context doesn't enter post context
1894     expect("{b} a+ > c @@ |; x > y; a > A;",
1895            "baxx",
1896            "cayy");
1897 
1898     expect("(ab)? c > d;",
1899            "c abc ababc",
1900            "d d abd");
1901 
1902     // NOTE: The (ab)+ when referenced just yields a single "ab",
1903     // not the full sequence of them.  This accords with perl behavior.
1904     expect("(ab)+ {x} > '(' $1 ')';",
1905            "x abx ababxy",
1906            "x ab(ab) abab(ab)y");
1907 
1908     expect("b+ > x;",
1909            "ac abc abbc abbbc",
1910            "ac axc axc axc");
1911 
1912     expect("[abc]+ > x;",
1913            "qac abrc abbcs abtbbc",
1914            "qx xrx xs xtx");
1915 
1916     expect("q{(ab)+} > x;",
1917            "qa qab qaba qababc qaba",
1918            "qa qx qxa qxc qxa");
1919 
1920     expect("q(ab)* > x;",
1921            "qa qab qaba qababc",
1922            "xa x xa xc");
1923 
1924     // NOTE: The (ab)+ when referenced just yields a single "ab",
1925     // not the full sequence of them.  This accords with perl behavior.
1926     expect("q(ab)* > '(' $1 ')';",
1927            "qa qab qaba qababc",
1928            "()a (ab) (ab)a (ab)c");
1929 
1930     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1931     // quoted string
1932     expect("'ab'+ > x;",
1933            "bb ab ababb",
1934            "bb x xb");
1935 
1936     // $foo+ and $foo* -- the quantifier should apply to the entire
1937     // variable reference
1938     expect("$var = ab; $var+ > x;",
1939            "bb ab ababb",
1940            "bb x xb");
1941 }
1942 
1943 class TestTrans : public Transliterator {
1944 public:
TestTrans(const UnicodeString & id)1945     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1946     }
clone(void) const1947     virtual Transliterator* clone(void) const {
1948         return new TestTrans(getID());
1949     }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1950     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1951         UBool /*isIncremental*/) const
1952     {
1953         offsets.start = offsets.limit;
1954     }
1955     virtual UClassID getDynamicClassID() const;
1956     static UClassID U_EXPORT2 getStaticClassID();
1957 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1958 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1959 
1960 /**
1961  * Test Source-Target/Variant.
1962  */
1963 void TransliteratorTest::TestSTV(void) {
1964     int32_t ns = Transliterator::countAvailableSources();
1965     if (ns < 0 || ns > 255) {
1966         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1967         return;
1968     }
1969     int32_t i, j;
1970     for (i=0; i<ns; ++i) {
1971         UnicodeString source;
1972         Transliterator::getAvailableSource(i, source);
1973         logln((UnicodeString)"" + i + ": " + source);
1974         if (source.length() == 0) {
1975             errln("FAIL: empty source");
1976             continue;
1977         }
1978         int32_t nt = Transliterator::countAvailableTargets(source);
1979         if (nt < 0 || nt > 255) {
1980             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1981             continue;
1982         }
1983         for (int32_t j=0; j<nt; ++j) {
1984             UnicodeString target;
1985             Transliterator::getAvailableTarget(j, source, target);
1986             logln((UnicodeString)" " + j + ": " + target);
1987             if (target.length() == 0) {
1988                 errln("FAIL: empty target");
1989                 continue;
1990             }
1991             int32_t nv = Transliterator::countAvailableVariants(source, target);
1992             if (nv < 0 || nv > 255) {
1993                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1994                 continue;
1995             }
1996             for (int32_t k=0; k<nv; ++k) {
1997                 UnicodeString variant;
1998                 Transliterator::getAvailableVariant(k, source, target, variant);
1999                 if (variant.length() == 0) {
2000                     logln((UnicodeString)"  " + k + ": <empty>");
2001                 } else {
2002                     logln((UnicodeString)"  " + k + ": " + variant);
2003                 }
2004             }
2005         }
2006     }
2007 
2008     // Test registration
2009     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2010     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2012     for (i=0; i<3; ++i) {
2013         Transliterator *t = new TestTrans(IDS[i]);
2014         if (t == 0) {
2015             errln("FAIL: out of memory");
2016             return;
2017         }
2018         if (t->getID() != IDS[i]) {
2019             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2020             delete t;
2021             return;
2022         }
2023         Transliterator::registerInstance(t);
2024         UErrorCode status = U_ZERO_ERROR;
2025         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2026         if (t == NULL) {
2027             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2028                   IDS[i]);
2029         } else {
2030             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2031                   IDS[i]);
2032             delete t;
2033         }
2034         Transliterator::unregister(IDS[i]);
2035         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2036         if (t != NULL) {
2037             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2038                   IDS[i]);
2039             delete t;
2040         }
2041     }
2042 
2043     // Make sure getAvailable API reflects removal
2044     int32_t n = Transliterator::countAvailableIDs();
2045     for (i=0; i<n; ++i) {
2046         UnicodeString id = Transliterator::getAvailableID(i);
2047         for (j=0; j<3; ++j) {
2048             if (id.caseCompare(FULL_IDS[j],0)==0) {
2049                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2050             }
2051         }
2052     }
2053     n = Transliterator::countAvailableTargets("Any");
2054     for (i=0; i<n; ++i) {
2055         UnicodeString t;
2056         Transliterator::getAvailableTarget(i, "Any", t);
2057         if (t.caseCompare(IDS[0],0)==0) {
2058             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2059         }
2060     }
2061     n = Transliterator::countAvailableSources();
2062     for (i=0; i<n; ++i) {
2063         UnicodeString s;
2064         Transliterator::getAvailableSource(i, s);
2065         for (j=0; j<3; ++j) {
2066             if (SOURCES[j] == NULL) continue;
2067             if (s.caseCompare(SOURCES[j],0)==0) {
2068                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2069             }
2070         }
2071     }
2072 }
2073 
2074 /**
2075  * Test inverse of Greek-Latin; Title()
2076  */
TestCompoundInverse(void)2077 void TransliteratorTest::TestCompoundInverse(void) {
2078     UParseError parseError;
2079     UErrorCode status = U_ZERO_ERROR;
2080     Transliterator *t = Transliterator::createInstance
2081         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2082     if (t == 0) {
2083         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2084         return;
2085     }
2086     UnicodeString exp("(Title);Latin-Greek");
2087     if (t->getID() == exp) {
2088         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2089               t->getID());
2090     } else {
2091         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2092               t->getID() + "\", expected \"" + exp + "\"");
2093     }
2094     delete t;
2095 }
2096 
2097 /**
2098  * Test NFD chaining with RBT
2099  */
TestNFDChainRBT()2100 void TransliteratorTest::TestNFDChainRBT() {
2101     UParseError pe;
2102     UErrorCode ec = U_ZERO_ERROR;
2103     Transliterator* t = Transliterator::createFromRules(
2104                                "TEST", "::NFD; aa > Q; a > q;",
2105                                UTRANS_FORWARD, pe, ec);
2106     if (t == NULL || U_FAILURE(ec)) {
2107         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2108         return;
2109     }
2110     expect(*t, "aa", "Q");
2111     delete t;
2112 
2113     // TEMPORARY TESTS -- BEING DEBUGGED
2114 //=-    UnicodeString s, s2;
2115 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2116 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2117 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2118 //=-    expect(*t, s, s2);
2119 //=-    delete t;
2120 //=-
2121 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2122 //=-    expect(*t, s2, s);
2123 //=-    delete t;
2124 //=-
2125 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2127 //=-    expect(*t, s, s);
2128 //=-    delete t;
2129 
2130 //    const char* source[] = {
2131 //        /*
2132 //        "\\u015Br\\u012Bmad",
2133 //        "bhagavadg\\u012Bt\\u0101",
2134 //        "adhy\\u0101ya",
2135 //        "arjuna",
2136 //        "vi\\u1E63\\u0101da",
2137 //        "y\\u014Dga",
2138 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2139 //        "uv\\u0101cr\\u0325",
2140 //        */
2141 //        "rmk\\u1E63\\u0113t",
2142 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2143 //        /*
2144 //        "kuruk\\u1E63\\u0113tr\\u0113",
2145 //        "samav\\u0113t\\u0101",
2146 //        "yuyutsava-\\u1E25",
2147 //        "m\\u0101mak\\u0101-\\u1E25",
2148 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2149 //        "kimakurvata",
2150 //        "san\\u0304java",
2151 //        */
2152 //
2153 //        0
2154 //    };
2155 //    const char* expected[] = {
2156 //        /*
2157 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2158 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2159 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2160 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2161 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2162 //        "\\u092f\\u094b\\u0917",
2163 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2164 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2165 //        */
2166 //        "\\u0927",
2167 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2168 //        /*
2169 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2171 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2172 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2173 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2174 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2175 //        "\\u0938\\u0902\\u091c\\u0935",
2176 //        */
2177 //        0
2178 //    };
2179 //    UErrorCode status = U_ZERO_ERROR;
2180 //    UParseError parseError;
2181 //    UnicodeString message;
2182 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2183 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2184 //    if(U_FAILURE(status)){
2185 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2186 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2187 //        delete latinToDevToLatin;
2188 //        delete devToLatinToDev;
2189 //        return;
2190 //    }
2191 //    UnicodeString gotResult;
2192 //    for(int i= 0; source[i] != 0; i++){
2193 //        gotResult = source[i];
2194 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2195 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2196 //    }
2197 //    delete latinToDevToLatin;
2198 //    delete devToLatinToDev;
2199 }
2200 
2201 /**
2202  * Inverse of "Null" should be "Null". (J21)
2203  */
TestNullInverse()2204 void TransliteratorTest::TestNullInverse() {
2205     UParseError pe;
2206     UErrorCode ec = U_ZERO_ERROR;
2207     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2208     if (t == 0 || U_FAILURE(ec)) {
2209         errln("FAIL: createInstance");
2210         return;
2211     }
2212     Transliterator *u = t->createInverse(ec);
2213     if (u == 0 || U_FAILURE(ec)) {
2214         errln("FAIL: createInverse");
2215         delete t;
2216         return;
2217     }
2218     if (u->getID() != "Null") {
2219         errln("FAIL: Inverse of Null should be Null");
2220     }
2221     delete t;
2222     delete u;
2223 }
2224 
2225 /**
2226  * Check ID of inverse of alias. (J22)
2227  */
TestAliasInverseID()2228 void TransliteratorTest::TestAliasInverseID() {
2229     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2230     UParseError pe;
2231     UErrorCode ec = U_ZERO_ERROR;
2232     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2233     if (t == 0 || U_FAILURE(ec)) {
2234         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2235         return;
2236     }
2237     Transliterator *u = t->createInverse(ec);
2238     if (u == 0 || U_FAILURE(ec)) {
2239         errln("FAIL: createInverse");
2240         delete t;
2241         return;
2242     }
2243     UnicodeString exp = "Hangul-Latin";
2244     UnicodeString got = u->getID();
2245     if (got != exp) {
2246         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2247               ", expected " + exp);
2248     }
2249     delete t;
2250     delete u;
2251 }
2252 
2253 /**
2254  * Test IDs of inverses of compound transliterators. (J20)
2255  */
TestCompoundInverseID()2256 void TransliteratorTest::TestCompoundInverseID() {
2257     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2258     UParseError pe;
2259     UErrorCode ec = U_ZERO_ERROR;
2260     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2261     if (t == 0 || U_FAILURE(ec)) {
2262         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2263         return;
2264     }
2265     Transliterator *u = t->createInverse(ec);
2266     if (u == 0 || U_FAILURE(ec)) {
2267         errln("FAIL: createInverse");
2268         delete t;
2269         return;
2270     }
2271     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2272     UnicodeString got = u->getID();
2273     if (got != exp) {
2274         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2275               ", expected " + exp);
2276     }
2277     delete t;
2278     delete u;
2279 }
2280 
2281 /**
2282  * Test undefined variable.
2283 
2284  */
TestUndefinedVariable()2285 void TransliteratorTest::TestUndefinedVariable() {
2286     UnicodeString rule = "$initial } a <> \\u1161;";
2287     UParseError pe;
2288     UErrorCode ec = U_ZERO_ERROR;
2289     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2290     delete t;
2291     if (U_FAILURE(ec)) {
2292         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2293               u_errorName(ec));
2294         return;
2295     }
2296     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2297           u_errorName(ec));
2298 }
2299 
2300 /**
2301  * Test empty context.
2302  */
TestEmptyContext()2303 void TransliteratorTest::TestEmptyContext() {
2304     expect(" { a } > b;", "xay a ", "xby b ");
2305 }
2306 
2307 /**
2308 * Test compound filter ID syntax
2309 */
TestCompoundFilterID(void)2310 void TransliteratorTest::TestCompoundFilterID(void) {
2311     static const char* DATA[] = {
2312         // Col. 1 = ID or rule set (latter must start with #)
2313 
2314         // = columns > 1 are null if expect col. 1 to be illegal =
2315 
2316         // Col. 2 = direction, "F..." or "R..."
2317         // Col. 3 = source string
2318         // Col. 4 = exp result
2319 
2320         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2321         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2322         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2323         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2324         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2325         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2326         NULL,
2327     };
2328 
2329     for (int32_t i=0; DATA[i]; i+=4) {
2330         UnicodeString id = CharsToUnicodeString(DATA[i]);
2331         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2332             UTRANS_REVERSE : UTRANS_FORWARD;
2333         UnicodeString source;
2334         UnicodeString exp;
2335         if (DATA[i+2] != NULL) {
2336             source = CharsToUnicodeString(DATA[i+2]);
2337             exp = CharsToUnicodeString(DATA[i+3]);
2338         }
2339         UBool expOk = (DATA[i+1] != NULL);
2340         Transliterator* t = NULL;
2341         UParseError pe;
2342         UErrorCode ec = U_ZERO_ERROR;
2343         if (id.charAt(0) == 0x23/*#*/) {
2344             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2345         } else {
2346             t = Transliterator::createInstance(id, direction, pe, ec);
2347         }
2348         UBool ok = (t != NULL && U_SUCCESS(ec));
2349         UnicodeString transID;
2350         if (t!=0) {
2351             transID = t->getID();
2352         }
2353         else {
2354             transID = UnicodeString("NULL", "");
2355         }
2356         if (ok == expOk) {
2357             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2358                   u_errorName(ec));
2359             if (source.length() != 0) {
2360                 expect(*t, source, exp);
2361             }
2362             delete t;
2363         } else {
2364             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2365                   u_errorName(ec));
2366         }
2367     }
2368 }
2369 
2370 /**
2371  * Test new property set syntax
2372  */
TestPropertySet()2373 void TransliteratorTest::TestPropertySet() {
2374     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2375     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2376            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2377 }
2378 
2379 /**
2380  * Test various failure points of the new 2.0 engine.
2381  */
TestNewEngine()2382 void TransliteratorTest::TestNewEngine() {
2383     UParseError pe;
2384     UErrorCode ec = U_ZERO_ERROR;
2385     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2386     if (t == 0 || U_FAILURE(ec)) {
2387         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2388         return;
2389     }
2390     // Katakana should be untouched
2391     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2392            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2393 
2394     delete t;
2395 
2396 #if 1
2397     // This test will only work if Transliterator.ROLLBACK is
2398     // true.  Otherwise, this test will fail, revealing a
2399     // limitation of global filters in incremental mode.
2400     Transliterator *a =
2401         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2402     Transliterator *A =
2403         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2404     if (U_FAILURE(ec)) {
2405         delete a;
2406         delete A;
2407         return;
2408     }
2409 
2410     Transliterator* array[3];
2411     array[0] = a;
2412     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2413     array[2] = A;
2414     if (U_FAILURE(ec)) {
2415         errln("FAIL: createInstance NFD");
2416         delete a;
2417         delete A;
2418         delete array[1];
2419         return;
2420     }
2421 
2422     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2423     if (U_FAILURE(ec)) {
2424         errln("FAIL: UnicodeSet constructor");
2425         delete a;
2426         delete A;
2427         delete array[1];
2428         delete t;
2429         return;
2430     }
2431 
2432     expect(*t, "aAaA", "bAbA");
2433 
2434     assertTrue("countElements", t->countElements() == 3);
2435     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2436     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2437     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2438     assertSuccess("getElement", ec);
2439 
2440     delete a;
2441     delete A;
2442     delete array[1];
2443     delete t;
2444 #endif
2445 
2446     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2447            "a",
2448            "ax");
2449 
2450     UnicodeString gr = CharsToUnicodeString(
2451         "$ddot = \\u0308 ;"
2452         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2453         "$rough = \\u0314 ;"
2454         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2455         "\\u03b1 <> a ;"
2456         "$rough <> h ;");
2457 
2458     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2459 }
2460 
2461 /**
2462  * Test quantified segment behavior.  We want:
2463  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2464  */
TestQuantifiedSegment(void)2465 void TransliteratorTest::TestQuantifiedSegment(void) {
2466     // The normal case
2467     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2468 
2469     // The tricky case; the quantifier is around the segment
2470     expect("([abc])+ > x $1 x;", "cba", "xax");
2471 
2472     // Tricky case in reverse direction
2473     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2474 
2475     // Check post-context segment
2476     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2477 
2478     // Test toRule/toPattern for non-quantified segment.
2479     // Careful with spacing here.
2480     UnicodeString r("([a-c]){q} > x $1 x;");
2481     UParseError pe;
2482     UErrorCode ec = U_ZERO_ERROR;
2483     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2484     if (U_FAILURE(ec)) {
2485         errln("FAIL: createFromRules");
2486         delete t;
2487         return;
2488     }
2489     UnicodeString rr;
2490     t->toRules(rr, TRUE);
2491     if (r != rr) {
2492         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2493     } else {
2494         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2495     }
2496     delete t;
2497 
2498     // Test toRule/toPattern for quantified segment.
2499     // Careful with spacing here.
2500     r = "([a-c])+{q} > x $1 x;";
2501     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2502     if (U_FAILURE(ec)) {
2503         errln("FAIL: createFromRules");
2504         delete t;
2505         return;
2506     }
2507     t->toRules(rr, TRUE);
2508     if (r != rr) {
2509         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2510     } else {
2511         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2512     }
2513     delete t;
2514 }
2515 
2516 //======================================================================
2517 // Ram's tests
2518 //======================================================================
TestDevanagariLatinRT()2519 void TransliteratorTest::TestDevanagariLatinRT(){
2520     const int MAX_LEN= 52;
2521     const char* const source[MAX_LEN] = {
2522         "bh\\u0101rata",
2523         "kra",
2524         "k\\u1E63a",
2525         "khra",
2526         "gra",
2527         "\\u1E45ra",
2528         "cra",
2529         "chra",
2530         "j\\u00F1a",
2531         "jhra",
2532         "\\u00F1ra",
2533         "\\u1E6Dya",
2534         "\\u1E6Dhra",
2535         "\\u1E0Dya",
2536       //"r\\u0323ya", // \u095c is not valid in Devanagari
2537         "\\u1E0Dhya",
2538         "\\u1E5Bhra",
2539         "\\u1E47ra",
2540         "tta",
2541         "thra",
2542         "dda",
2543         "dhra",
2544         "nna",
2545         "pra",
2546         "phra",
2547         "bra",
2548         "bhra",
2549         "mra",
2550         "\\u1E49ra",
2551       //"l\\u0331ra",
2552         "yra",
2553         "\\u1E8Fra",
2554       //"l-",
2555         "vra",
2556         "\\u015Bra",
2557         "\\u1E63ra",
2558         "sra",
2559         "hma",
2560         "\\u1E6D\\u1E6Da",
2561         "\\u1E6D\\u1E6Dha",
2562         "\\u1E6Dh\\u1E6Dha",
2563         "\\u1E0D\\u1E0Da",
2564         "\\u1E0D\\u1E0Dha",
2565         "\\u1E6Dya",
2566         "\\u1E6Dhya",
2567         "\\u1E0Dya",
2568         "\\u1E0Dhya",
2569         // Not roundtrippable --
2570         // \\u0939\\u094d\\u094d\\u092E  - hma
2571         // \\u0939\\u094d\\u092E         - hma
2572         // CharsToUnicodeString("hma"),
2573         "hya",
2574         "\\u015Br\\u0325",
2575         "\\u015Bca",
2576         "\\u0115",
2577         "san\\u0304j\\u012Bb s\\u0113nagupta",
2578         "\\u0101nand vaddir\\u0101ju",
2579         "\\u0101",
2580         "a"
2581     };
2582     const char* const expected[MAX_LEN] = {
2583         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2584         "\\u0915\\u094D\\u0930",          /* kra         */
2585         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2586         "\\u0916\\u094D\\u0930",          /* khra        */
2587         "\\u0917\\u094D\\u0930",          /* gra         */
2588         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2589         "\\u091A\\u094D\\u0930",          /* cra         */
2590         "\\u091B\\u094D\\u0930",          /* chra        */
2591         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2592         "\\u091D\\u094D\\u0930",          /* jhra        */
2593         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2594         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2595         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2596         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2597       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2598         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2599         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2600         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2601         "\\u0924\\u094D\\u0924",          /* tta         */
2602         "\\u0925\\u094D\\u0930",          /* thra        */
2603         "\\u0926\\u094D\\u0926",          /* dda         */
2604         "\\u0927\\u094D\\u0930",          /* dhra        */
2605         "\\u0928\\u094D\\u0928",          /* nna         */
2606         "\\u092A\\u094D\\u0930",          /* pra         */
2607         "\\u092B\\u094D\\u0930",          /* phra        */
2608         "\\u092C\\u094D\\u0930",          /* bra         */
2609         "\\u092D\\u094D\\u0930",          /* bhra        */
2610         "\\u092E\\u094D\\u0930",          /* mra         */
2611         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2612       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2613         "\\u092F\\u094D\\u0930",          /* yra         */
2614         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2615       //"l-",
2616         "\\u0935\\u094D\\u0930",          /* vra         */
2617         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2618         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2619         "\\u0938\\u094D\\u0930",          /* sra         */
2620         "\\u0939\\u094d\\u092E",          /* hma         */
2621         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2622         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2623         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2624         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2625         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2626         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2627         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2628         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2629         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2630      // "hma",                         /* hma         */
2631         "\\u0939\\u094D\\u092F",          /* hya         */
2632         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2633         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2634         "\\u090d",                        /* e\\u0306    */
2635         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2636         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2637         "\\u0906",
2638         "\\u0905",
2639     };
2640     UErrorCode status = U_ZERO_ERROR;
2641     UParseError parseError;
2642     UnicodeString message;
2643     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2644     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2645     if(U_FAILURE(status)){
2646         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2647         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2648         return;
2649     }
2650     UnicodeString gotResult;
2651     for(int i= 0; i<MAX_LEN; i++){
2652         gotResult = source[i];
2653         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2654         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2655     }
2656     delete latinToDev;
2657     delete devToLatin;
2658 }
2659 
TestTeluguLatinRT()2660 void TransliteratorTest::TestTeluguLatinRT(){
2661     const int MAX_LEN=10;
2662     const char* const source[MAX_LEN] = {
2663         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2664         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2665         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2666         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2667         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2668         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2669         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2670         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2671         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2672         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2673     };
2674 
2675     const char* const expected[MAX_LEN] = {
2676         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2677         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2678         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2679         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2681         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2682         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2683         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2684         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2685         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686     };
2687 
2688     UErrorCode status = U_ZERO_ERROR;
2689     UParseError parseError;
2690     UnicodeString message;
2691     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2692     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2693     if(U_FAILURE(status)){
2694         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2695         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2696         return;
2697     }
2698     UnicodeString gotResult;
2699     for(int i= 0; i<MAX_LEN; i++){
2700         gotResult = source[i];
2701         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2702         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2703     }
2704     delete latinToDev;
2705     delete devToLatin;
2706 }
2707 
TestSanskritLatinRT()2708 void TransliteratorTest::TestSanskritLatinRT(){
2709     const int MAX_LEN =16;
2710     const char* const source[MAX_LEN] = {
2711         "rmk\\u1E63\\u0113t",
2712         "\\u015Br\\u012Bmad",
2713         "bhagavadg\\u012Bt\\u0101",
2714         "adhy\\u0101ya",
2715         "arjuna",
2716         "vi\\u1E63\\u0101da",
2717         "y\\u014Dga",
2718         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2719         "uv\\u0101cr\\u0325",
2720         "dharmak\\u1E63\\u0113tr\\u0113",
2721         "kuruk\\u1E63\\u0113tr\\u0113",
2722         "samav\\u0113t\\u0101",
2723         "yuyutsava\\u1E25",
2724         "m\\u0101mak\\u0101\\u1E25",
2725     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2726         "kimakurvata",
2727         "san\\u0304java",
2728     };
2729     const char* const expected[MAX_LEN] = {
2730         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2731         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2732         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2733         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2734         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2735         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2736         "\\u092f\\u094b\\u0917",
2737         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2738         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2739         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2740         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2742         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2743         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2744     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2745         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2746         "\\u0938\\u0902\\u091c\\u0935",
2747     };
2748     UErrorCode status = U_ZERO_ERROR;
2749     UParseError parseError;
2750     UnicodeString message;
2751     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2752     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2753     if(U_FAILURE(status)){
2754         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2755         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2756         return;
2757     }
2758     UnicodeString gotResult;
2759     for(int i= 0; i<MAX_LEN; i++){
2760         gotResult = source[i];
2761         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2762         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2763     }
2764     delete latinToDev;
2765     delete devToLatin;
2766 }
2767 
2768 
TestCompoundLatinRT()2769 void TransliteratorTest::TestCompoundLatinRT(){
2770     const char* const source[] = {
2771         "rmk\\u1E63\\u0113t",
2772         "\\u015Br\\u012Bmad",
2773         "bhagavadg\\u012Bt\\u0101",
2774         "adhy\\u0101ya",
2775         "arjuna",
2776         "vi\\u1E63\\u0101da",
2777         "y\\u014Dga",
2778         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2779         "uv\\u0101cr\\u0325",
2780         "dharmak\\u1E63\\u0113tr\\u0113",
2781         "kuruk\\u1E63\\u0113tr\\u0113",
2782         "samav\\u0113t\\u0101",
2783         "yuyutsava\\u1E25",
2784         "m\\u0101mak\\u0101\\u1E25",
2785      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2786         "kimakurvata",
2787         "san\\u0304java"
2788     };
2789     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2790     const char* const expected[MAX_LEN] = {
2791         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2792         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2793         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2794         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2795         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2796         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2797         "\\u092f\\u094b\\u0917",
2798         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2799         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2800         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2801         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2803         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2804         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2805     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2806         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2807         "\\u0938\\u0902\\u091c\\u0935"
2808     };
2809     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2810         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2811         return;
2812     }
2813 
2814     UErrorCode status = U_ZERO_ERROR;
2815     UParseError parseError;
2816     UnicodeString message;
2817     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2818     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2819     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2820     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2821 
2822     if(U_FAILURE(status)){
2823         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2824         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2825         return;
2826     }
2827     UnicodeString gotResult;
2828     for(int i= 0; i<MAX_LEN; i++){
2829         gotResult = source[i];
2830         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2831         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2832         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833 
2834     }
2835     delete(latinToDevToLatin);
2836     delete(devToLatinToDev);
2837     delete(devToTelToDev);
2838     delete(latinToTelToLatin);
2839 }
2840 
2841 /**
2842  * Test Gurmukhi-Devanagari Tippi and Bindi
2843  */
TestGurmukhiDevanagari()2844 void TransliteratorTest::TestGurmukhiDevanagari(){
2845     // the rule says:
2846     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2847     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2848     UErrorCode status = U_ZERO_ERROR;
2849     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2850     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2851     UParseError parseError;
2852 
2853     UnicodeSetIterator vIter(vowel);
2854     UnicodeSetIterator nvIter(non_vowel);
2855     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2856     if(U_FAILURE(status)) {
2857       dataerrln("Error creating transliterator %s", u_errorName(status));
2858       delete trans;
2859       return;
2860     }
2861     UnicodeString src (" \\u0902", -1, US_INV);
2862     UnicodeString expected(" \\u0A02", -1, US_INV);
2863     src = src.unescape();
2864     expected= expected.unescape();
2865 
2866     while(vIter.next()){
2867         src.setCharAt(0,(UChar) vIter.getCodepoint());
2868         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2869         expect(*trans,src,expected);
2870     }
2871 
2872     expected.setCharAt(1,0x0A70);
2873     while(nvIter.next()){
2874         //src.setCharAt(0,(char) nvIter.codepoint);
2875         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2876         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2877         expect(*trans,src,expected);
2878     }
2879     delete trans;
2880 }
2881 /**
2882  * Test instantiation from a locale.
2883  */
TestLocaleInstantiation(void)2884 void TransliteratorTest::TestLocaleInstantiation(void) {
2885     UParseError pe;
2886     UErrorCode ec = U_ZERO_ERROR;
2887     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2888     if (U_FAILURE(ec)) {
2889         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2890         delete t;
2891         return;
2892     }
2893     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2894     delete t;
2895 
2896     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2897     if (U_FAILURE(ec)) {
2898         errln("FAIL: createInstance(en-el)");
2899         delete t;
2900         return;
2901     }
2902     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2903     delete t;
2904 }
2905 
2906 /**
2907  * Test title case handling of accent (should ignore accents)
2908  */
TestTitleAccents(void)2909 void TransliteratorTest::TestTitleAccents(void) {
2910     UParseError pe;
2911     UErrorCode ec = U_ZERO_ERROR;
2912     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2913     if (U_FAILURE(ec)) {
2914         errln("FAIL: createInstance(Title)");
2915         delete t;
2916         return;
2917     }
2918     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2919     delete t;
2920 }
2921 
2922 /**
2923  * Basic test of a locale resource based rule.
2924  */
TestLocaleResource()2925 void TransliteratorTest::TestLocaleResource() {
2926     const char* DATA[] = {
2927         // id                    from               to
2928         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2929         "Latin-el",              "b",               "\\u03bc\\u03c0",
2930         "Latin-Greek",           "b",               "\\u03B2",
2931         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2932         "el-Latin",              "\\u03B2",         "v",
2933         "Greek-Latin",           "\\u03B2",         "b",
2934     };
2935     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2936     for (int32_t i=0; i<DATA_length; i+=3) {
2937         UParseError pe;
2938         UErrorCode ec = U_ZERO_ERROR;
2939         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2940         if (U_FAILURE(ec)) {
2941             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2942             delete t;
2943             continue;
2944         }
2945         expect(*t, CharsToUnicodeString(DATA[i+1]),
2946                CharsToUnicodeString(DATA[i+2]));
2947         delete t;
2948     }
2949 }
2950 
2951 /**
2952  * Make sure parse errors reference the right line.
2953  */
TestParseError()2954 void TransliteratorTest::TestParseError() {
2955     static const char* rule =
2956         "a > b;\n"
2957         "# more stuff\n"
2958         "d << b;";
2959     UErrorCode ec = U_ZERO_ERROR;
2960     UParseError pe;
2961     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2962     delete t;
2963     if (U_FAILURE(ec)) {
2964         UnicodeString err(pe.preContext);
2965         err.append((UChar)124/*|*/).append(pe.postContext);
2966         if (err.indexOf("d << b") >= 0) {
2967             logln("Ok: " + err);
2968         } else {
2969             errln("FAIL: " + err);
2970         }
2971     }
2972     else {
2973         errln("FAIL: no syntax error");
2974     }
2975     static const char* maskingRule =
2976         "a>x;\n"
2977         "# more stuff\n"
2978         "ab>y;";
2979     ec = U_ZERO_ERROR;
2980     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2981     if (ec != U_RULE_MASK_ERROR) {
2982         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2983     }
2984     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2985         errln("FAIL: did not get expected precontext");
2986     }
2987     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2988         errln("FAIL: did not get expected postcontext");
2989     }
2990 }
2991 
2992 /**
2993  * Make sure sets on output are disallowed.
2994  */
TestOutputSet()2995 void TransliteratorTest::TestOutputSet() {
2996     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2997     UErrorCode ec = U_ZERO_ERROR;
2998     UParseError pe;
2999     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3000     delete t;
3001     if (U_FAILURE(ec)) {
3002         UnicodeString err(pe.preContext);
3003         err.append((UChar)124/*|*/).append(pe.postContext);
3004         logln("Ok: " + err);
3005         return;
3006     }
3007     errln("FAIL: No syntax error");
3008 }
3009 
3010 /**
3011  * Test the use variable range pragma, making sure that use of
3012  * variable range characters is detected and flagged as an error.
3013  */
TestVariableRange()3014 void TransliteratorTest::TestVariableRange() {
3015     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3016     UErrorCode ec = U_ZERO_ERROR;
3017     UParseError pe;
3018     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3019     delete t;
3020     if (U_FAILURE(ec)) {
3021         UnicodeString err(pe.preContext);
3022         err.append((UChar)124/*|*/).append(pe.postContext);
3023         logln("Ok: " + err);
3024         return;
3025     }
3026     errln("FAIL: No syntax error");
3027 }
3028 
3029 /**
3030  * Test invalid post context error handling
3031  */
TestInvalidPostContext()3032 void TransliteratorTest::TestInvalidPostContext() {
3033     UnicodeString rule = "a}b{c>d;";
3034     UErrorCode ec = U_ZERO_ERROR;
3035     UParseError pe;
3036     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3037     delete t;
3038     if (U_FAILURE(ec)) {
3039         UnicodeString err(pe.preContext);
3040         err.append((UChar)124/*|*/).append(pe.postContext);
3041         if (err.indexOf("a}b{c") >= 0) {
3042             logln("Ok: " + err);
3043         } else {
3044             errln("FAIL: " + err);
3045         }
3046         return;
3047     }
3048     errln("FAIL: No syntax error");
3049 }
3050 
3051 /**
3052  * Test ID form variants
3053  */
TestIDForms()3054 void TransliteratorTest::TestIDForms() {
3055     const char* DATA[] = {
3056         "NFC", NULL, "NFD",
3057         "nfd", NULL, "NFC", // make sure case is ignored
3058         "Any-NFKD", NULL, "Any-NFKC",
3059         "Null", NULL, "Null",
3060         "-nfkc", "nfkc", "NFKD",
3061         "-nfkc/", "nfkc", "NFKD",
3062         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3063         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3064         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3065         "Source-", NULL, NULL,
3066         "Source/Variant-", NULL, NULL,
3067         "Source-/Variant", NULL, NULL,
3068         "/Variant", NULL, NULL,
3069         "/Variant-", NULL, NULL,
3070         "-/Variant", NULL, NULL,
3071         "-/", NULL, NULL,
3072         "-", NULL, NULL,
3073         "/", NULL, NULL,
3074     };
3075     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3076 
3077     for (int32_t i=0; i<DATA_length; i+=3) {
3078         const char* ID = DATA[i];
3079         const char* expID = DATA[i+1];
3080         const char* expInvID = DATA[i+2];
3081         UBool expValid = (expInvID != NULL);
3082         if (expID == NULL) {
3083             expID = ID;
3084         }
3085         UParseError pe;
3086         UErrorCode ec = U_ZERO_ERROR;
3087         Transliterator *t =
3088             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3089         if (U_FAILURE(ec)) {
3090             if (!expValid) {
3091                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3092             } else {
3093                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3094             }
3095             delete t;
3096             continue;
3097         }
3098         Transliterator *u = t->createInverse(ec);
3099         if (U_FAILURE(ec)) {
3100             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3101             delete t;
3102             delete u;
3103             continue;
3104         }
3105         if (t->getID() == expID &&
3106             u->getID() == expInvID) {
3107             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3108         } else {
3109             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3110                   t->getID() + " x getInverse() => " + u->getID() +
3111                   ", expected " + expInvID);
3112         }
3113         delete t;
3114         delete u;
3115     }
3116 }
3117 
3118 static const UChar SPACE[]   = {32,0};
3119 static const UChar NEWLINE[] = {10,0};
3120 static const UChar RETURN[]  = {13,0};
3121 static const UChar EMPTY[]   = {0};
3122 
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3123 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3124                                     const UnicodeString& testRulesForward) {
3125     UnicodeString rules2; t2.toRules(rules2, TRUE);
3126     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3127     rules2.findAndReplace(SPACE, EMPTY);
3128     rules2.findAndReplace(NEWLINE, EMPTY);
3129     rules2.findAndReplace(RETURN, EMPTY);
3130 
3131     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3132 
3133     if (rules2 != testRules) {
3134         errln(label);
3135         logln((UnicodeString)"GENERATED RULES: " + rules2);
3136         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3137     }
3138 }
3139 
3140 /**
3141  * Mark's toRules test.
3142  */
TestToRulesMark()3143 void TransliteratorTest::TestToRulesMark() {
3144     const char* testRules =
3145         "::[[:Latin:][:Mark:]];"
3146         "::NFKD (NFC);"
3147         "::Lower (Lower);"
3148         "a <> \\u03B1;" // alpha
3149         "::NFKC (NFD);"
3150         "::Upper (Lower);"
3151         "::Lower ();"
3152         "::([[:Greek:][:Mark:]]);"
3153         ;
3154     const char* testRulesForward =
3155         "::[[:Latin:][:Mark:]];"
3156         "::NFKD(NFC);"
3157         "::Lower(Lower);"
3158         "a > \\u03B1;"
3159         "::NFKC(NFD);"
3160         "::Upper (Lower);"
3161         "::Lower ();"
3162         ;
3163     const char* testRulesBackward =
3164         "::[[:Greek:][:Mark:]];"
3165         "::Lower (Upper);"
3166         "::NFD(NFKC);"
3167         "\\u03B1 > a;"
3168         "::Lower(Lower);"
3169         "::NFC(NFKD);"
3170         ;
3171     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3172     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3173 
3174     UParseError pe;
3175     UErrorCode ec = U_ZERO_ERROR;
3176     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3177     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3178 
3179     if (U_FAILURE(ec)) {
3180         delete t2;
3181         delete t3;
3182         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3183         return;
3184     }
3185 
3186     expect(*t2, source, target);
3187     expect(*t3, target, source);
3188 
3189     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3190     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3191 
3192     delete t2;
3193     delete t3;
3194 }
3195 
3196 /**
3197  * Test Escape and Unescape transliterators.
3198  */
TestEscape()3199 void TransliteratorTest::TestEscape() {
3200     UParseError pe;
3201     UErrorCode ec;
3202     Transliterator *t;
3203 
3204     ec = U_ZERO_ERROR;
3205     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3206     if (U_FAILURE(ec)) {
3207         errln((UnicodeString)"FAIL: createInstance");
3208     } else {
3209         expect(*t,
3210                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3211                "@12Q");
3212     }
3213     delete t;
3214 
3215     ec = U_ZERO_ERROR;
3216     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3217     if (U_FAILURE(ec)) {
3218         errln((UnicodeString)"FAIL: createInstance");
3219     } else {
3220         expect(*t,
3221                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3222                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3223     }
3224     delete t;
3225 
3226     ec = U_ZERO_ERROR;
3227     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3228     if (U_FAILURE(ec)) {
3229         errln((UnicodeString)"FAIL: createInstance");
3230     } else {
3231         expect(*t,
3232                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3233                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3234     }
3235     delete t;
3236 
3237     ec = U_ZERO_ERROR;
3238     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3239     if (U_FAILURE(ec)) {
3240         errln((UnicodeString)"FAIL: createInstance");
3241     } else {
3242         expect(*t,
3243                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3244                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3245     }
3246     delete t;
3247 }
3248 
3249 
TestAnchorMasking()3250 void TransliteratorTest::TestAnchorMasking(){
3251     UnicodeString rule ("^a > Q; a > q;");
3252     UErrorCode status= U_ZERO_ERROR;
3253     UParseError parseError;
3254 
3255     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3256     if(U_FAILURE(status)){
3257         errln(UnicodeString("FAIL: ") + "ID" +
3258               ".createFromRules() => bad rules" +
3259               /*", parse error " + parseError.code +*/
3260               ", line " + parseError.line +
3261               ", offset " + parseError.offset +
3262               ", context " + prettify(parseError.preContext, TRUE) +
3263               ", rules: " + prettify(rule, TRUE));
3264     }
3265     delete t;
3266 }
3267 
3268 /**
3269  * Make sure display names of variants look reasonable.
3270  */
TestDisplayName()3271 void TransliteratorTest::TestDisplayName() {
3272 #if UCONFIG_NO_FORMATTING
3273     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3274     return;
3275 #else
3276     static const char* DATA[] = {
3277         // ID, forward name, reverse name
3278         // Update the text as necessary -- the important thing is
3279         // not the text itself, but how various cases are handled.
3280 
3281         // Basic test
3282         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3283 
3284         // Variants
3285         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3286 
3287         // Target-only IDs
3288         "NFC", "Any to NFC", "Any to NFD",
3289     };
3290 
3291     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3292 
3293     Locale US("en", "US");
3294 
3295     for (int32_t i=0; i<DATA_length; i+=3) {
3296         UnicodeString name;
3297         Transliterator::getDisplayName(DATA[i], US, name);
3298         if (name != DATA[i+1]) {
3299             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3300                   name + ", expected " + DATA[i+1]);
3301         } else {
3302             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3303         }
3304         UErrorCode ec = U_ZERO_ERROR;
3305         UParseError pe;
3306         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3307         if (U_FAILURE(ec)) {
3308             delete t;
3309             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3310             continue;
3311         }
3312         name = Transliterator::getDisplayName(t->getID(), US, name);
3313         if (name != DATA[i+2]) {
3314             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3315                   name + ", expected " + DATA[i+2]);
3316         } else {
3317             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3318         }
3319         delete t;
3320     }
3321 #endif
3322 }
3323 
TestSpecialCases(void)3324 void TransliteratorTest::TestSpecialCases(void) {
3325     const UnicodeString registerRules[] = {
3326         "Any-Dev1", "x > X; y > Y;",
3327         "Any-Dev2", "XY > Z",
3328         "Greek-Latin/FAKE",
3329             CharsToUnicodeString
3330             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3331         "" // END MARKER
3332     };
3333 
3334     const UnicodeString testCases[] = {
3335         // NORMALIZATION
3336         // should add more test cases
3337         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 
3342         // mp -> b BUG
3343         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345 
3346         // check for devanagari bug
3347         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3348 
3349         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352 
3353         //TODO: enable this test once Titlecase works right
3354         /*
3355         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3357                  */
3358         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3360         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3362 
3363         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3364         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365 
3366          // FORMS OF S
3367         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375         // Tatiana bug
3376         // Upper: TAT\\u02B9\\u00C2NA
3377         // Lower: tat\\u02B9\\u00E2na
3378         // Title: Tat\\u02B9\\u00E2na
3379         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3385 
3386         "" // END MARKER
3387     };
3388 
3389     UParseError pos;
3390     int32_t i;
3391     for (i = 0; registerRules[i].length()!=0; i+=2) {
3392         UErrorCode status = U_ZERO_ERROR;
3393 
3394         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3395             registerRules[i+1], UTRANS_FORWARD, pos, status);
3396         if (U_FAILURE(status)) {
3397             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3398         } else {
3399             Transliterator::registerInstance(t);
3400         }
3401     }
3402     for (i = 0; testCases[i].length()!=0; i+=3) {
3403         UErrorCode ec = U_ZERO_ERROR;
3404         UParseError pe;
3405         const UnicodeString& name = testCases[i];
3406         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3407         if (U_FAILURE(ec)) {
3408             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3409             delete t;
3410             continue;
3411         }
3412         const UnicodeString& id = t->getID();
3413         const UnicodeString& source = testCases[i+1];
3414         UnicodeString target;
3415 
3416         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3417 
3418         if (testCases[i+2].length() > 0) {
3419             target = testCases[i+2];
3420         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3421             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3422         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3423             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3424         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3425             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3426         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3427             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3428         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3429             target = source;
3430             target.toLower(Locale::getUS());
3431         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3432             target = source;
3433             target.toUpper(Locale::getUS());
3434         }
3435         if (U_FAILURE(ec)) {
3436             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3437             continue;
3438         }
3439 
3440         expect(*t, source, target);
3441         delete t;
3442     }
3443     for (i = 0; registerRules[i].length()!=0; i+=2) {
3444         Transliterator::unregister(registerRules[i]);
3445     }
3446 }
3447 
/**
 * Format a code point as an escape sequence into a caller-supplied buffer.
 *
 * Values up to 0xFFFF are written as a backslash, 'u' and at least four
 * lowercase hex digits; larger values are written as a backslash, 'U' and
 * eight lowercase hex digits.  The buffer size is not checked here; the
 * caller must supply one large enough for the result and its terminating NUL.
 *
 * @param ch     code point to format
 * @param buffer destination for the NUL-terminated result
 * @return       buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    if (ch > 0xFFFF) {
        sprintf(buffer, "\\U%08x", (int)ch);
        return buffer;
    }
    sprintf(buffer, "\\u%04x", (int)ch);
    return buffer;
}
3456 
TestSurrogateCasing(void)3457 void TransliteratorTest::TestSurrogateCasing (void) {
3458     // check that casing handles surrogates
3459     // titlecase is currently defective
3460     char buffer[20];
3461     UChar buffer2[20];
3462     UChar32 dee;
3463     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3464     UnicodeString DEE(u_totitle(dee));
3465     if (DEE != DESERET_DEE) {
3466         err("Fails titlecase of surrogates");
3467         err(Char32ToEscapedChars(dee, buffer));
3468         err(", ");
3469         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3470     }
3471 
3472     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3473     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3474     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3475     UErrorCode status= U_ZERO_ERROR;
3476 
3477     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3479         errln("Fails: Can't uppercase surrogates.");
3480     }
3481 
3482     status= U_ZERO_ERROR;
3483     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3484     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3485         errln("Fails: Can't lowercase surrogates.");
3486     }
3487 }
3488 
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3489 static void _trans(Transliterator& t, const UnicodeString& src,
3490                    UnicodeString& result) {
3491     result = src;
3492     t.transliterate(result);
3493 }
3494 
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3495 static void _trans(const UnicodeString& id, const UnicodeString& src,
3496                    UnicodeString& result, UErrorCode ec) {
3497     UParseError pe;
3498     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3499     if (U_SUCCESS(ec)) {
3500         _trans(*t, src, result);
3501     }
3502     delete t;
3503 }
3504 
/**
 * Look up source in a flat array of (key, value) pairs.
 *
 * Keys are compared case-insensitively (U_FOLD_CASE_DEFAULT).  The array is
 * terminated by an empty string in a key position.
 *
 * @param source key to look for
 * @param pairs  key/value array ending with an empty key
 * @return       the value paired with the first matching key, or an empty
 *               string when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    for (const UnicodeString* entry = pairs; entry->length() > 0; entry += 2) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
    }
    return UnicodeString();
}
3515 
3516 // Check to see that incremental gets at least part way through a reasonable string.
3517 
TestIncrementalProgress(void)3518 void TransliteratorTest::TestIncrementalProgress(void) {
3519     UErrorCode ec = U_ZERO_ERROR;
3520     UnicodeString latinTest = "The Quick Brown Fox.";
3521     UnicodeString devaTest;
3522     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3523     UnicodeString kataTest;
3524     _trans("Latin-Katakana", latinTest, kataTest, ec);
3525     if (U_FAILURE(ec)) {
3526         errln("FAIL: Internal error");
3527         return;
3528     }
3529     const UnicodeString tests[] = {
3530         "Any", latinTest,
3531         "Latin", latinTest,
3532         "Halfwidth", latinTest,
3533         "Devanagari", devaTest,
3534         "Katakana", kataTest,
3535         "" // END MARKER
3536     };
3537 
3538     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539     int32_t i = 0, j=0, k=0;
3540     int32_t sources = Transliterator::countAvailableSources();
3541     for (i = 0; i < sources; i++) {
3542         UnicodeString source;
3543         Transliterator::getAvailableSource(i, source);
3544         UnicodeString test = _findMatch(source, tests);
3545         if (test.length() == 0) {
3546             logln((UnicodeString)"Skipping " + source + "-X");
3547             continue;
3548         }
3549         int32_t targets = Transliterator::countAvailableTargets(source);
3550         for (j = 0; j < targets; j++) {
3551             UnicodeString target;
3552             Transliterator::getAvailableTarget(j, source, target);
3553             int32_t variants = Transliterator::countAvailableVariants(source, target);
3554             for (k =0; k< variants; k++) {
3555                 UnicodeString variant;
3556                 UParseError err;
3557                 UErrorCode status = U_ZERO_ERROR;
3558 
3559                 Transliterator::getAvailableVariant(k, source, target, variant);
3560                 UnicodeString id = source + "-" + target + "/" + variant;
3561 
3562                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3563                 if (U_FAILURE(status)) {
3564                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3565                     delete t;
3566                     continue;
3567                 }
3568                 status = U_ZERO_ERROR;
3569                 CheckIncrementalAux(t, test);
3570 
3571                 UnicodeString rev;
3572                 _trans(*t, test, rev);
3573                 Transliterator *inv = t->createInverse(status);
3574                 if (U_FAILURE(status)) {
3575 #if UCONFIG_NO_BREAK_ITERATION
3576                     // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3577                     if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3578 #endif
3579                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3580 
3581                     delete t;
3582                     delete inv;
3583                     continue;
3584                 }
3585                 CheckIncrementalAux(inv, rev);
3586                 delete t;
3587                 delete inv;
3588             }
3589         }
3590     }
3591 }
3592 
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3593 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3594                                                       const UnicodeString& input) {
3595     UErrorCode ec = U_ZERO_ERROR;
3596     UTransPosition pos;
3597     UnicodeString test = input;
3598 
3599     pos.contextStart = 0;
3600     pos.contextLimit = input.length();
3601     pos.start = 0;
3602     pos.limit = input.length();
3603 
3604     t->transliterate(test, pos, ec);
3605     if (U_FAILURE(ec)) {
3606         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3607         return;
3608     }
3609     UBool gotError = FALSE;
3610     (void)gotError;    // Suppress set but not used warning.
3611 
3612     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3613 
3614     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3615         errln((UnicodeString)"No Progress, " +
3616               t->getID() + ": " + formatInput(test, input, pos));
3617         gotError = TRUE;
3618     } else {
3619         logln((UnicodeString)"PASS Progress, " +
3620               t->getID() + ": " + formatInput(test, input, pos));
3621     }
3622     t->finishTransliteration(test, pos);
3623     if (pos.start != pos.limit) {
3624         errln((UnicodeString)"Incomplete, " +
3625               t->getID() + ": " + formatInput(test, input, pos));
3626         gotError = TRUE;
3627     }
3628 }
3629 
TestFunction()3630 void TransliteratorTest::TestFunction() {
3631     // Careful with spacing and ';' here:  Phrase this exactly
3632     // as toRules() is going to return it.  If toRules() changes
3633     // with regard to spacing or ';', then adjust this string.
3634     UnicodeString rule =
3635         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3636 
3637     UParseError pe;
3638     UErrorCode ec = U_ZERO_ERROR;
3639     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3640     if (t == NULL) {
3641         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3642         return;
3643     }
3644 
3645     UnicodeString r;
3646     t->toRules(r, TRUE);
3647     if (r == rule) {
3648         logln((UnicodeString)"OK: toRules() => " + r);
3649     } else {
3650         errln((UnicodeString)"FAIL: toRules() => " + r +
3651               ", expected " + rule);
3652     }
3653 
3654     expect(*t, "The Quick Brown Fox",
3655            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3656 
3657     delete t;
3658 }
3659 
TestInvalidBackRef(void)3660 void TransliteratorTest::TestInvalidBackRef(void) {
3661     UnicodeString rule =  ". > $1;";
3662     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3663     UParseError pe;
3664     UErrorCode ec = U_ZERO_ERROR;
3665     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3666     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3667 
3668     if (t != NULL) {
3669         errln("FAIL: createFromRules should have returned NULL");
3670         delete t;
3671     }
3672 
3673     if (t2 != NULL) {
3674         errln("FAIL: createFromRules should have returned NULL");
3675         delete t2;
3676     }
3677 
3678     if (U_SUCCESS(ec)) {
3679         errln("FAIL: Ok: . > $1; => no error");
3680     } else {
3681         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3682     }
3683 }
3684 
TestMulticharStringSet()3685 void TransliteratorTest::TestMulticharStringSet() {
3686     // Basic testing
3687     const char* rule =
3688         "       [{aa}]       > x;"
3689         "         a          > y;"
3690         "       [b{bc}]      > z;"
3691         "[{gd}] { e          > q;"
3692         "         e } [{fg}] > r;" ;
3693 
3694     UParseError pe;
3695     UErrorCode ec = U_ZERO_ERROR;
3696     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3697     if (t == NULL || U_FAILURE(ec)) {
3698         delete t;
3699         errln("FAIL: createFromRules failed");
3700         return;
3701     }
3702 
3703     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3704            "y x yz z d gd de gdq gdqfg ddrfg");
3705     delete t;
3706 
3707     // Overlapped string test.  Make sure that when multiple
3708     // strings can match that the longest one is matched.
3709     rule =
3710         "    [a {ab} {abc}]    > x;"
3711         "           b          > y;"
3712         "           c          > z;"
3713         " q [t {st} {rst}] { e > p;" ;
3714 
3715     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3716     if (t == NULL || U_FAILURE(ec)) {
3717         delete t;
3718         errln("FAIL: createFromRules failed");
3719         return;
3720     }
3721 
3722     expect(*t, "a ab abc qte qste qrste",
3723            "x x x qtp qstp qrstp");
3724     delete t;
3725 }
3726 
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728 // BEGIN TestUserFunction support factory
3729 
// Slot tables shared by _TUFFactory(), _TUFReg() and _TUFUnreg().
// _TUFF[n] is the prototype transliterator cloned by the factory for slot n;
// _TUFID[n] is the heap-allocated ID under which slot n was registered.
// TestUserFunction() sets every _TUFF entry to NULL before registering.
Transliterator* _TUFF[4];
UnicodeString* _TUFID[4];
3732 
_TUFFactory(const UnicodeString &,Transliterator::Token context)3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3734                                    Transliterator::Token context) {
3735     return _TUFF[context.integer]->clone();
3736 }
3737 
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3739     _TUFF[n] = t;
3740     _TUFID[n] = new UnicodeString(ID);
3741     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3742 }
3743 
_TUFUnreg(int32_t n)3744 static void _TUFUnreg(int32_t n) {
3745     if (_TUFF[n] != NULL) {
3746         Transliterator::unregister(*_TUFID[n]);
3747         delete _TUFF[n];
3748         delete _TUFID[n];
3749     }
3750 }
3751 
3752 // END TestUserFunction support factory
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3754 
3755 /**
3756  * Test that user-registered transliterators can be used under function
3757  * syntax.
3758  */
TestUserFunction()3759 void TransliteratorTest::TestUserFunction() {
3760 
3761     Transliterator* t;
3762     UParseError pe;
3763     UErrorCode ec = U_ZERO_ERROR;
3764 
3765     // Setup our factory
3766     int32_t i;
3767     for (i=0; i<4; ++i) {
3768         _TUFF[i] = NULL;
3769     }
3770 
3771     // There's no need to register inverses if we don't use them
3772     t = Transliterator::createFromRules("gif",
3773                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774                                         UTRANS_FORWARD, pe, ec);
3775     if (t == NULL || U_FAILURE(ec)) {
3776         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3777         return;
3778     }
3779     _TUFReg("Any-gif", t, 0);
3780 
3781     t = Transliterator::createFromRules("RemoveCurly",
3782                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783                                         UTRANS_FORWARD, pe, ec);
3784     if (t == NULL || U_FAILURE(ec)) {
3785         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3786         goto FAIL;
3787     }
3788     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789     _TUFReg("Any-RemoveCurly", t, 1);
3790 
3791     logln("Trying &hex");
3792     t = Transliterator::createFromRules("hex2",
3793                                         "(.) > &hex($1);",
3794                                         UTRANS_FORWARD, pe, ec);
3795     if (t == NULL || U_FAILURE(ec)) {
3796         errln("FAIL: createFromRules");
3797         goto FAIL;
3798     }
3799     logln("Registering");
3800     _TUFReg("Any-hex2", t, 2);
3801     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3802     if (t == NULL || U_FAILURE(ec)) {
3803         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3804         goto FAIL;
3805     }
3806     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3807     delete t;
3808 
3809     logln("Trying &gif");
3810     t = Transliterator::createFromRules("gif2",
3811                                         "(.) > &Gif(&Hex2($1));",
3812                                         UTRANS_FORWARD, pe, ec);
3813     if (t == NULL || U_FAILURE(ec)) {
3814         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3815         goto FAIL;
3816     }
3817     logln("Registering");
3818     _TUFReg("Any-gif2", t, 3);
3819     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3820     if (t == NULL || U_FAILURE(ec)) {
3821         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3822         goto FAIL;
3823     }
3824     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3826     delete t;
3827 
3828     // Test that filters are allowed after &
3829     t = Transliterator::createFromRules("test",
3830                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831                                         UTRANS_FORWARD, pe, ec);
3832     if (t == NULL || U_FAILURE(ec)) {
3833         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3834         goto FAIL;
3835     }
3836     expect(*t, "abc",
3837            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3838     delete t;
3839 
3840  FAIL:
3841     for (i=0; i<4; ++i) {
3842         _TUFUnreg(i);
3843     }
3844 }
3845 
3846 /**
3847  * Test the Any-X transliterators.
3848  */
TestAnyX(void)3849 void TransliteratorTest::TestAnyX(void) {
3850     UParseError parseError;
3851     UErrorCode status = U_ZERO_ERROR;
3852     Transliterator* anyLatin =
3853         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3854     if (anyLatin==0) {
3855         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3856         delete anyLatin;
3857         return;
3858     }
3859 
3860     expect(*anyLatin,
3861            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3863 
3864     delete anyLatin;
3865 }
3866 
3867 /**
3868  * Test Any-X transliterators with sample letters from all scripts.
3869  */
TestAny(void)3870 void TransliteratorTest::TestAny(void) {
3871     UErrorCode status = U_ZERO_ERROR;
3872     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3873     //       function call parameters going on in this test.
3874     UnicodeSet alphabetic("[:alphabetic:]", status);
3875     if (U_FAILURE(status)) {
3876         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3877         return;
3878     }
3879     alphabetic.freeze();
3880 
3881     UnicodeString testString;
3882     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3883         const char *scriptName = uscript_getShortName((UScriptCode)i);
3884         if (scriptName == NULL) {
3885             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3886             return;
3887         }
3888 
3889         UnicodeSet sample;
3890         sample.applyPropertyAlias("script", scriptName, status);
3891         if (U_FAILURE(status)) {
3892             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3893             return;
3894         }
3895         sample.retainAll(alphabetic);
3896         for (int32_t count=0; count<5; count++) {
3897             UChar32 c = sample.charAt(count);
3898             if (c == -1) {
3899                 break;
3900             }
3901             testString.append(c);
3902         }
3903     }
3904 
3905     UParseError parseError;
3906     Transliterator* anyLatin =
3907         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3908     if (U_FAILURE(status)) {
3909         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3910         return;
3911     }
3912 
3913     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3914     anyLatin->transliterate(testString);
3915     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3916     delete anyLatin;
3917 }
3918 
3919 
3920 /**
3921  * Test the source and target set API.  These are only implemented
3922  * for RBT and CompoundTransliterator at this time.
3923  */
TestSourceTargetSet()3924 void TransliteratorTest::TestSourceTargetSet() {
3925     UErrorCode ec = U_ZERO_ERROR;
3926 
3927     // Rules
3928     const char* r =
3929         "a > b; "
3930         "r [x{lu}] > q;";
3931 
3932     // Expected source
3933     UnicodeSet expSrc("[arx{lu}]", ec);
3934 
3935     // Expected target
3936     UnicodeSet expTrg("[bq]", ec);
3937 
3938     UParseError pe;
3939     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3940 
3941     if (U_FAILURE(ec)) {
3942         delete t;
3943         errln("FAIL: Couldn't set up test");
3944         return;
3945     }
3946 
3947     UnicodeSet src; t->getSourceSet(src);
3948     UnicodeSet trg; t->getTargetSet(trg);
3949 
3950     if (src == expSrc && trg == expTrg) {
3951         UnicodeString a, b;
3952         logln((UnicodeString)"Ok: " +
3953               r + " => source = " + src.toPattern(a, TRUE) +
3954               ", target = " + trg.toPattern(b, TRUE));
3955     } else {
3956         UnicodeString a, b, c, d;
3957         errln((UnicodeString)"FAIL: " +
3958               r + " => source = " + src.toPattern(a, TRUE) +
3959               ", expected " + expSrc.toPattern(b, TRUE) +
3960               "; target = " + trg.toPattern(c, TRUE) +
3961               ", expected " + expTrg.toPattern(d, TRUE));
3962     }
3963 
3964     delete t;
3965 }
3966 
3967 /**
3968  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3969  */
TestPatternWhiteSpace()3970 void TransliteratorTest::TestPatternWhiteSpace() {
3971     // Rules
3972     const char* r = "a > \\u200E b;";
3973 
3974     UErrorCode ec = U_ZERO_ERROR;
3975     UParseError pe;
3976     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3977 
3978     if (U_FAILURE(ec)) {
3979         errln("FAIL: Couldn't set up test");
3980     } else {
3981         expect(*t, "a", "b");
3982     }
3983     delete t;
3984 
3985     // UnicodeSet
3986     ec = U_ZERO_ERROR;
3987     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3988 
3989     if (U_FAILURE(ec)) {
3990         errln("FAIL: Couldn't set up test");
3991     } else {
3992         if (set.contains(0x200E)) {
3993             errln("FAIL: U+200E not being ignored by UnicodeSet");
3994         }
3995     }
3996 }
3997 //======================================================================
3998 // this method is in TestUScript.java
3999 //======================================================================
TestAllCodepoints()4000 void TransliteratorTest::TestAllCodepoints(){
4001     UScriptCode code= USCRIPT_INVALID_CODE;
4002     char id[256]={'\0'};
4003     char abbr[256]={'\0'};
4004     char newId[256]={'\0'};
4005     char newAbbrId[256]={'\0'};
4006     char oldId[256]={'\0'};
4007     char oldAbbrId[256]={'\0'};
4008 
4009     UErrorCode status =U_ZERO_ERROR;
4010     UParseError pe;
4011 
4012     for(uint32_t i = 0; i<=0x10ffff; i++){
4013         code =  uscript_getScript(i,&status);
4014         if(code == USCRIPT_INVALID_CODE){
4015             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4016         }
4017         const char* myId = uscript_getName(code);
4018         if(!myId) {
4019           dataerrln("Valid script code returned NULL name. Check your data!");
4020           return;
4021         }
4022         uprv_strcpy(id,myId);
4023         uprv_strcpy(abbr,uscript_getShortName(code));
4024 
4025         uprv_strcpy(newId,"[:");
4026         uprv_strcat(newId,id);
4027         uprv_strcat(newId,":];NFD");
4028 
4029         uprv_strcpy(newAbbrId,"[:");
4030         uprv_strcat(newAbbrId,abbr);
4031         uprv_strcat(newAbbrId,":];NFD");
4032 
4033         if(uprv_strcmp(newId,oldId)!=0){
4034             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4035             if(t==NULL || U_FAILURE(status)){
4036                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4037             }
4038             delete t;
4039         }
4040         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4041             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4042             if(t==NULL || U_FAILURE(status)){
4043                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4044             }
4045             delete t;
4046         }
4047         uprv_strcpy(oldId,newId);
4048         uprv_strcpy(oldAbbrId, newAbbrId);
4049 
4050     }
4051 
4052 }
4053 
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates a forward transliterator from the ID `id` and checks that its
 * getDynamicClassID() equals cls::getStaticClassID().  A creation failure is
 * reported through dataerrln(); a class ID mismatch through errln().  The
 * instance is deleted in either case.
 *
 * `id` is also passed to a "%s" format, so it must be a C string.
 * `cls` is stringified into the mismatch message.
 * The macro calls dataerrln()/errln() unqualified, so it is meant to be
 * expanded inside a test method where those are available.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4067 
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *
 * Creates a forward transliterator named "_" from the rule text `rule` and
 * checks that its getDynamicClassID() equals cls::getStaticClassID().
 * Both a creation failure and a class ID mismatch are reported through
 * errln().  The instance is deleted in either case.
 *
 * `rule` must be a string literal: it is concatenated with the adjacent
 * literal in the failure message.  `cls` is stringified into the mismatch
 * message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082 
/**
 * Check, for one representative ID (or rule) per transliterator class, that
 * the created object's dynamic class ID matches the static class ID of the
 * class named in the second macro argument.
 */
void TransliteratorTest::TestBoilerplate() {
    // Transliterators created from a registered ID
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Transliterator created from rule text
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4098 
TestAlternateSyntax()4099 void TransliteratorTest::TestAlternateSyntax() {
4100     // U+2206 == &
4101     // U+2190 == <
4102     // U+2192 == >
4103     // U+2194 == <>
4104     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4105            "abc",
4106            "xbz");
4107     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4110 }
4111 
/*
 * Rule sets referenced by index from BEGIN_END_TEST_CASES and from the
 * TestBeginEnd*() methods.  Adjacent string literals are concatenated, so
 * each "// [n]" group forms exactly one array element.
 *
 * The groups that use ::BEGIN / ::END are commented out; an empty string
 * stands in for each of them so that the indexes of the remaining entries
 * do not change.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    // (the reversible rule set: the TestBeginEnd*() methods also build it with UTRANS_REVERSE)
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4285 
4286 /*
4287 (This entire test is commented out below and will need some heavy revision when we re-add
4288 the ::BEGIN/::END stuff)
4289 static const char* BOGUS_BEGIN_END_RULES[] = {
4290     // [7]
4291     "::BEGIN;"
4292     "abc > xy;"
4293     "::BEGIN;"
4294     "aba > z;"
4295     "::END;"
4296     "::END;",
4297 
4298     // [8]
4299     "abc > xy;"
4300     " aba > z;"
4301     "::END;",
4302 
4303     // [9]
4304     "::BEGIN;"
4305     "::Upper;"
4306     "::END;"
4307 };
4308 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4309 */
4310 
/*
 * Flat table of test cases, three consecutive entries per case:
 * the rule set (an element of BEGIN_END_RULES), the input text, and the
 * expected output.  Consumers step through it with a stride of 3.
 * Rows that refer to the disabled ::BEGIN / ::END rule sets are commented out.
 */
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of array entries (not the number of test cases).
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4339 
TestBeginEnd()4340 void TransliteratorTest::TestBeginEnd() {
4341     // run through the list of test cases above
4342     int32_t i = 0;
4343     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4344         expect((UnicodeString)"Test case #" + (i / 3),
4345                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4346                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4347                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4348     }
4349 
4350     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4351     UParseError parseError;
4352     UErrorCode status = U_ZERO_ERROR;
4353     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4354             UTRANS_REVERSE, parseError, status);
4355     if (reversed == 0 || U_FAILURE(status)) {
4356         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4357     } else {
4358         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4359     }
4360     delete reversed;
4361 
4362     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4363     // that all of them cause errors
4364 /*
4365 (commented out until we have the real ::BEGIN/::END stuff in place
4366     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4367         UParseError parseError;
4368         UErrorCode status = U_ZERO_ERROR;
4369         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4370                 UTRANS_FORWARD, parseError, status);
4371         if (!U_FAILURE(status)) {
4372             delete t;
4373             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4374         }
4375     }
4376 */
4377 }
4378 
TestBeginEndToRules()4379 void TransliteratorTest::TestBeginEndToRules() {
4380     // run through the same list of test cases we used above, but this time, instead of just
4381     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4382     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4383     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4384     // to (i.e., does the same thing as) the original rule set
4385     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4386         UParseError parseError;
4387         UErrorCode status = U_ZERO_ERROR;
4388         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4389                 UTRANS_FORWARD, parseError, status);
4390         if (U_FAILURE(status)) {
4391             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4392         } else {
4393             UnicodeString rules;
4394             t->toRules(rules, TRUE);
4395             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4396                     UTRANS_FORWARD, parseError, status);
4397             if (U_FAILURE(status)) {
4398                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4399                         parseError, status);
4400                 delete t;
4401             } else {
4402                 expect(*t2,
4403                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4404                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4405                 delete t;
4406                 delete t2;
4407             }
4408         }
4409     }
4410 
4411     // do the same thing for the reversible test case
4412     UParseError parseError;
4413     UErrorCode status = U_ZERO_ERROR;
4414     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4415             UTRANS_REVERSE, parseError, status);
4416     if (U_FAILURE(status)) {
4417         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4418     } else {
4419         UnicodeString rules;
4420         reversed->toRules(rules, FALSE);
4421         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4422                 parseError, status);
4423         if (U_FAILURE(status)) {
4424             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4425                     parseError, status);
4426             delete reversed;
4427         } else {
4428             expect(*reversed2,
4429                    UnicodeString("xy XY XYZ yz YZ"),
4430                    UnicodeString("xy abc xaba yz aba"));
4431             delete reversed;
4432             delete reversed2;
4433         }
4434     }
4435 }
4436 
TestRegisterAlias()4437 void TransliteratorTest::TestRegisterAlias() {
4438     UnicodeString longID("Lower;[aeiou]Upper");
4439     UnicodeString shortID("Any-CapVowels");
4440     UnicodeString reallyShortID("CapVowels");
4441 
4442     Transliterator::registerAlias(shortID, longID);
4443 
4444     UErrorCode err = U_ZERO_ERROR;
4445     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4446     if (U_FAILURE(err)) {
4447         errln("Failed to instantiate transliterator with long ID");
4448         Transliterator::unregister(shortID);
4449         return;
4450     }
4451     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4452     if (U_FAILURE(err)) {
4453         errln("Failed to instantiate transliterator with short ID");
4454         delete t1;
4455         Transliterator::unregister(shortID);
4456         return;
4457     }
4458 
4459     if (t1->getID() != longID)
4460         errln("Transliterator instantiated with long ID doesn't have long ID");
4461     if (t2->getID() != reallyShortID)
4462         errln("Transliterator instantiated with short ID doesn't have short ID");
4463 
4464     UnicodeString rules1;
4465     UnicodeString rules2;
4466 
4467     t1->toRules(rules1, TRUE);
4468     t2->toRules(rules2, TRUE);
4469     if (rules1 != rules2)
4470         errln("Alias transliterators aren't the same");
4471 
4472     delete t1;
4473     delete t2;
4474     Transliterator::unregister(shortID);
4475 
4476     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4477     if (U_SUCCESS(err)) {
4478         errln("Instantiation with short ID succeeded after short ID was unregistered");
4479         delete t1;
4480     }
4481 
4482     // try the same thing again, but this time with something other than
4483     // an instance of CompoundTransliterator
4484     UnicodeString realID("Latin-Greek");
4485     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4486     Transliterator::registerAlias(fakeID, realID);
4487 
4488     err = U_ZERO_ERROR;
4489     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4490     if (U_FAILURE(err)) {
4491         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4492         Transliterator::unregister(realID);
4493         return;
4494     }
4495     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4496     if (U_FAILURE(err)) {
4497         errln("Failed to instantiate transliterator with fake ID");
4498         delete t1;
4499         Transliterator::unregister(realID);
4500         return;
4501     }
4502 
4503     t1->toRules(rules1, TRUE);
4504     t2->toRules(rules2, TRUE);
4505     if (rules1 != rules2)
4506         errln("Alias transliterators aren't the same");
4507 
4508     delete t1;
4509     delete t2;
4510     Transliterator::unregister(fakeID);
4511 }
4512 
TestRuleStripping()4513 void TransliteratorTest::TestRuleStripping() {
4514     /*
4515 #
4516 \uE001>\u0C01; # SIGN
4517     */
4518     static const UChar rule[] = {
4519         0x0023,0x0020,0x000D,0x000A,
4520         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4521     };
4522     static const UChar expectedRule[] = {
4523         0xE001,0x003E,0x0C01,0x003B,0
4524     };
4525     UChar result[sizeof(rule)/sizeof(rule[0])];
4526     UErrorCode status = U_ZERO_ERROR;
4527     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4528     if (len != u_strlen(expectedRule)) {
4529         errln("utrans_stripRules return len = %d", len);
4530     }
4531     if (u_strncmp(expectedRule, result, len) != 0) {
4532         errln("utrans_stripRules did not return expected string");
4533     }
4534 }
4535 
4536 /**
4537  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4538  */
TestHalfwidthFullwidth(void)4539 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4540     UParseError parseError;
4541     UErrorCode status = U_ZERO_ERROR;
4542     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4543     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4544     if (hf == 0 || fh == 0) {
4545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4546         delete hf;
4547         delete fh;
4548         return;
4549     }
4550 
4551     // Array of 2n items
4552     // Each item is
4553     //   "hf"|"fh"|"both",
4554     //   <Halfwidth>,
4555     //   <Fullwidth>
4556     const char* DATA[] = {
4557         "both",
4558         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4559         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4560     };
4561     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4562 
4563     for (int32_t i=0; i<DATA_length; i+=3) {
4564         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4565         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4566         switch (*DATA[i]) {
4567         case 0x68: //'h': // Halfwidth-Fullwidth only
4568             expect(*hf, h, f);
4569             break;
4570         case 0x66: //'f': // Fullwidth-Halfwidth only
4571             expect(*fh, f, h);
4572             break;
4573         case 0x62: //'b': // both directions
4574             expect(*hf, h, f);
4575             expect(*fh, f, h);
4576             break;
4577         }
4578     }
4579     delete hf;
4580     delete fh;
4581 }
4582 
4583 
4584     /**
4585      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4586      *              TODO: confirm that the expected results are correct.
4587      *              For now, test just confirms that C++ and Java give identical results.
4588      */
TestThai(void)4589 void TransliteratorTest::TestThai(void) {
4590 #if !UCONFIG_NO_BREAK_ITERATION
4591     UParseError parseError;
4592     UErrorCode status = U_ZERO_ERROR;
4593     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4594     if (tr == 0) {
4595         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4596         return;
4597     }
4598     if (U_FAILURE(status)) {
4599         errln("FAIL: createInstance failed with %s", u_errorName(status));
4600         return;
4601     }
4602     const char *thaiText =
4603         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4604         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4605         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4606         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4607         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4608         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4609         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4610         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4611         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4612         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4613         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4614         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4615         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4616         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4617         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4618         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4619         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4620         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4621         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4622         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4623         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4624         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4625         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4626         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4627         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4628         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4629         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4630         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4631         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4632         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4633 
4634     const char *latinText =
4635         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4636         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4637         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4638         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4639         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4640         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4641         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4642         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4643         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4644         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4645         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4646         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4647         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4648         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4649         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4650         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4651         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4652         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4653 
4654 
4655     UnicodeString  xlitText(thaiText);
4656     xlitText = xlitText.unescape();
4657     tr->transliterate(xlitText);
4658 
4659     UnicodeString expectedText(latinText);
4660     expectedText = expectedText.unescape();
4661     expect(*tr, xlitText, expectedText);
4662 
4663     delete tr;
4664 #endif
4665 }
4666 
4667 
4668 //======================================================================
4669 // Support methods
4670 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4671 void TransliteratorTest::expectT(const UnicodeString& id,
4672                                  const UnicodeString& source,
4673                                  const UnicodeString& expectedResult) {
4674     UErrorCode ec = U_ZERO_ERROR;
4675     UParseError pe;
4676     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4677     if (U_FAILURE(ec)) {
4678         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4679         delete t;
4680         return;
4681     }
4682     expect(*t, source, expectedResult);
4683     delete t;
4684 }
4685 
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4686 void TransliteratorTest::reportParseError(const UnicodeString& message,
4687                                           const UParseError& parseError,
4688                                           const UErrorCode& status) {
4689     dataerrln(message +
4690           /*", parse error " + parseError.code +*/
4691           ", line " + parseError.line +
4692           ", offset " + parseError.offset +
4693           ", pre-context " + prettify(parseError.preContext, TRUE) +
4694           ", post-context " + prettify(parseError.postContext,TRUE) +
4695           ", Error: " + u_errorName(status));
4696 }
4697 
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4698 void TransliteratorTest::expect(const UnicodeString& rules,
4699                                 const UnicodeString& source,
4700                                 const UnicodeString& expectedResult,
4701                                 UTransPosition *pos) {
4702     expect("<ID>", rules, source, expectedResult, pos);
4703 }
4704 
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4705 void TransliteratorTest::expect(const UnicodeString& id,
4706                                 const UnicodeString& rules,
4707                                 const UnicodeString& source,
4708                                 const UnicodeString& expectedResult,
4709                                 UTransPosition *pos) {
4710     UErrorCode status = U_ZERO_ERROR;
4711     UParseError parseError;
4712     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4713     if (U_FAILURE(status)) {
4714         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4715     } else {
4716         expect(*t, source, expectedResult, pos);
4717     }
4718     delete t;
4719 }
4720 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4721 void TransliteratorTest::expect(const Transliterator& t,
4722                                 const UnicodeString& source,
4723                                 const UnicodeString& expectedResult,
4724                                 const Transliterator& reverseTransliterator) {
4725     expect(t, source, expectedResult);
4726     expect(reverseTransliterator, expectedResult, source);
4727 }
4728 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4729 void TransliteratorTest::expect(const Transliterator& t,
4730                                 const UnicodeString& source,
4731                                 const UnicodeString& expectedResult,
4732                                 UTransPosition *pos) {
4733     if (pos == 0) {
4734         UnicodeString result(source);
4735         t.transliterate(result);
4736         expectAux(t.getID() + ":String", source, result, expectedResult);
4737     }
4738     UTransPosition index={0, 0, 0, 0};
4739     if (pos != 0) {
4740         index = *pos;
4741     }
4742 
4743     UnicodeString rsource(source);
4744     if (pos == 0) {
4745         t.transliterate(rsource);
4746     } else {
4747         // Do it all at once -- below we do it incrementally
4748         t.finishTransliteration(rsource, *pos);
4749     }
4750     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4751 
4752     // Test keyboard (incremental) transliteration -- this result
4753     // must be the same after we finalize (see below).
4754     UnicodeString log;
4755     rsource.remove();
4756     if (pos != 0) {
4757         rsource = source;
4758         formatInput(log, rsource, index);
4759         log.append(" -> ");
4760         UErrorCode status = U_ZERO_ERROR;
4761         t.transliterate(rsource, index, status);
4762         formatInput(log, rsource, index);
4763     } else {
4764         for (int32_t i=0; i<source.length(); ++i) {
4765             if (i != 0) {
4766                 log.append(" + ");
4767             }
4768             log.append(source.charAt(i)).append(" -> ");
4769             UErrorCode status = U_ZERO_ERROR;
4770             t.transliterate(rsource, index, source.charAt(i), status);
4771             formatInput(log, rsource, index);
4772         }
4773     }
4774 
4775     // As a final step in keyboard transliteration, we must call
4776     // transliterate to finish off any pending partial matches that
4777     // were waiting for more input.
4778     t.finishTransliteration(rsource, index);
4779     log.append(" => ").append(rsource);
4780 
4781     expectAux(t.getID() + ":Keyboard", log,
4782               rsource == expectedResult,
4783               expectedResult);
4784 }
4785 
4786 
4787 /**
4788  * @param appendTo result is appended to this param.
4789  * @param input the string being transliterated
4790  * @param pos the index struct
4791  */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4792 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4793                                                const UnicodeString& input,
4794                                                const UTransPosition& pos) {
4795     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4796     // the {} indicate the context start and limit, and the ||
4797     // indicate the start and limit.
4798     if (0 <= pos.contextStart &&
4799         pos.contextStart <= pos.start &&
4800         pos.start <= pos.limit &&
4801         pos.limit <= pos.contextLimit &&
4802         pos.contextLimit <= input.length()) {
4803 
4804         UnicodeString a, b, c, d, e;
4805         input.extractBetween(0, pos.contextStart, a);
4806         input.extractBetween(pos.contextStart, pos.start, b);
4807         input.extractBetween(pos.start, pos.limit, c);
4808         input.extractBetween(pos.limit, pos.contextLimit, d);
4809         input.extractBetween(pos.contextLimit, input.length(), e);
4810         appendTo.append(a).append((UChar)123/*{*/).append(b).
4811             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4812             append((UChar)125/*}*/).append(e);
4813     } else {
4814         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4815                         pos.contextStart + ", s=" + pos.start + ", l=" +
4816                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4817                         input);
4818     }
4819     return appendTo;
4820 }
4821 
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4822 void TransliteratorTest::expectAux(const UnicodeString& tag,
4823                                    const UnicodeString& source,
4824                                    const UnicodeString& result,
4825                                    const UnicodeString& expectedResult) {
4826     expectAux(tag, source + " -> " + result,
4827               result == expectedResult,
4828               expectedResult);
4829 }
4830 
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4831 void TransliteratorTest::expectAux(const UnicodeString& tag,
4832                                    const UnicodeString& summary, UBool pass,
4833                                    const UnicodeString& expectedResult) {
4834     if (pass) {
4835         logln(UnicodeString("(")+tag+") " + prettify(summary));
4836     } else {
4837         dataerrln(UnicodeString("FAIL: (")+tag+") "
4838               + prettify(summary)
4839               + ", expected " + prettify(expectedResult));
4840     }
4841 }
4842 
4843 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4844