1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *   Copyright (C) 1997-2016 International Business Machines
6  *   Corporation and others.  All Rights Reserved.
7  *******************************************************************************
8  *   Date        Name        Description
9  *   06/23/00    aliu        Creation.
10  *******************************************************************************
11  */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include <stdlib.h>
18 #include <string.h>
19 #include "unicode/utrans.h"
20 #include "unicode/ustring.h"
21 #include "unicode/uset.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #define TEST(x) addTest(root, &x, "utrans/" # x)
26 
27 static void TestAPI(void);
28 static void TestSimpleRules(void);
29 static void TestFilter(void);
30 static void TestOpenInverse(void);
31 static void TestClone(void);
32 static void TestRegisterUnregister(void);
33 static void TestExtractBetween(void);
34 static void TestUnicodeIDs(void);
35 static void TestGetRulesAndSourceSet(void);
36 static void TestDataVariantsCompounds(void);
37 
38 static void _expectRules(const char*, const char*, const char*);
39 static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
40 
41 void addUTransTest(TestNode** root);
42 
43 
44 void
addUTransTest(TestNode ** root)45 addUTransTest(TestNode** root) {
46     TEST(TestAPI);
47     TEST(TestSimpleRules);
48     TEST(TestFilter);
49     TEST(TestOpenInverse);
50     TEST(TestClone);
51     TEST(TestRegisterUnregister);
52     TEST(TestExtractBetween);
53     TEST(TestUnicodeIDs);
54     TEST(TestGetRulesAndSourceSet);
55     TEST(TestDataVariantsCompounds);
56 }
57 
58 /*------------------------------------------------------------------
59  * Replaceable glue
60  *
61  * To test the Replaceable glue we have to dummy up a C-based
62  * Replaceable callback.  This code is for testing purposes only.
63  *------------------------------------------------------------------*/
64 
65 typedef struct XReplaceable {
66     UChar* text;    /* MUST BE null-terminated */
67 } XReplaceable;
68 
/*
 * Gives rep a freshly allocated UTF-16 copy of cstring.
 *
 * rep:     object to initialize; any previous text pointer is overwritten,
 *          not freed.
 * cstring: NUL-terminated char string, converted with u_uastrcpy().
 *
 * Release the buffer with FreeXReplaceable().
 */
static void InitXReplaceable(XReplaceable* rep, const char* cstring) {
    /* One UChar per input char, plus one for the terminator. */
    size_t unitCount = strlen(cstring) + 1;

    rep->text = malloc(unitCount * sizeof(UChar));
    u_uastrcpy(rep->text, cstring);
}
73 
/*
 * Releases the text buffer owned by rep and clears the pointer, so calling
 * this twice on the same object is harmless.
 */
static void FreeXReplaceable(XReplaceable* rep) {
    /* free(NULL) is a no-op, so no guard is needed. */
    free(rep->text);
    rep->text = NULL;
}
80 
81 /* UReplaceableCallbacks callback */
Xlength(const UReplaceable * rep)82 static int32_t Xlength(const UReplaceable* rep) {
83     const XReplaceable* x = (const XReplaceable*)rep;
84     return u_strlen(x->text);
85 }
86 
87 /* UReplaceableCallbacks callback */
XcharAt(const UReplaceable * rep,int32_t offset)88 static UChar XcharAt(const UReplaceable* rep, int32_t offset) {
89     const XReplaceable* x = (const XReplaceable*)rep;
90     return x->text[offset];
91 }
92 
93 /* UReplaceableCallbacks callback */
Xchar32At(const UReplaceable * rep,int32_t offset)94 static UChar32 Xchar32At(const UReplaceable* rep, int32_t offset) {
95     const XReplaceable* x = (const XReplaceable*)rep;
96     return x->text[offset];
97 }
98 
99 /* UReplaceableCallbacks callback */
Xreplace(UReplaceable * rep,int32_t start,int32_t limit,const UChar * text,int32_t textLength)100 static void Xreplace(UReplaceable* rep, int32_t start, int32_t limit,
101               const UChar* text, int32_t textLength) {
102     XReplaceable* x = (XReplaceable*)rep;
103     int32_t newLen = Xlength(rep) + limit - start + textLength;
104     UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
105     u_strncpy(newText, x->text, start);
106     u_strncpy(newText + start, text, textLength);
107     u_strcpy(newText + start + textLength, x->text + limit);
108     free(x->text);
109     x->text = newText;
110 }
111 
112 /* UReplaceableCallbacks callback */
Xcopy(UReplaceable * rep,int32_t start,int32_t limit,int32_t dest)113 static void Xcopy(UReplaceable* rep, int32_t start, int32_t limit, int32_t dest) {
114     XReplaceable* x = (XReplaceable*)rep;
115     int32_t newLen = Xlength(rep) + limit - start;
116     UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
117     u_strncpy(newText, x->text, dest);
118     u_strncpy(newText + dest, x->text + start, limit - start);
119     u_strcpy(newText + dest + limit - start, x->text + dest);
120     free(x->text);
121     x->text = newText;
122 }
123 
124 /* UReplaceableCallbacks callback */
Xextract(UReplaceable * rep,int32_t start,int32_t limit,UChar * dst)125 static void Xextract(UReplaceable* rep, int32_t start, int32_t limit, UChar* dst) {
126     XReplaceable* x = (XReplaceable*)rep;
127     int32_t len = limit - start;
128     u_strncpy(dst, x->text, len);
129 }
130 
/*
 * Fills callbacks with the X* functions of this file so that an
 * XReplaceable can be passed to the utrans_trans*() functions.
 */
static void InitXReplaceableCallbacks(UReplaceableCallbacks* callbacks) {
    /* Read-only accessors. */
    callbacks->length   = Xlength;
    callbacks->charAt   = XcharAt;
    callbacks->char32At = Xchar32At;
    callbacks->extract  = Xextract;
    /* Mutators. */
    callbacks->replace  = Xreplace;
    callbacks->copy     = Xcopy;
}
139 
140 /*------------------------------------------------------------------
141  * Tests
142  *------------------------------------------------------------------*/
143 
TestAPI()144 static void TestAPI() {
145     enum { BUF_CAP = 128 };
146     char buf[BUF_CAP], buf2[BUF_CAP];
147     UErrorCode status = U_ZERO_ERROR;
148     UTransliterator* trans = NULL;
149     int32_t i, n;
150 
151     /* Test getAvailableIDs */
152     n = utrans_countAvailableIDs();
153     if (n < 1) {
154         log_err("FAIL: utrans_countAvailableIDs() returned %d\n", n);
155     } else {
156         log_verbose("System ID count: %d\n", n);
157     }
158     for (i=0; i<n; ++i) {
159         utrans_getAvailableID(i, buf, BUF_CAP);
160         if (*buf == 0) {
161             log_err("FAIL: System transliterator %d: \"\"\n", i);
162         } else {
163             log_verbose("System transliterator %d: \"%s\"\n", i, buf);
164         }
165     }
166 
167     /* Test open */
168     utrans_getAvailableID(0, buf, BUF_CAP);
169     trans = utrans_open(buf, UTRANS_FORWARD,NULL,0,NULL, &status);
170     if (U_FAILURE(status)) {
171         log_err("FAIL: utrans_open(%s) failed, error=%s\n",
172                 buf, u_errorName(status));
173     }
174 
175     else {
176         /* Test getID */
177         utrans_getID(trans, buf2, BUF_CAP);
178         if (0 != strcmp(buf, buf2)) {
179             log_err("FAIL: utrans_getID(%s) returned %s\n",
180                     buf, buf2);
181         }
182         utrans_close(trans);
183     }
184 }
185 
TestUnicodeIDs()186 static void TestUnicodeIDs() {
187     UEnumeration *uenum;
188     UTransliterator *utrans;
189     const UChar *id, *id2;
190     int32_t idLength, id2Length, count, count2;
191 
192     UErrorCode errorCode;
193 
194     errorCode=U_ZERO_ERROR;
195     uenum=utrans_openIDs(&errorCode);
196     if(U_FAILURE(errorCode)) {
197         log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
198         return;
199     }
200 
201     count=uenum_count(uenum, &errorCode);
202     if(U_FAILURE(errorCode) || count<1) {
203         log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
204     }
205 
206     count=0;
207     for(;;) {
208         id=uenum_unext(uenum, &idLength, &errorCode);
209         if(U_FAILURE(errorCode)) {
210             log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
211             break;
212         }
213         if(id==NULL) {
214             break;
215         }
216 
217         if(++count>10) {
218             /* try to actually open only a few transliterators */
219             continue;
220         }
221 
222         utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
223         if(U_FAILURE(errorCode)) {
224             log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
225             continue;
226         }
227 
228         id2=utrans_getUnicodeID(utrans, &id2Length);
229         if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
230             log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
231         }
232 
233         utrans_close(utrans);
234     }
235 
236     uenum_reset(uenum, &errorCode);
237     if(U_FAILURE(errorCode) || count<1) {
238         log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
239     } else {
240         count2=uenum_count(uenum, &errorCode);
241         if(U_FAILURE(errorCode) || count<1) {
242             log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
243         } else if(count!=count2) {
244             log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
245         }
246     }
247 
248     uenum_close(uenum);
249 }
250 
TestOpenInverse()251 static void TestOpenInverse(){
252     UErrorCode status=U_ZERO_ERROR;
253     UTransliterator* t1=NULL;
254     UTransliterator* inverse1=NULL;
255     enum { BUF_CAP = 128 };
256     char buf1[BUF_CAP];
257     int32_t i=0;
258 
259     const char TransID[][25]={
260            "Halfwidth-Fullwidth",
261            "Fullwidth-Halfwidth",
262            "Greek-Latin" ,
263            "Latin-Greek",
264            /*"Arabic-Latin", // Removed in 2.0*/
265            /*"Latin-Arabic", // Removed in 2.0*/
266            "Katakana-Latin",
267            "Latin-Katakana",
268            /*"Hebrew-Latin", // Removed in 2.0*/
269            /*"Latin-Hebrew", // Removed in 2.0*/
270            "Cyrillic-Latin",
271            "Latin-Cyrillic",
272            "Devanagari-Latin",
273            "Latin-Devanagari",
274            "Any-Hex",
275            "Hex-Any"
276          };
277 
278     for(i=0; i<UPRV_LENGTHOF(TransID); i=i+2){
279         status = U_ZERO_ERROR;
280         t1=utrans_open(TransID[i], UTRANS_FORWARD,NULL,0,NULL, &status);
281         if(t1 == NULL || U_FAILURE(status)){
282             log_data_err("FAIL: in instantiation for id=%s -> %s (Are you missing data?)\n", TransID[i], u_errorName(status));
283             continue;
284         }
285         inverse1=utrans_openInverse(t1, &status);
286         if(U_FAILURE(status)){
287             log_err("FAIL: utrans_openInverse() failed for id=%s. Error=%s\n", TransID[i], myErrorName(status));
288             continue;
289         }
290         utrans_getID(inverse1, buf1, BUF_CAP);
291         if(strcmp(buf1, TransID[i+1]) != 0){
292             log_err("FAIL :openInverse() for %s returned %s instead of %s\n", TransID[i], buf1, TransID[i+1]);
293         }
294         utrans_close(t1);
295         utrans_close(inverse1);
296    }
297 }
298 
TestClone()299 static void TestClone(){
300     UErrorCode status=U_ZERO_ERROR;
301     UTransliterator* t1=NULL;
302     UTransliterator* t2=NULL;
303     UTransliterator* t3=NULL;
304     UTransliterator* t4=NULL;
305     enum { BUF_CAP = 128 };
306     char buf1[BUF_CAP], buf2[BUF_CAP], buf3[BUF_CAP];
307 
308     t1=utrans_open("Latin-Devanagari", UTRANS_FORWARD, NULL,0,NULL,&status);
309     if(U_FAILURE(status)){
310         log_data_err("FAIL: construction -> %s (Are you missing data?)\n", u_errorName(status));
311         return;
312     }
313     t2=utrans_open("Latin-Greek", UTRANS_FORWARD, NULL,0,NULL,&status);
314     if(U_FAILURE(status)){
315         log_err("FAIL: construction\n");
316         utrans_close(t1);
317         return;
318     }
319 
320     t3=utrans_clone(t1, &status);
321     t4=utrans_clone(t2, &status);
322 
323     utrans_getID(t1, buf1, BUF_CAP);
324     utrans_getID(t2, buf2, BUF_CAP);
325     utrans_getID(t3, buf3, BUF_CAP);
326 
327     if(strcmp(buf1, buf3) != 0 ||
328         strcmp(buf1, buf2) == 0) {
329         log_err("FAIL: utrans_clone() failed\n");
330     }
331 
332     utrans_getID(t4, buf3, BUF_CAP);
333 
334     if(strcmp(buf2, buf3) != 0 ||
335         strcmp(buf1, buf3) == 0) {
336         log_err("FAIL: utrans_clone() failed\n");
337     }
338 
339     utrans_close(t1);
340     utrans_close(t2);
341     utrans_close(t3);
342     utrans_close(t4);
343 
344 }
345 
TestRegisterUnregister()346 static void TestRegisterUnregister(){
347     UErrorCode status=U_ZERO_ERROR;
348     UTransliterator* t1=NULL;
349     UTransliterator* rules=NULL, *rules2;
350     UTransliterator* inverse1=NULL;
351     UChar rule[]={ 0x0061, 0x003c, 0x003e, 0x0063}; /*a<>b*/
352 
353     U_STRING_DECL(ID, "TestA-TestB", 11);
354     U_STRING_INIT(ID, "TestA-TestB", 11);
355 
356     /* Make sure it doesn't exist */
357     t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
358     if(t1 != NULL || U_SUCCESS(status)) {
359         log_err("FAIL: TestA-TestB already registered\n");
360         return;
361     }
362     status=U_ZERO_ERROR;
363     /* Check inverse too */
364     inverse1=utrans_open("TestA-TestB", UTRANS_REVERSE, NULL,0,NULL,&status);
365     if(inverse1 != NULL || U_SUCCESS(status)) {
366         log_err("FAIL: TestA-TestB already registered\n");
367         return;
368     }
369     status=U_ZERO_ERROR;
370     /* Create it */
371     rules=utrans_open("TestA-TestB",UTRANS_FORWARD, rule, 4, NULL, &status);
372     if(U_FAILURE(status)){
373         log_err("FAIL: utrans_openRules(a<>B) failed with error=%s\n", myErrorName(status));
374         return;
375     }
376 
377     /* clone it so we can register it a second time */
378     rules2=utrans_clone(rules, &status);
379     if(U_FAILURE(status)) {
380         log_err("FAIL: utrans_clone(a<>B) failed with error=%s\n", myErrorName(status));
381         return;
382     }
383 
384     status=U_ZERO_ERROR;
385     /* Register it */
386     utrans_register(rules, &status);
387     if(U_FAILURE(status)){
388         log_err("FAIL: utrans_register failed with error=%s\n", myErrorName(status));
389         return;
390     }
391     status=U_ZERO_ERROR;
392     /* Now check again -- should exist now*/
393     t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
394     if(U_FAILURE(status) || t1 == NULL){
395         log_err("FAIL: TestA-TestB not registered\n");
396         return;
397     }
398     utrans_close(t1);
399 
400     /*unregister the instance*/
401     status=U_ZERO_ERROR;
402     utrans_unregister("TestA-TestB");
403     /* now Make sure it doesn't exist */
404     t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
405     if(U_SUCCESS(status) || t1 != NULL) {
406         log_err("FAIL: TestA-TestB isn't unregistered\n");
407         return;
408     }
409     utrans_close(t1);
410 
411     /* now with utrans_unregisterID(const UChar *) */
412     status=U_ZERO_ERROR;
413     utrans_register(rules2, &status);
414     if(U_FAILURE(status)){
415         log_err("FAIL: 2nd utrans_register failed with error=%s\n", myErrorName(status));
416         return;
417     }
418     status=U_ZERO_ERROR;
419     /* Now check again -- should exist now*/
420     t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
421     if(U_FAILURE(status) || t1 == NULL){
422         log_err("FAIL: 2nd TestA-TestB not registered\n");
423         return;
424     }
425     utrans_close(t1);
426 
427     /*unregister the instance*/
428     status=U_ZERO_ERROR;
429     utrans_unregisterID(ID, -1);
430     /* now Make sure it doesn't exist */
431     t1=utrans_openU(ID, -1, UTRANS_FORWARD,NULL,0,NULL, &status);
432     if(U_SUCCESS(status) || t1 != NULL) {
433         log_err("FAIL: 2nd TestA-TestB isn't unregistered\n");
434         return;
435     }
436 
437     utrans_close(t1);
438     utrans_close(inverse1);
439 }
440 
/*
 * Runs a handful of small rule sets through _expectRules().  Each table
 * entry holds the rules, the input text and the expected output.
 */
static void TestSimpleRules() {
    static const struct {
        const char* rules;
        const char* input;
        const char* expected;
    } cases[] = {
        /* Example: rules 1. ab>x|y
         *                2. yc>z
         *
         * []|eabcd  start - no match, copy e to translated buffer
         * [e]|abcd  match rule 1 - copy output & adjust cursor
         * [ex|y]cd  match rule 2 - copy output & adjust cursor
         * [exz]|d   no match, copy d to transliterated buffer
         * [exzd]|   done
         */
        { "ab>x|y;"
          "yc>z",
          "eabcd", "exzd" },

        /* Another set of rules:
         *    1. ab>x|yzacw
         *    2. za>q
         *    3. qc>r
         *    4. cw>n
         *
         * []|ab       Rule 1
         * [x|yzacw]   No match
         * [xy|zacw]   Rule 2
         * [xyq|cw]    Rule 4
         * [xyqn]|     Done
         */
        { "ab>x|yzacw;"
          "za>q;"
          "qc>r;"
          "cw>n",
          "ab", "xyqn" },

        /* Test categories
         */
        { "$dummy=" "\\uE100" ";" /* careful here with E100 */
          "$vowel=[aeiouAEIOU];"
          "$lu=[:Lu:];"
          "$vowel } $lu > '!';"
          "$vowel > '&';"
          "'!' { $lu > '^';"
          "$lu > '*';"
          "a > ERROR",
          "abcdefgABCDEFGU", "&bcd&fg!^**!^*&" },

        /* Test multiple passes
         */
        { "abc > xy;"
          "::Null;"
          "aba > z;",
          "abc ababc aba", "xy abxy z" }
    };
    int32_t k;

    for (k = 0; k < UPRV_LENGTHOF(cases); ++k) {
        _expectRules(cases[k].rules, cases[k].input, cases[k].expected);
    }
}
493 
TestFilter()494 static void TestFilter() {
495     UErrorCode status = U_ZERO_ERROR;
496     UChar filt[128];
497     UChar buf[128];
498     UChar exp[128];
499     char *cbuf;
500     int32_t limit;
501     const char* DATA[] = {
502         "[^c]", /* Filter out 'c' */
503         "abcde",
504         "\\u0061\\u0062c\\u0064\\u0065",
505 
506         "", /* No filter */
507         "abcde",
508         "\\u0061\\u0062\\u0063\\u0064\\u0065"
509     };
510     int32_t DATA_length = UPRV_LENGTHOF(DATA);
511     int32_t i;
512 
513     UTransliterator* hex = utrans_open("Any-Hex", UTRANS_FORWARD, NULL,0,NULL,&status);
514 
515     if (hex == 0 || U_FAILURE(status)) {
516         log_err("FAIL: utrans_open(Unicode-Hex) failed, error=%s\n",
517                 u_errorName(status));
518         goto exit;
519     }
520 
521     for (i=0; i<DATA_length; i+=3) {
522         /*u_uastrcpy(filt, DATA[i]);*/
523         u_charsToUChars(DATA[i], filt, (int32_t)strlen(DATA[i])+1);
524         utrans_setFilter(hex, filt, -1, &status);
525 
526         if (U_FAILURE(status)) {
527             log_err("FAIL: utrans_setFilter() failed, error=%s\n",
528                     u_errorName(status));
529             goto exit;
530         }
531 
532         /*u_uastrcpy(buf, DATA[i+1]);*/
533         u_charsToUChars(DATA[i+1], buf, (int32_t)strlen(DATA[i+1])+1);
534         limit = 5;
535         utrans_transUChars(hex, buf, NULL, 128, 0, &limit, &status);
536 
537         if (U_FAILURE(status)) {
538             log_err("FAIL: utrans_transUChars() failed, error=%s\n",
539                     u_errorName(status));
540             goto exit;
541         }
542 
543         cbuf=aescstrdup(buf, -1);
544         u_charsToUChars(DATA[i+2], exp, (int32_t)strlen(DATA[i+2])+1);
545         if (0 == u_strcmp(buf, exp)) {
546             log_verbose("Ok: %s | %s -> %s\n", DATA[i+1], DATA[i], cbuf);
547         } else {
548             log_err("FAIL: %s | %s -> %s, expected %s\n", DATA[i+1], DATA[i], cbuf, DATA[i+2]);
549         }
550     }
551 
552  exit:
553     utrans_close(hex);
554 }
555 
556 /**
557  * Test the UReplaceableCallback extractBetween support.  We use a
558  * transliterator known to rely on this call.
559  */
TestExtractBetween()560 static void TestExtractBetween() {
561 
562     UTransliterator *trans;
563     UErrorCode status = U_ZERO_ERROR;
564     UParseError parseErr;
565 
566     trans = utrans_open("Lower", UTRANS_FORWARD, NULL, -1,
567                         &parseErr, &status);
568 
569     if (U_FAILURE(status)) {
570         log_err("FAIL: utrans_open(Lower) failed, error=%s\n",
571                 u_errorName(status));
572     } else {
573         _expect(trans, "ABC", "abc");
574 
575         utrans_close(trans);
576     }
577 }
578 
579 /**
580  * Test utrans_toRules, utrans_getSourceSet
581  */
582 
583 /* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
584    filter & source set 4-20 chars */
585 static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
586 static const char* transSimpleCName = "yo-yo_BJ";
587 
588 enum { kUBufMax = 256 };
TestGetRulesAndSourceSet()589 static void TestGetRulesAndSourceSet() {
590     UErrorCode status = U_ZERO_ERROR;
591     UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
592     if ( U_SUCCESS(status) ) {
593         USet* uset;
594         UChar ubuf[kUBufMax];
595         int32_t ulen;
596 
597         status = U_ZERO_ERROR;
598         ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status);
599         if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
600             log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
601                     u_errorName(status), ulen);
602         }
603 
604         status = U_ZERO_ERROR;
605         ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status);
606         if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
607             log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
608                     u_errorName(status), ulen);
609         }
610 
611         status = U_ZERO_ERROR;
612         ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status);
613         if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
614             log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
615                     u_errorName(status), ulen);
616         }
617 
618         status = U_ZERO_ERROR;
619         uset = utrans_getSourceSet(utrans, FALSE, NULL, &status);
620         ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
621         uset_close(uset);
622         if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
623             log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
624                     u_errorName(status), ulen);
625         }
626 
627         status = U_ZERO_ERROR;
628         uset = utrans_getSourceSet(utrans, TRUE, NULL, &status);
629         ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
630         uset_close(uset);
631         if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
632             log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
633                     u_errorName(status), ulen);
634         }
635 
636         utrans_close(utrans);
637     } else {
638         log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
639                 transSimpleCName, u_errorName(status));
640     }
641 }
642 
/* One data-driven case for TestDataVariantsCompounds(). */
typedef struct {
    const char *transID;    /* transliterator ID; passed through u_unescape() */
    const char *sourceText; /* input text, written with \uXXXX escapes */
    const char *targetText; /* expected output, written with \uXXXX escapes */
} TransIDSourceTarg;
648 
649 static const TransIDSourceTarg dataVarCompItems[] = {
650     { "Simplified-Traditional",
651        "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
652        "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
653     { "Halfwidth-Fullwidth",
654       "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
655       "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
656     { "Han-Latin/Names; Latin-Bopomofo",
657        "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
658        "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F" },
659     { "Greek-Latin",
660       "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
661       "A \\u0100I H\\u0100I RH" },
662 /* The following transform is provisional and not present in ICU 60
663     { "Greek-Latin/BGN",
664       "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
665       "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
666 */
667     { "Greek-Latin/UNGEGN",
668       "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
669       "A A A R" },
670     { NULL, NULL, NULL }
671 };
672 
673 enum { kBBufMax = 384 };
TestDataVariantsCompounds()674 static void TestDataVariantsCompounds() {
675     const TransIDSourceTarg* itemsPtr;
676     for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
677         UErrorCode status = U_ZERO_ERROR;
678         UChar utrid[kUBufMax];
679         int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
680         UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
681         if (U_FAILURE(status)) {
682             log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
683             continue;
684         }
685         UChar text[kUBufMax];
686         int32_t textLen =  u_unescape(itemsPtr->sourceText, text, kUBufMax);
687         int32_t textLim = textLen;
688         utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
689         if (U_FAILURE(status)) {
690             log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
691         } else {
692             UChar expect[kUBufMax];
693             int32_t expectLen =  u_unescape(itemsPtr->targetText, expect, kUBufMax);
694             if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
695                 char btext[kBBufMax], bexpect[kBBufMax];
696                 u_austrncpy(btext, text, textLen);
697                 u_austrncpy(bexpect, expect, expectLen);
698                 log_err("FAIL: utrans_transUChars(%s),\n       expect %s\n       get    %s\n", itemsPtr->transID, bexpect, btext);
699             }
700         }
701         utrans_close(utrans);
702     }
703 }
704 
_expectRules(const char * crules,const char * cfrom,const char * cto)705 static void _expectRules(const char* crules,
706                   const char* cfrom,
707                   const char* cto) {
708     /* u_uastrcpy has no capacity param for the buffer -- so just
709      * make all buffers way too big */
710     enum { CAP = 256 };
711     UChar rules[CAP];
712     UTransliterator *trans;
713     UErrorCode status = U_ZERO_ERROR;
714     UParseError parseErr;
715 
716     u_uastrcpy(rules, crules);
717 
718     trans = utrans_open(crules /*use rules as ID*/, UTRANS_FORWARD, rules, -1,
719                              &parseErr, &status);
720     if (U_FAILURE(status)) {
721         utrans_close(trans);
722         log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
723                 crules, u_errorName(status));
724         return;
725     }
726 
727     _expect(trans, cfrom, cto);
728 
729     utrans_close(trans);
730 }
731 
_expect(const UTransliterator * trans,const char * cfrom,const char * cto)732 static void _expect(const UTransliterator* trans,
733              const char* cfrom,
734              const char* cto) {
735     /* u_uastrcpy has no capacity param for the buffer -- so just
736      * make all buffers way too big */
737     enum { CAP = 256 };
738     UChar from[CAP];
739     UChar to[CAP];
740     UChar buf[CAP];
741     const UChar *ID;
742     int32_t IDLength;
743     const char *id;
744 
745     UErrorCode status = U_ZERO_ERROR;
746     int32_t limit;
747     UTransPosition pos;
748     XReplaceable xrep;
749     XReplaceable *xrepPtr = &xrep;
750     UReplaceableCallbacks xrepVtable;
751 
752     u_uastrcpy(from, cfrom);
753     u_uastrcpy(to, cto);
754 
755     ID = utrans_getUnicodeID(trans, &IDLength);
756     id = aescstrdup(ID, IDLength);
757 
758     /* utrans_transUChars() */
759     u_strcpy(buf, from);
760     limit = u_strlen(buf);
761     utrans_transUChars(trans, buf, NULL, CAP, 0, &limit, &status);
762     if (U_FAILURE(status)) {
763         log_err("FAIL: utrans_transUChars() failed, error=%s\n",
764                 u_errorName(status));
765         return;
766     }
767 
768     if (0 == u_strcmp(buf, to)) {
769         log_verbose("Ok: utrans_transUChars(%s) x %s -> %s\n",
770                     id, cfrom, cto);
771     } else {
772         char actual[CAP];
773         u_austrcpy(actual, buf);
774         log_err("FAIL: utrans_transUChars(%s) x %s -> %s, expected %s\n",
775                 id, cfrom, actual, cto);
776     }
777 
778     /* utrans_transIncrementalUChars() */
779     u_strcpy(buf, from);
780     pos.start = pos.contextStart = 0;
781     pos.limit = pos.contextLimit = u_strlen(buf);
782     utrans_transIncrementalUChars(trans, buf, NULL, CAP, &pos, &status);
783     utrans_transUChars(trans, buf, NULL, CAP, pos.start, &pos.limit, &status);
784     if (U_FAILURE(status)) {
785         log_err("FAIL: utrans_transIncrementalUChars() failed, error=%s\n",
786                 u_errorName(status));
787         return;
788     }
789 
790     if (0 == u_strcmp(buf, to)) {
791         log_verbose("Ok: utrans_transIncrementalUChars(%s) x %s -> %s\n",
792                     id, cfrom, cto);
793     } else {
794         char actual[CAP];
795         u_austrcpy(actual, buf);
796         log_err("FAIL: utrans_transIncrementalUChars(%s) x %s -> %s, expected %s\n",
797                 id, cfrom, actual, cto);
798     }
799 
800     /* utrans_trans() */
801     InitXReplaceableCallbacks(&xrepVtable);
802     InitXReplaceable(&xrep, cfrom);
803     limit = u_strlen(from);
804     utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, 0, &limit, &status);
805     if (U_FAILURE(status)) {
806         log_err("FAIL: utrans_trans() failed, error=%s\n",
807                 u_errorName(status));
808         FreeXReplaceable(&xrep);
809         return;
810     }
811 
812     if (0 == u_strcmp(xrep.text, to)) {
813         log_verbose("Ok: utrans_trans(%s) x %s -> %s\n",
814                     id, cfrom, cto);
815     } else {
816         char actual[CAP];
817         u_austrcpy(actual, xrep.text);
818         log_err("FAIL: utrans_trans(%s) x %s -> %s, expected %s\n",
819                 id, cfrom, actual, cto);
820     }
821     FreeXReplaceable(&xrep);
822 
823     /* utrans_transIncremental() */
824     InitXReplaceable(&xrep, cfrom);
825     pos.start = pos.contextStart = 0;
826     pos.limit = pos.contextLimit = u_strlen(from);
827     utrans_transIncremental(trans, (UReplaceable*)xrepPtr, &xrepVtable, &pos, &status);
828     utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, pos.start, &pos.limit, &status);
829     if (U_FAILURE(status)) {
830         log_err("FAIL: utrans_transIncremental() failed, error=%s\n",
831                 u_errorName(status));
832         FreeXReplaceable(&xrep);
833         return;
834     }
835 
836     if (0 == u_strcmp(xrep.text, to)) {
837         log_verbose("Ok: utrans_transIncremental(%s) x %s -> %s\n",
838                     id, cfrom, cto);
839     } else {
840         char actual[CAP];
841         u_austrcpy(actual, xrep.text);
842         log_err("FAIL: utrans_transIncremental(%s) x %s -> %s, expected %s\n",
843                 id, cfrom, actual, cto);
844     }
845     FreeXReplaceable(&xrep);
846 }
847 
848 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
849