1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  convtest.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003jul15
16 *   created by: Markus W. Scherer
17 *
18 *   Test file for data-driven conversion tests.
19 */
20 
21 #include "unicode/utypes.h"
22 
23 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION
 * is set is slightly more than necessary - it also removes tests for
 * Unicode charsets like UTF-8 that should still work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to exclude only the
 * test cases for legacy charsets.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
33 
34 #include "unicode/ucnv.h"
35 #include "unicode/unistr.h"
36 #include "unicode/parsepos.h"
37 #include "unicode/uniset.h"
38 #include "unicode/ustring.h"
39 #include "unicode/ures.h"
40 #include "unicode/utf16.h"
41 #include "convtest.h"
42 #include "cmemory.h"
43 #include "unicode/tstdtmod.h"
44 #include <string.h>
45 #include <stdlib.h>
46 
// Characters that the test data uses, as the first character of a
// "callback" string, to select the converter error callback.
enum {
    SUB_CB='?',   // selects the SUBSTITUTE callback
    SKIP_CB='0',  // selects the SKIP callback
    STOP_CB='.',  // selects the STOP callback
    ESC_CB='&'    // selects the ESCAPE callback
};
54 
ConversionTest()55 ConversionTest::ConversionTest() {
56     UErrorCode errorCode=U_ZERO_ERROR;
57     utf8Cnv=ucnv_open("UTF-8", &errorCode);
58     ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
59     if(U_FAILURE(errorCode)) {
60         errln("unable to open UTF-8 converter");
61     }
62 }
63 
~ConversionTest()64 ConversionTest::~ConversionTest() {
65     ucnv_close(utf8Cnv);
66 }
67 
68 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)69 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
70     if (exec) logln("TestSuite ConversionTest: ");
71     TESTCASE_AUTO_BEGIN;
72 #if !UCONFIG_NO_FILE_IO
73     TESTCASE_AUTO(TestToUnicode);
74     TESTCASE_AUTO(TestFromUnicode);
75     TESTCASE_AUTO(TestGetUnicodeSet);
76 #endif
77     TESTCASE_AUTO(TestGetUnicodeSet2);
78     TESTCASE_AUTO(TestDefaultIgnorableCallback);
79     TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
80     TESTCASE_AUTO(TestUTF8ToUTF8Streaming);
81     TESTCASE_AUTO_END;
82 }
83 
84 // test data interface ----------------------------------------------------- ***
85 
86 void
TestToUnicode()87 ConversionTest::TestToUnicode() {
88     ConversionCase cc;
89     char charset[100], cbopt[4];
90     const char *option;
91     UnicodeString s, unicode;
92     int32_t offsetsLength;
93     UConverterToUCallback callback;
94 
95     TestDataModule *dataModule;
96     TestData *testData;
97     const DataMap *testCase;
98     UErrorCode errorCode;
99     int32_t i;
100 
101     errorCode=U_ZERO_ERROR;
102     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
103     if(U_SUCCESS(errorCode)) {
104         testData=dataModule->createTestData("toUnicode", errorCode);
105         if(U_SUCCESS(errorCode)) {
106             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
107                 if(U_FAILURE(errorCode)) {
108                     errln("error retrieving conversion/toUnicode test case %d - %s",
109                             i, u_errorName(errorCode));
110                     errorCode=U_ZERO_ERROR;
111                     continue;
112                 }
113 
114                 cc.caseNr=i;
115 
116                 s=testCase->getString("charset", errorCode);
117                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
118                 cc.charset=charset;
119 
120                 // BEGIN android-added
121                 // To save space, Android does not build full ISO-2022-CN tables.
122                 // We skip the TestGetKeywordValuesForLocale for counting available collations.
123                 if (strlen(charset) >= 8 &&
124                     strncmp(charset+4, "2022-CN", 4) == 0) {
125                     continue;
126                 }
127                 // END android-added
128 
129                 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
130                 unicode=testCase->getString("unicode", errorCode);
131                 cc.unicode=unicode.getBuffer();
132                 cc.unicodeLength=unicode.length();
133 
134                 offsetsLength=0;
135                 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
136                 if(offsetsLength==0) {
137                     cc.offsets=NULL;
138                 } else if(offsetsLength!=unicode.length()) {
139                     errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
140                             i, unicode.length(), offsetsLength);
141                     errorCode=U_ILLEGAL_ARGUMENT_ERROR;
142                 }
143 
144                 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
145                 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
146 
147                 s=testCase->getString("errorCode", errorCode);
148                 if(s==UNICODE_STRING("invalid", 7)) {
149                     cc.outErrorCode=U_INVALID_CHAR_FOUND;
150                 } else if(s==UNICODE_STRING("illegal", 7)) {
151                     cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
152                 } else if(s==UNICODE_STRING("truncated", 9)) {
153                     cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
154                 } else if(s==UNICODE_STRING("illesc", 6)) {
155                     cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
156                 } else if(s==UNICODE_STRING("unsuppesc", 9)) {
157                     cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
158                 } else {
159                     cc.outErrorCode=U_ZERO_ERROR;
160                 }
161 
162                 s=testCase->getString("callback", errorCode);
163                 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
164                 cc.cbopt=cbopt;
165                 switch(cbopt[0]) {
166                 case SUB_CB:
167                     callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
168                     break;
169                 case SKIP_CB:
170                     callback=UCNV_TO_U_CALLBACK_SKIP;
171                     break;
172                 case STOP_CB:
173                     callback=UCNV_TO_U_CALLBACK_STOP;
174                     break;
175                 case ESC_CB:
176                     callback=UCNV_TO_U_CALLBACK_ESCAPE;
177                     break;
178                 default:
179                     callback=NULL;
180                     break;
181                 }
182                 option=callback==NULL ? cbopt : cbopt+1;
183                 if(*option==0) {
184                     option=NULL;
185                 }
186 
187                 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
188 
189                 if(U_FAILURE(errorCode)) {
190                     errln("error parsing conversion/toUnicode test case %d - %s",
191                             i, u_errorName(errorCode));
192                     errorCode=U_ZERO_ERROR;
193                 } else {
194                     logln("TestToUnicode[%d] %s", i, charset);
195                     ToUnicodeCase(cc, callback, option);
196                 }
197             }
198             delete testData;
199         }
200         delete dataModule;
201     }
202     else {
203         dataerrln("Could not load test conversion data");
204     }
205 }
206 
207 void
TestFromUnicode()208 ConversionTest::TestFromUnicode() {
209     ConversionCase cc;
210     char charset[100], cbopt[4];
211     const char *option;
212     UnicodeString s, unicode, invalidUChars;
213     int32_t offsetsLength, index;
214     UConverterFromUCallback callback;
215 
216     TestDataModule *dataModule;
217     TestData *testData;
218     const DataMap *testCase;
219     const UChar *p;
220     UErrorCode errorCode;
221     int32_t i, length;
222 
223     errorCode=U_ZERO_ERROR;
224     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
225     if(U_SUCCESS(errorCode)) {
226         testData=dataModule->createTestData("fromUnicode", errorCode);
227         if(U_SUCCESS(errorCode)) {
228             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
229                 if(U_FAILURE(errorCode)) {
230                     errln("error retrieving conversion/fromUnicode test case %d - %s",
231                             i, u_errorName(errorCode));
232                     errorCode=U_ZERO_ERROR;
233                     continue;
234                 }
235 
236                 cc.caseNr=i;
237 
238                 s=testCase->getString("charset", errorCode);
239                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
240                 cc.charset=charset;
241 
242                 // BEGIN android-added
243                 // To save space, Android does not build full ISO-2022-CN tables.
244                 // We skip the TestGetKeywordValuesForLocale for counting available collations.
245                 if (strlen(charset) >= 8 &&
246                     strncmp(charset+4, "2022-CN", 4) == 0) {
247                     continue;
248                 }
249                 // END android-added
250 
251                 unicode=testCase->getString("unicode", errorCode);
252                 cc.unicode=unicode.getBuffer();
253                 cc.unicodeLength=unicode.length();
254                 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
255 
256                 offsetsLength=0;
257                 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
258                 if(offsetsLength==0) {
259                     cc.offsets=NULL;
260                 } else if(offsetsLength!=cc.bytesLength) {
261                     errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
262                             i, cc.bytesLength, offsetsLength);
263                     errorCode=U_ILLEGAL_ARGUMENT_ERROR;
264                 }
265 
266                 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
267                 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
268 
269                 s=testCase->getString("errorCode", errorCode);
270                 if(s==UNICODE_STRING("invalid", 7)) {
271                     cc.outErrorCode=U_INVALID_CHAR_FOUND;
272                 } else if(s==UNICODE_STRING("illegal", 7)) {
273                     cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
274                 } else if(s==UNICODE_STRING("truncated", 9)) {
275                     cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
276                 } else {
277                     cc.outErrorCode=U_ZERO_ERROR;
278                 }
279 
280                 s=testCase->getString("callback", errorCode);
281                 cc.setSub=0; // default: no subchar
282 
283                 if((index=s.indexOf((UChar)0))>0) {
284                     // read NUL-separated subchar first, if any
285                     // copy the subchar from Latin-1 characters
286                     // start after the NUL
287                     p=s.getTerminatedBuffer();
288                     length=index+1;
289                     p+=length;
290                     length=s.length()-length;
291                     if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
292                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
293                     } else {
294                         int32_t j;
295 
296                         for(j=0; j<length; ++j) {
297                             cc.subchar[j]=(char)p[j];
298                         }
299                         // NUL-terminate the subchar
300                         cc.subchar[j]=0;
301                         cc.setSub=1;
302                     }
303 
304                     // remove the NUL and subchar from s
305                     s.truncate(index);
306                 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
307                     // read a substitution string, separated by an equal sign
308                     p=s.getBuffer()+index+1;
309                     length=s.length()-(index+1);
310                     if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
311                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
312                     } else {
313                         u_memcpy(cc.subString, p, length);
314                         // NUL-terminate the subString
315                         cc.subString[length]=0;
316                         cc.setSub=-1;
317                     }
318 
319                     // remove the equal sign and subString from s
320                     s.truncate(index);
321                 }
322 
323                 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
324                 cc.cbopt=cbopt;
325                 switch(cbopt[0]) {
326                 case SUB_CB:
327                     callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
328                     break;
329                 case SKIP_CB:
330                     callback=UCNV_FROM_U_CALLBACK_SKIP;
331                     break;
332                 case STOP_CB:
333                     callback=UCNV_FROM_U_CALLBACK_STOP;
334                     break;
335                 case ESC_CB:
336                     callback=UCNV_FROM_U_CALLBACK_ESCAPE;
337                     break;
338                 default:
339                     callback=NULL;
340                     break;
341                 }
342                 option=callback==NULL ? cbopt : cbopt+1;
343                 if(*option==0) {
344                     option=NULL;
345                 }
346 
347                 invalidUChars=testCase->getString("invalidUChars", errorCode);
348                 cc.invalidUChars=invalidUChars.getBuffer();
349                 cc.invalidLength=invalidUChars.length();
350 
351                 if(U_FAILURE(errorCode)) {
352                     errln("error parsing conversion/fromUnicode test case %d - %s",
353                             i, u_errorName(errorCode));
354                     errorCode=U_ZERO_ERROR;
355                 } else {
356                     logln("TestFromUnicode[%d] %s", i, charset);
357                     FromUnicodeCase(cc, callback, option);
358                 }
359             }
360             delete testData;
361         }
362         delete dataModule;
363     }
364     else {
365         dataerrln("Could not load test conversion data");
366     }
367 }
368 
369 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
370 
371 void
TestGetUnicodeSet()372 ConversionTest::TestGetUnicodeSet() {
373     char charset[100];
374     UnicodeString s, map, mapnot;
375     int32_t which;
376 
377     ParsePosition pos;
378     UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
379     UnicodeSet *cnvSetPtr = &cnvSet;
380     LocalUConverterPointer cnv;
381 
382     TestDataModule *dataModule;
383     TestData *testData;
384     const DataMap *testCase;
385     UErrorCode errorCode;
386     int32_t i;
387 
388     errorCode=U_ZERO_ERROR;
389     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
390     if(U_SUCCESS(errorCode)) {
391         testData=dataModule->createTestData("getUnicodeSet", errorCode);
392         if(U_SUCCESS(errorCode)) {
393             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
394                 if(U_FAILURE(errorCode)) {
395                     errln("error retrieving conversion/getUnicodeSet test case %d - %s",
396                             i, u_errorName(errorCode));
397                     errorCode=U_ZERO_ERROR;
398                     continue;
399                 }
400 
401                 s=testCase->getString("charset", errorCode);
402                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
403 
404                 // BEGIN android-added
405                 // To save space, Android does not build full ISO-2022-CN tables.
406                 // We skip the TestGetKeywordValuesForLocale for counting available collations.
407                 if (strlen(charset) >= 8 &&
408                     strncmp(charset+4, "2022-CN", 4) == 0) {
409                     continue;
410                 }
411                 // END android-added
412 
413                 map=testCase->getString("map", errorCode);
414                 mapnot=testCase->getString("mapnot", errorCode);
415 
416                 which=testCase->getInt28("which", errorCode);
417 
418                 if(U_FAILURE(errorCode)) {
419                     errln("error parsing conversion/getUnicodeSet test case %d - %s",
420                             i, u_errorName(errorCode));
421                     errorCode=U_ZERO_ERROR;
422                     continue;
423                 }
424 
425                 // test this test case
426                 mapSet.clear();
427                 mapnotSet.clear();
428 
429                 pos.setIndex(0);
430                 mapSet.applyPattern(map, pos, 0, NULL, errorCode);
431                 if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {
432                     errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
433                           "    error index %d  index %d  U+%04x",
434                             i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
435                     errorCode=U_ZERO_ERROR;
436                     continue;
437                 }
438 
439                 pos.setIndex(0);
440                 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
441                 if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {
442                     errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
443                           "    error index %d  index %d  U+%04x",
444                             i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
445                     errorCode=U_ZERO_ERROR;
446                     continue;
447                 }
448 
449                 logln("TestGetUnicodeSet[%d] %s", i, charset);
450 
451                 cnv.adoptInstead(cnv_open(charset, errorCode));
452                 if(U_FAILURE(errorCode)) {
453                     errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
454                             charset, i, u_errorName(errorCode));
455                     errorCode=U_ZERO_ERROR;
456                     continue;
457                 }
458 
459                 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
460 
461                 if(U_FAILURE(errorCode)) {
462                     errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
463                             charset, i, u_errorName(errorCode));
464                     errorCode=U_ZERO_ERROR;
465                     continue;
466                 }
467 
468                 // are there items that must be in cnvSet but are not?
469                 (diffSet=mapSet).removeAll(cnvSet);
470                 if(!diffSet.isEmpty()) {
471                     diffSet.toPattern(s, TRUE);
472                     if(s.length()>100) {
473                         s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
474                     }
475                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
476                             charset, i);
477                     errln(s);
478                 }
479 
480                 // are there items that must not be in cnvSet but are?
481                 (diffSet=mapnotSet).retainAll(cnvSet);
482                 if(!diffSet.isEmpty()) {
483                     diffSet.toPattern(s, TRUE);
484                     if(s.length()>100) {
485                         s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
486                     }
487                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
488                             charset, i);
489                     errln(s);
490                 }
491             }
492             delete testData;
493         }
494         delete dataModule;
495     }
496     else {
497         dataerrln("Could not load test conversion data");
498     }
499 }
500 
501 U_CDECL_BEGIN
502 static void U_CALLCONV
getUnicodeSetCallback(const void * context,UConverterFromUnicodeArgs *,const UChar *,int32_t,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)503 getUnicodeSetCallback(const void *context,
504                       UConverterFromUnicodeArgs * /*fromUArgs*/,
505                       const UChar* /*codeUnits*/,
506                       int32_t /*length*/,
507                       UChar32 codePoint,
508                       UConverterCallbackReason reason,
509                       UErrorCode *pErrorCode) {
510     if(reason<=UCNV_IRREGULAR) {
511         ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
512         *pErrorCode=U_ZERO_ERROR;                    // skip
513     }  // else ignore the reset, close and clone calls.
514 }
515 U_CDECL_END
516 
517 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
518 void
TestGetUnicodeSet2()519 ConversionTest::TestGetUnicodeSet2() {
520     // Build a string with all code points.
521     UChar32 cpLimit;
522     int32_t s0Length;
523     if(quick) {
524         cpLimit=s0Length=0x10000;  // BMP only
525     } else {
526         cpLimit=0x110000;
527         s0Length=0x10000+0x200000;  // BMP + surrogate pairs
528     }
529     UChar *s0=new UChar[s0Length];
530     if(s0==NULL) {
531         return;
532     }
533     UChar *s=s0;
534     UChar32 c;
535     UChar c2;
536     // low BMP
537     for(c=0; c<=0xd7ff; ++c) {
538         *s++=(UChar)c;
539     }
540     // trail surrogates
541     for(c=0xdc00; c<=0xdfff; ++c) {
542         *s++=(UChar)c;
543     }
544     // lead surrogates
545     // (after trails so that there is not even one surrogate pair in between)
546     for(c=0xd800; c<=0xdbff; ++c) {
547         *s++=(UChar)c;
548     }
549     // high BMP
550     for(c=0xe000; c<=0xffff; ++c) {
551         *s++=(UChar)c;
552     }
553     // supplementary code points = surrogate pairs
554     if(cpLimit==0x110000) {
555         for(c=0xd800; c<=0xdbff; ++c) {
556             for(c2=0xdc00; c2<=0xdfff; ++c2) {
557                 *s++=(UChar)c;
558                 *s++=c2;
559             }
560         }
561     }
562 
563     static const char *const cnvNames[]={
564         "UTF-8",
565         "UTF-7",
566         "UTF-16",
567         "US-ASCII",
568         "ISO-8859-1",
569         "windows-1252",
570         "Shift-JIS",
571         "ibm-1390",  // EBCDIC_STATEFUL table
572         "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
573         "HZ",
574         "ISO-2022-JP",
575         "JIS7",
576         "ISO-2022-CN",
577         "ISO-2022-CN-EXT",
578         "LMBCS"
579     };
580     LocalUConverterPointer cnv;
581     char buffer[1024];
582     int32_t i;
583     for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
584         UErrorCode errorCode=U_ZERO_ERROR;
585         cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
586         if(U_FAILURE(errorCode)) {
587             errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
588             continue;
589         }
590         UnicodeSet expected;
591         ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
592         if(U_FAILURE(errorCode)) {
593             errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
594             continue;
595         }
596         UConverterUnicodeSet which;
597         for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
598             if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
599                 ucnv_setFallback(cnv.getAlias(), TRUE);
600             }
601             expected.add(0, cpLimit-1);
602             s=s0;
603             UBool flush;
604             do {
605                 char *t=buffer;
606                 flush=(UBool)(s==s0+s0Length);
607                 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
608                 if(U_FAILURE(errorCode)) {
609                     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
610                         errorCode=U_ZERO_ERROR;
611                         continue;
612                     } else {
613                         break;  // unexpected error, should not occur
614                     }
615                 }
616             } while(!flush);
617             UnicodeSet set;
618             ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
619             if(cpLimit<0x110000) {
620                 set.remove(cpLimit, 0x10ffff);
621             }
622             if(which==UCNV_ROUNDTRIP_SET) {
623                 // ignore PUA code points because they will be converted even if they
624                 // are fallbacks and when other fallbacks are turned off,
625                 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
626                 expected.remove(0xe000, 0xf8ff);
627                 expected.remove(0xf0000, 0xffffd);
628                 expected.remove(0x100000, 0x10fffd);
629                 set.remove(0xe000, 0xf8ff);
630                 set.remove(0xf0000, 0xffffd);
631                 set.remove(0x100000, 0x10fffd);
632             }
633             if(set!=expected) {
634                 // First try to see if we have different sets because ucnv_getUnicodeSet()
635                 // added strings: The above conversion method does not tell us what strings might be convertible.
636                 // Remove strings from the set and compare again.
637                 set.removeAllStrings();
638             }
639             if(set!=expected) {
640                 UnicodeSet diffSet;
641                 UnicodeString out;
642 
643                 // are there items that must be in the set but are not?
644                 (diffSet=expected).removeAll(set);
645                 if(!diffSet.isEmpty()) {
646                     diffSet.toPattern(out, TRUE);
647                     if(out.length()>100) {
648                         out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
649                     }
650                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
651                             cnvNames[i], which);
652                     errln(out);
653                 }
654 
655                 // are there items that must not be in the set but are?
656                 (diffSet=set).removeAll(expected);
657                 if(!diffSet.isEmpty()) {
658                     diffSet.toPattern(out, TRUE);
659                     if(out.length()>100) {
660                         out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
661                     }
662                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
663                             cnvNames[i], which);
664                     errln(out);
665                 }
666             }
667         }
668     }
669 
670     delete [] s0;
671 }
672 
673 // Test all codepoints which has the default ignorable Unicode property are ignored if they have no mapping
674 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
675 void
TestDefaultIgnorableCallback()676 ConversionTest::TestDefaultIgnorableCallback() {
677     UErrorCode status = U_ZERO_ERROR;
678     const char *cnv_name = "euc-jp-2007";
679     const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
680     const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
681 
682     LocalPointer<UnicodeSet> set_ignorable(new UnicodeSet(pattern_ignorable, status));
683     if (U_FAILURE(status)) {
684         dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
685         return;
686     }
687 
688     LocalPointer<UnicodeSet> set_not_ignorable(new UnicodeSet(pattern_not_ignorable, status));
689     if (U_FAILURE(status)) {
690         dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
691         return;
692     }
693 
694     LocalUConverterPointer cnv(cnv_open(cnv_name, status));
695     if (U_FAILURE(status)) {
696         dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
697         return;
698     }
699 
700     // set callback for the converter
701     ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
702 
703     UChar32 input[1];
704     char output[10];
705     int32_t outputLength;
706 
707     // test default ignorables are ignored
708     int size = set_ignorable->size();
709     for (int i = 0; i < size; i++) {
710         status = U_ZERO_ERROR;
711         outputLength= 0;
712 
713         input[0] = set_ignorable->charAt(i);
714 
715         outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
716         if (U_FAILURE(status) || outputLength != 0) {
717             errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
718         }
719     }
720 
721     // test non-ignorables are not ignored
722     size = set_not_ignorable->size();
723     for (int i = 0; i < size; i++) {
724         status = U_ZERO_ERROR;
725         outputLength= 0;
726 
727         input[0] = set_not_ignorable->charAt(i);
728 
729         if (input[0] == 0) {
730             continue;
731         }
732 
733         outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
734         if (U_FAILURE(status) || outputLength <= 0) {
735             errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
736         }
737     }
738 }
739 
740 void
TestUTF8ToUTF8Overflow()741 ConversionTest::TestUTF8ToUTF8Overflow() {
742     IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
743     LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
744     LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
745     static const char *text = "aä";  // ä: 2 bytes
746     const char *source = text;
747     const char *sourceLimit = text + strlen(text);
748     char result[20];
749     char *target = result;
750     const char *targetLimit = result + sizeof(result);
751     UChar buffer16[20];
752     UChar *pivotSource = buffer16;
753     UChar *pivotTarget = buffer16;
754     const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
755     int32_t length;
756 
757     // Convert with insufficient target capacity.
758     result[2] = 5;
759     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
760                    &target, result + 2, &source, sourceLimit,
761                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
762                    FALSE, FALSE, errorCode);
763     assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
764     length = (int32_t)(target - result);
765     assertEquals("number of bytes written", 2, length);
766     assertEquals("next byte not clobbered", 5, result[2]);
767 
768     // Convert the rest and flush.
769     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
770                    &target, targetLimit, &source, sourceLimit,
771                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
772                    FALSE, TRUE, errorCode);
773 
774     assertSuccess("UTF-8->UTF-8", errorCode);
775     length = (int32_t)(target - result);
776     assertEquals("3 bytes", 3, length);
777     if (length == 3) {
778         assertTrue("result same as input", memcmp(text, result, length) == 0);
779     }
780 
781     ucnv_reset(cnv1.getAlias());
782     ucnv_reset(cnv2.getAlias());
783     memset(result, 0, sizeof(result));
784     static const char *text2 = "a��";  // U+1F6B2 bicycle: 4 bytes
785     source = text2;
786     sourceLimit = text2 + strlen(text2);
787     target = result;
788     pivotSource = pivotTarget = buffer16;
789 
790     // Convert with insufficient target capacity.
791     result[3] = 5;
792     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
793                    &target, result + 3, &source, sourceLimit,
794                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
795                    FALSE, FALSE, errorCode);
796     assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
797     length = (int32_t)(target - result);
798     assertEquals("text2 number of bytes written", 3, length);
799     assertEquals("text2 next byte not clobbered", 5, result[3]);
800 
801     // Convert the rest and flush.
802     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
803                    &target, targetLimit, &source, sourceLimit,
804                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
805                    FALSE, TRUE, errorCode);
806 
807     assertSuccess("text2 UTF-8->UTF-8", errorCode);
808     length = (int32_t)(target - result);
809     assertEquals("text2 5 bytes", 5, length);
810     if (length == 5) {
811         assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
812     }
813 
814     ucnv_reset(cnv1.getAlias());
815     ucnv_reset(cnv2.getAlias());
816     memset(result, 0, sizeof(result));
817     static const char *illFormed = "\xf1\x91\x93\x96\x91\x94";  // U+514D6 + two more trail bytes
818     source = illFormed;
819     sourceLimit = illFormed + strlen(illFormed);
820     target = result;
821     pivotSource = pivotTarget = buffer16;
822 
823     ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);
824 
825     // Convert only two bytes and flush (but expect failure).
826     char errorBytes[10];
827     int8_t errorLength;
828     result[0] = 5;
829     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
830                    &target, targetLimit, &source, source + 2,
831                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
832                    FALSE, TRUE, errorCode);
833     assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
834     length = (int32_t)(target - result);
835     assertEquals("illFormed number of bytes written", 0, length);
836     errorLength = UPRV_LENGTHOF(errorBytes);
837     ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
838     assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
839     if (errorLength == 2) {
840         assertEquals("illFormed truncated errorBytes", 0xf191,
841                      ((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
842     }
843 
844     // Continue conversion starting with a trail byte.
845     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
846                    &target, targetLimit, &source, sourceLimit,
847                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
848                    FALSE, TRUE, errorCode);
849 
850     assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
851     length = (int32_t)(target - result);
852     assertEquals("illFormed trail byte number of bytes written", 0, length);
853     errorLength = UPRV_LENGTHOF(errorBytes);
854     ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
855     assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
856     if (errorLength == 1) {
857         assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
858     }
859 }
860 
861 void
TestUTF8ToUTF8Streaming()862 ConversionTest::TestUTF8ToUTF8Streaming() {
863     IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Streaming");
864     LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
865     LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
866 
867     // UTF8 encoded cyrillic part of 'Lorem ipsum'
868     static const char* text =
869         "\xd0\xb5\xd1\x82\x20\xd1\x81\xd1\x86\xd0\xb0\xd0\xb5\xd0\xb2\xd0"
870         "\xbe\xd0\xbb\xd0\xb0\x20\xd1\x81\xd0\xb0\xd0\xb4\xd0\xb8\xd0\xbf"
871         "\xd1\x81\xd1\x86\xd0\xb8\xd0\xbd\xd0\xb3\x20\xd0\xb0\xd1\x86\xd1"
872         "\x86\xd0\xbe\xd0\xbc\xd0\xbc\xd0\xbe\xd0\xb4\xd0\xb0\xd1\x80\xd0"
873         "\xb5\x20\xd1\x85\xd0\xb0\xd1\x81";
874 
875     int32_t chunk1 = 25; // partial lead at the end: 0xd0
876     int32_t chunk2 = 47; // partial tail at the beginning: 0xb0
877 
878     char result[128];
879 
880     int32_t sourceLen = (int32_t)strlen(text);
881     const char* source = text;
882     const char* sourceLimit = text + chunk1;
883 
884     int32_t targetLen = sizeof(result);
885     char* target = result;
886     const char* targetLimit = result + targetLen;
887 
888     UChar buffer16[20];
889     UChar* pivotSource = buffer16;
890     UChar* pivotTarget = buffer16;
891     const UChar* pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
892 
893     int32_t length;
894     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
895         &target, result + targetLen, &source, sourceLimit,
896         buffer16, &pivotSource, &pivotTarget, pivotLimit,
897         FALSE, FALSE, errorCode);
898 
899     length = (int32_t)(target - result);
900     targetLen -= length;
901     assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length);
902 
903     source = text + chunk1;
904     sourceLimit = source + chunk2;
905 
906     // Convert the rest and flush.
907     ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
908         &target, targetLimit, &source, sourceLimit,
909         buffer16, &pivotSource, &pivotTarget, pivotLimit,
910         FALSE, TRUE, errorCode);
911 
912     length = (int32_t)(target - result - length);
913     targetLen -= length;
914     assertEquals("Second chunk + 2 doesn't  match converted length", chunk2 + 1, length);
915 
916     assertEquals("Full text length match", sourceLen, sizeof(result) - targetLen);
917     assertSuccess("UTF-8->UTF-8", errorCode);
918 }
919 
920 // open testdata or ICU data converter ------------------------------------- ***
921 
922 UConverter *
cnv_open(const char * name,UErrorCode & errorCode)923 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
924     if(name!=NULL && *name=='+') {
925         // Converter names that start with '+' are ignored in ICU4J tests.
926         ++name;
927     }
928     if(name!=NULL && *name=='*') {
929         /* loadTestData(): set the data directory */
930         return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
931     } else {
932         return ucnv_open(name, &errorCode);
933     }
934 }
935 
936 // output helpers ---------------------------------------------------------- ***
937 
// Maps a value 0..15 to its lowercase hexadecimal digit character.
static inline char
hexDigit(uint8_t digit) {
    if(digit < 10) {
        return (char)('0' + digit);
    }
    return (char)('a' + (digit - 10));
}
942 
943 static char *
printBytes(const uint8_t * bytes,int32_t length,char * out)944 printBytes(const uint8_t *bytes, int32_t length, char *out) {
945     uint8_t b;
946 
947     if(length>0) {
948         b=*bytes++;
949         --length;
950         *out++=hexDigit((uint8_t)(b>>4));
951         *out++=hexDigit((uint8_t)(b&0xf));
952     }
953 
954     while(length>0) {
955         b=*bytes++;
956         --length;
957         *out++=' ';
958         *out++=hexDigit((uint8_t)(b>>4));
959         *out++=hexDigit((uint8_t)(b&0xf));
960     }
961     *out++=0;
962     return out;
963 }
964 
965 static char *
printUnicode(const UChar * unicode,int32_t length,char * out)966 printUnicode(const UChar *unicode, int32_t length, char *out) {
967     UChar32 c;
968     int32_t i;
969 
970     for(i=0; i<length;) {
971         if(i>0) {
972             *out++=' ';
973         }
974         U16_NEXT(unicode, i, length, c);
975         // write 4..6 digits
976         if(c>=0x100000) {
977             *out++='1';
978         }
979         if(c>=0x10000) {
980             *out++=hexDigit((uint8_t)((c>>16)&0xf));
981         }
982         *out++=hexDigit((uint8_t)((c>>12)&0xf));
983         *out++=hexDigit((uint8_t)((c>>8)&0xf));
984         *out++=hexDigit((uint8_t)((c>>4)&0xf));
985         *out++=hexDigit((uint8_t)(c&0xf));
986     }
987     *out++=0;
988     return out;
989 }
990 
// Writes each offset as exactly two characters, separated by single spaces,
// followed by a NUL terminator:
//   below -9 -> "-x", -9..-1 -> "-9".."-1", 0..9 -> " 0".." 9",
//   10..99 -> "10".."99", above 99 -> "xx".
// A NULL offsets pointer is treated as an empty list.
// Returns the position just after the terminator.
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count = (offsets != NULL) ? length : 0;

    for(int32_t index = 0; index < count; ++index) {
        if(index != 0) {
            *out++ = ' ';
        }

        const int32_t value = offsets[index];
        char first, second;
        if(value > 99) {
            first = 'x';
            second = 'x';
        } else if(value >= 0) {
            const int32_t tens = value / 10;
            first = (tens == 0) ? ' ' : (char)('0' + tens);
            second = (char)('0' + value % 10);
        } else if(value >= -9) {
            first = '-';
            second = (char)('0' - value);
        } else /* value<-9 */ {
            first = '-';
            second = 'x';
        }
        *out++ = first;
        *out++ = second;
    }
    *out++ = 0;
    return out;
}
1023 
1024 // toUnicode test worker functions ----------------------------------------- ***
1025 
1026 static int32_t
stepToUnicode(ConversionCase & cc,UConverter * cnv,UChar * result,int32_t resultCapacity,int32_t * resultOffsets,int32_t step,UErrorCode * pErrorCode)1027 stepToUnicode(ConversionCase &cc, UConverter *cnv,
1028               UChar *result, int32_t resultCapacity,
1029               int32_t *resultOffsets, /* also resultCapacity */
1030               int32_t step,
1031               UErrorCode *pErrorCode) {
1032     const char *source, *sourceLimit, *bytesLimit;
1033     UChar *target, *targetLimit, *resultLimit;
1034     UBool flush;
1035 
1036     source=(const char *)cc.bytes;
1037     target=result;
1038     bytesLimit=source+cc.bytesLength;
1039     resultLimit=result+resultCapacity;
1040 
1041     if(step>=0) {
1042         // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
1043         // move only one buffer (in vs. out) at a time to be extra mean
1044         // step==0 performs bulk conversion and generates offsets
1045 
1046         // initialize the partial limits for the loop
1047         if(step==0) {
1048             // use the entire buffers
1049             sourceLimit=bytesLimit;
1050             targetLimit=resultLimit;
1051             flush=cc.finalFlush;
1052         } else {
1053             // start with empty partial buffers
1054             sourceLimit=source;
1055             targetLimit=target;
1056             flush=FALSE;
1057 
1058             // output offsets only for bulk conversion
1059             resultOffsets=NULL;
1060         }
1061 
1062         for(;;) {
1063             // resetting the opposite conversion direction must not affect this one
1064             ucnv_resetFromUnicode(cnv);
1065 
1066             // convert
1067             ucnv_toUnicode(cnv,
1068                 &target, targetLimit,
1069                 &source, sourceLimit,
1070                 resultOffsets,
1071                 flush, pErrorCode);
1072 
1073             // check pointers and errors
1074             if(source>sourceLimit || target>targetLimit) {
1075                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1076                 break;
1077             } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1078                 if(target!=targetLimit) {
1079                     // buffer overflow must only be set when the target is filled
1080                     *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1081                     break;
1082                 } else if(targetLimit==resultLimit) {
1083                     // not just a partial overflow
1084                     break;
1085                 }
1086 
1087                 // the partial target is filled, set a new limit, reset the error and continue
1088                 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1089                 *pErrorCode=U_ZERO_ERROR;
1090             } else if(U_FAILURE(*pErrorCode)) {
1091                 // some other error occurred, done
1092                 break;
1093             } else {
1094                 if(source!=sourceLimit) {
1095                     // when no error occurs, then the input must be consumed
1096                     *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1097                     break;
1098                 }
1099 
1100                 if(sourceLimit==bytesLimit) {
1101                     // we are done
1102                     break;
1103                 }
1104 
1105                 // the partial conversion succeeded, set a new limit and continue
1106                 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
1107                 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
1108             }
1109         }
1110     } else /* step<0 */ {
1111         /*
1112          * step==-1: call only ucnv_getNextUChar()
1113          * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
1114          *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
1115          *   else give it at most (-step-2)/2 bytes
1116          */
1117         UChar32 c;
1118 
1119         // end the loop by getting an index out of bounds error
1120         for(;;) {
1121             // resetting the opposite conversion direction must not affect this one
1122             ucnv_resetFromUnicode(cnv);
1123 
1124             // convert
1125             if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
1126                 sourceLimit=source; // use sourceLimit not as a real limit
1127                                     // but to remember the pre-getNextUChar source pointer
1128                 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
1129 
1130                 // check pointers and errors
1131                 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
1132                     if(source!=bytesLimit) {
1133                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1134                     } else {
1135                         *pErrorCode=U_ZERO_ERROR;
1136                     }
1137                     break;
1138                 } else if(U_FAILURE(*pErrorCode)) {
1139                     break;
1140                 }
1141                 // source may not move if c is from previous overflow
1142 
1143                 if(target==resultLimit) {
1144                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1145                     break;
1146                 }
1147                 if(c<=0xffff) {
1148                     *target++=(UChar)c;
1149                 } else {
1150                     *target++=U16_LEAD(c);
1151                     if(target==resultLimit) {
1152                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1153                         break;
1154                     }
1155                     *target++=U16_TRAIL(c);
1156                 }
1157 
1158                 // alternate between -n-1 and -n but leave -1 alone
1159                 if(step<-1) {
1160                     ++step;
1161                 }
1162             } else /* step is even */ {
1163                 // allow only one UChar output
1164                 targetLimit=target<resultLimit ? target+1 : resultLimit;
1165 
1166                 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
1167                 // and never output offsets
1168                 if(step==-2) {
1169                     sourceLimit=bytesLimit;
1170                 } else {
1171                     sourceLimit=source+(-step-2)/2;
1172                     if(sourceLimit>bytesLimit) {
1173                         sourceLimit=bytesLimit;
1174                     }
1175                 }
1176 
1177                 ucnv_toUnicode(cnv,
1178                     &target, targetLimit,
1179                     &source, sourceLimit,
1180                     NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
1181 
1182                 // check pointers and errors
1183                 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1184                     if(target!=targetLimit) {
1185                         // buffer overflow must only be set when the target is filled
1186                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1187                         break;
1188                     } else if(targetLimit==resultLimit) {
1189                         // not just a partial overflow
1190                         break;
1191                     }
1192 
1193                     // the partial target is filled, set a new limit and continue
1194                     *pErrorCode=U_ZERO_ERROR;
1195                 } else if(U_FAILURE(*pErrorCode)) {
1196                     // some other error occurred, done
1197                     break;
1198                 } else {
1199                     if(source!=sourceLimit) {
1200                         // when no error occurs, then the input must be consumed
1201                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1202                         break;
1203                     }
1204 
1205                     // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
1206                 }
1207 
1208                 --step;
1209             }
1210         }
1211     }
1212 
1213     return (int32_t)(target-result);
1214 }
1215 
1216 UBool
ToUnicodeCase(ConversionCase & cc,UConverterToUCallback callback,const char * option)1217 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
1218     // open the converter
1219     IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
1220     LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
1221     // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
1222     if(errorCode.isFailure()) {
1223         errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1224                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
1225         errorCode.reset();
1226         return FALSE;
1227     }
1228 
1229     // set the callback
1230     if(callback!=NULL) {
1231         ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
1232         if(U_FAILURE(errorCode)) {
1233             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
1234                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1235             return FALSE;
1236         }
1237     }
1238 
1239     int32_t resultOffsets[256];
1240     UChar result[256];
1241     int32_t resultLength;
1242     UBool ok;
1243 
1244     static const struct {
1245         int32_t step;
1246         const char *name;
1247     } steps[]={
1248         { 0, "bulk" }, // must be first for offsets to be checked
1249         { 1, "step=1" },
1250         { 3, "step=3" },
1251         { 7, "step=7" },
1252         { -1, "getNext" },
1253         { -2, "toU(bulk)+getNext" },
1254         { -3, "getNext+toU(bulk)" },
1255         { -4, "toU(1)+getNext" },
1256         { -5, "getNext+toU(1)" },
1257         { -12, "toU(5)+getNext" },
1258         { -13, "getNext+toU(5)" },
1259     };
1260     int32_t i, step;
1261 
1262     ok=TRUE;
1263     for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1264         step=steps[i].step;
1265         if(step<0 && !cc.finalFlush) {
1266             // skip ucnv_getNextUChar() if !finalFlush because
1267             // ucnv_getNextUChar() always implies flush
1268             continue;
1269         }
1270         if(step!=0) {
1271             // bulk test is first, then offsets are not checked any more
1272             cc.offsets=NULL;
1273         }
1274         else {
1275             for (int32_t i = 0; i < UPRV_LENGTHOF(resultOffsets); i++) {
1276                 resultOffsets[i] = -1;
1277             }
1278         }
1279         for (int32_t i = 0; i < UPRV_LENGTHOF(result); i++) {
1280             result[i] = -1;
1281         }
1282         errorCode.reset();
1283         resultLength=stepToUnicode(cc, cnv.getAlias(),
1284                                 result, UPRV_LENGTHOF(result),
1285                                 step==0 ? resultOffsets : NULL,
1286                                 step, errorCode);
1287         ok=checkToUnicode(
1288                 cc, cnv.getAlias(), steps[i].name,
1289                 result, resultLength,
1290                 cc.offsets!=NULL ? resultOffsets : NULL,
1291                 errorCode);
1292         if(errorCode.isFailure() || !cc.finalFlush) {
1293             // reset if an error occurred or we did not flush
1294             // otherwise do nothing to make sure that flushing resets
1295             ucnv_resetToUnicode(cnv.getAlias());
1296         }
1297         if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
1298             errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1299                 cc.caseNr, cc.charset, resultLength);
1300         }
1301         if (result[resultLength] != (UChar)-1) {
1302             errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
1303                 cc.caseNr, cc.charset, resultLength);
1304         }
1305     }
1306 
1307     // not a real loop, just a convenience for breaking out of the block
1308     while(ok && cc.finalFlush) {
1309         // test ucnv_toUChars()
1310         memset(result, 0, sizeof(result));
1311 
1312         errorCode.reset();
1313         resultLength=ucnv_toUChars(cnv.getAlias(),
1314                         result, UPRV_LENGTHOF(result),
1315                         (const char *)cc.bytes, cc.bytesLength,
1316                         errorCode);
1317         ok=checkToUnicode(
1318                 cc, cnv.getAlias(), "toUChars",
1319                 result, resultLength,
1320                 NULL,
1321                 errorCode);
1322         if(!ok) {
1323             break;
1324         }
1325 
1326         // test preflighting
1327         // keep the correct result for simple checking
1328         errorCode.reset();
1329         resultLength=ucnv_toUChars(cnv.getAlias(),
1330                         NULL, 0,
1331                         (const char *)cc.bytes, cc.bytesLength,
1332                         errorCode);
1333         if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
1334             errorCode.reset();
1335         }
1336         ok=checkToUnicode(
1337                 cc, cnv.getAlias(), "preflight toUChars",
1338                 result, resultLength,
1339                 NULL,
1340                 errorCode);
1341         break;
1342     }
1343 
1344     errorCode.reset();  // all errors have already been reported
1345     return ok;
1346 }
1347 
1348 UBool
checkToUnicode(ConversionCase & cc,UConverter * cnv,const char * name,const UChar * result,int32_t resultLength,const int32_t * resultOffsets,UErrorCode resultErrorCode)1349 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1350                                const UChar *result, int32_t resultLength,
1351                                const int32_t *resultOffsets,
1352                                UErrorCode resultErrorCode) {
1353     char resultInvalidChars[8];
1354     int8_t resultInvalidLength;
1355     UErrorCode errorCode;
1356 
1357     const char *msg;
1358 
1359     // reset the message; NULL will mean "ok"
1360     msg=NULL;
1361 
1362     errorCode=U_ZERO_ERROR;
1363     resultInvalidLength=sizeof(resultInvalidChars);
1364     ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
1365     if(U_FAILURE(errorCode)) {
1366         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
1367                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1368         return FALSE;
1369     }
1370 
1371     // check everything that might have gone wrong
1372     if(cc.unicodeLength!=resultLength) {
1373         msg="wrong result length";
1374     } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
1375         msg="wrong result string";
1376     } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {
1377         msg="wrong offsets";
1378     } else if(cc.outErrorCode!=resultErrorCode) {
1379         msg="wrong error code";
1380     } else if(cc.invalidLength!=resultInvalidLength) {
1381         msg="wrong length of last invalid input";
1382     } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
1383         msg="wrong last invalid input";
1384     }
1385 
1386     if(msg==NULL) {
1387         return TRUE;
1388     } else {
1389         char buffer[2000]; // one buffer for all strings
1390         char *s, *bytesString, *unicodeString, *resultString,
1391             *offsetsString, *resultOffsetsString,
1392             *invalidCharsString, *resultInvalidCharsString;
1393 
1394         bytesString=s=buffer;
1395         s=printBytes(cc.bytes, cc.bytesLength, bytesString);
1396         s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
1397         s=printUnicode(result, resultLength, resultString=s);
1398         s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
1399         s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1400         s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
1401         s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
1402 
1403         if((s-buffer)>(int32_t)sizeof(buffer)) {
1404             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
1405                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1406             exit(1);
1407         }
1408 
1409         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1410               "  bytes <%s>[%d]\n"
1411               " expected <%s>[%d]\n"
1412               "  result  <%s>[%d]\n"
1413               " offsets         <%s>\n"
1414               "  result offsets <%s>\n"
1415               " error code expected %s got %s\n"
1416               "  invalidChars expected <%s> got <%s>\n",
1417               cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1418               bytesString, cc.bytesLength,
1419               unicodeString, cc.unicodeLength,
1420               resultString, resultLength,
1421               offsetsString,
1422               resultOffsetsString,
1423               u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1424               invalidCharsString, resultInvalidCharsString);
1425 
1426         return FALSE;
1427     }
1428 }
1429 
1430 // fromUnicode test worker functions --------------------------------------- ***
1431 
1432 static int32_t
stepFromUTF8(ConversionCase & cc,UConverter * utf8Cnv,UConverter * cnv,char * result,int32_t resultCapacity,int32_t step,UErrorCode * pErrorCode)1433 stepFromUTF8(ConversionCase &cc,
1434              UConverter *utf8Cnv, UConverter *cnv,
1435              char *result, int32_t resultCapacity,
1436              int32_t step,
1437              UErrorCode *pErrorCode) {
1438     const char *source, *sourceLimit, *utf8Limit;
1439     UChar pivotBuffer[32];
1440     UChar *pivotSource, *pivotTarget, *pivotLimit;
1441     char *target, *targetLimit, *resultLimit;
1442     UBool flush;
1443 
1444     source=cc.utf8;
1445     pivotSource=pivotTarget=pivotBuffer;
1446     target=result;
1447     utf8Limit=source+cc.utf8Length;
1448     resultLimit=result+resultCapacity;
1449 
1450     // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
1451     // move only one buffer (in vs. out) at a time to be extra mean
1452     // step==0 performs bulk conversion
1453 
1454     // initialize the partial limits for the loop
1455     if(step==0) {
1456         // use the entire buffers
1457         sourceLimit=utf8Limit;
1458         targetLimit=resultLimit;
1459         flush=cc.finalFlush;
1460 
1461         pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
1462     } else {
1463         // start with empty partial buffers
1464         sourceLimit=source;
1465         targetLimit=target;
1466         flush=FALSE;
1467 
1468         // empty pivot is not allowed, make it of length step
1469         pivotLimit=pivotBuffer+step;
1470     }
1471 
1472     for(;;) {
1473         // resetting the opposite conversion direction must not affect this one
1474         ucnv_resetFromUnicode(utf8Cnv);
1475         ucnv_resetToUnicode(cnv);
1476 
1477         // convert
1478         ucnv_convertEx(cnv, utf8Cnv,
1479             &target, targetLimit,
1480             &source, sourceLimit,
1481             pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
1482             FALSE, flush, pErrorCode);
1483 
1484         // check pointers and errors
1485         if(source>sourceLimit || target>targetLimit) {
1486             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1487             break;
1488         } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1489             if(target!=targetLimit) {
1490                 // buffer overflow must only be set when the target is filled
1491                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1492                 break;
1493             } else if(targetLimit==resultLimit) {
1494                 // not just a partial overflow
1495                 break;
1496             }
1497 
1498             // the partial target is filled, set a new limit, reset the error and continue
1499             targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1500             *pErrorCode=U_ZERO_ERROR;
1501         } else if(U_FAILURE(*pErrorCode)) {
1502             if(pivotSource==pivotBuffer) {
1503                 // toUnicode error, should not occur
1504                 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1505                 break;
1506             } else {
1507                 // fromUnicode error
1508                 // some other error occurred, done
1509                 break;
1510             }
1511         } else {
1512             if(source!=sourceLimit) {
1513                 // when no error occurs, then the input must be consumed
1514                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1515                 break;
1516             }
1517 
1518             if(sourceLimit==utf8Limit) {
1519                 // we are done
1520                 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
1521                     // ucnv_convertEx() warns about not terminating the output
1522                     // but ucnv_fromUnicode() does not and so
1523                     // checkFromUnicode() does not expect it
1524                     *pErrorCode=U_ZERO_ERROR;
1525                 }
1526                 break;
1527             }
1528 
1529             // the partial conversion succeeded, set a new limit and continue
1530             sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
1531             flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
1532         }
1533     }
1534 
1535     return (int32_t)(target-result);
1536 }
1537 
1538 static int32_t
stepFromUnicode(ConversionCase & cc,UConverter * cnv,char * result,int32_t resultCapacity,int32_t * resultOffsets,int32_t step,UErrorCode * pErrorCode)1539 stepFromUnicode(ConversionCase &cc, UConverter *cnv,
1540                 char *result, int32_t resultCapacity,
1541                 int32_t *resultOffsets, /* also resultCapacity */
1542                 int32_t step,
1543                 UErrorCode *pErrorCode) {
1544     const UChar *source, *sourceLimit, *unicodeLimit;
1545     char *target, *targetLimit, *resultLimit;
1546     UBool flush;
1547 
1548     source=cc.unicode;
1549     target=result;
1550     unicodeLimit=source+cc.unicodeLength;
1551     resultLimit=result+resultCapacity;
1552 
1553     // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
1554     // move only one buffer (in vs. out) at a time to be extra mean
1555     // step==0 performs bulk conversion and generates offsets
1556 
1557     // initialize the partial limits for the loop
1558     if(step==0) {
1559         // use the entire buffers
1560         sourceLimit=unicodeLimit;
1561         targetLimit=resultLimit;
1562         flush=cc.finalFlush;
1563     } else {
1564         // start with empty partial buffers
1565         sourceLimit=source;
1566         targetLimit=target;
1567         flush=FALSE;
1568 
1569         // output offsets only for bulk conversion
1570         resultOffsets=NULL;
1571     }
1572 
1573     for(;;) {
1574         // resetting the opposite conversion direction must not affect this one
1575         ucnv_resetToUnicode(cnv);
1576 
1577         // convert
1578         ucnv_fromUnicode(cnv,
1579             &target, targetLimit,
1580             &source, sourceLimit,
1581             resultOffsets,
1582             flush, pErrorCode);
1583 
1584         // check pointers and errors
1585         if(source>sourceLimit || target>targetLimit) {
1586             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1587             break;
1588         } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1589             if(target!=targetLimit) {
1590                 // buffer overflow must only be set when the target is filled
1591                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1592                 break;
1593             } else if(targetLimit==resultLimit) {
1594                 // not just a partial overflow
1595                 break;
1596             }
1597 
1598             // the partial target is filled, set a new limit, reset the error and continue
1599             targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1600             *pErrorCode=U_ZERO_ERROR;
1601         } else if(U_FAILURE(*pErrorCode)) {
1602             // some other error occurred, done
1603             break;
1604         } else {
1605             if(source!=sourceLimit) {
1606                 // when no error occurs, then the input must be consumed
1607                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1608                 break;
1609             }
1610 
1611             if(sourceLimit==unicodeLimit) {
1612                 // we are done
1613                 break;
1614             }
1615 
1616             // the partial conversion succeeded, set a new limit and continue
1617             sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
1618             flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
1619         }
1620     }
1621 
1622     return (int32_t)(target-result);
1623 }
1624 
1625 UBool
FromUnicodeCase(ConversionCase & cc,UConverterFromUCallback callback,const char * option)1626 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
1627     UConverter *cnv;
1628     UErrorCode errorCode;
1629 
1630     // open the converter
1631     errorCode=U_ZERO_ERROR;
1632     cnv=cnv_open(cc.charset, errorCode);
1633     if(U_FAILURE(errorCode)) {
1634         errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1635                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1636         return FALSE;
1637     }
1638     ucnv_resetToUnicode(utf8Cnv);
1639 
1640     // set the callback
1641     if(callback!=NULL) {
1642         ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
1643         if(U_FAILURE(errorCode)) {
1644             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1645                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1646             ucnv_close(cnv);
1647             return FALSE;
1648         }
1649     }
1650 
1651     // set the fallbacks flag
1652     // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1653     ucnv_setFallback(cnv, cc.fallbacks);
1654 
1655     // set the subchar
1656     int32_t length;
1657 
1658     if(cc.setSub>0) {
1659         length=(int32_t)strlen(cc.subchar);
1660         ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
1661         if(U_FAILURE(errorCode)) {
1662             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
1663                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1664             ucnv_close(cnv);
1665             return FALSE;
1666         }
1667     } else if(cc.setSub<0) {
1668         ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
1669         if(U_FAILURE(errorCode)) {
1670             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
1671                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1672             ucnv_close(cnv);
1673             return FALSE;
1674         }
1675     }
1676 
1677     // convert unicode to utf8
1678     char utf8[256];
1679     cc.utf8=utf8;
1680     u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
1681                 cc.unicode, cc.unicodeLength,
1682                 &errorCode);
1683     if(U_FAILURE(errorCode)) {
1684         // skip UTF-8 testing of a string with an unpaired surrogate,
1685         // or of one that's too long
1686         // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1687         cc.utf8Length=-1;
1688     }
1689 
1690     int32_t resultOffsets[256];
1691     char result[256];
1692     int32_t resultLength;
1693     UBool ok;
1694 
1695     static const struct {
1696         int32_t step;
1697         const char *name, *utf8Name;
1698     } steps[]={
1699         { 0, "bulk",   "utf8" }, // must be first for offsets to be checked
1700         { 1, "step=1", "utf8 step=1" },
1701         { 3, "step=3", "utf8 step=3" },
1702         { 7, "step=7", "utf8 step=7" }
1703     };
1704     int32_t i, step;
1705 
1706     ok=TRUE;
1707     for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1708         step=steps[i].step;
1709         for (int32_t i = 0; i < UPRV_LENGTHOF(resultOffsets); i++) {
1710             resultOffsets[i] = -1;
1711         }
1712         for (int32_t i = 0; i < UPRV_LENGTHOF(result); i++) {
1713             result[i] = -1;
1714         }
1715         errorCode=U_ZERO_ERROR;
1716         resultLength=stepFromUnicode(cc, cnv,
1717                                 result, UPRV_LENGTHOF(result),
1718                                 step==0 ? resultOffsets : NULL,
1719                                 step, &errorCode);
1720         ok=checkFromUnicode(
1721                 cc, cnv, steps[i].name,
1722                 (uint8_t *)result, resultLength,
1723                 cc.offsets!=NULL ? resultOffsets : NULL,
1724                 errorCode);
1725         if(U_FAILURE(errorCode) || !cc.finalFlush) {
1726             // reset if an error occurred or we did not flush
1727             // otherwise do nothing to make sure that flushing resets
1728             ucnv_resetFromUnicode(cnv);
1729         }
1730         if (resultOffsets[resultLength] != -1) {
1731             errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1732                 cc.caseNr, cc.charset, resultLength);
1733         }
1734         if (result[resultLength] != (char)-1) {
1735             errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
1736                 cc.caseNr, cc.charset, resultLength);
1737         }
1738 
1739         // bulk test is first, then offsets are not checked any more
1740         cc.offsets=NULL;
1741 
1742         // test direct conversion from UTF-8
1743         if(cc.utf8Length>=0) {
1744             errorCode=U_ZERO_ERROR;
1745             resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
1746                                     result, UPRV_LENGTHOF(result),
1747                                     step, &errorCode);
1748             ok=checkFromUnicode(
1749                     cc, cnv, steps[i].utf8Name,
1750                     (uint8_t *)result, resultLength,
1751                     NULL,
1752                     errorCode);
1753             if(U_FAILURE(errorCode) || !cc.finalFlush) {
1754                 // reset if an error occurred or we did not flush
1755                 // otherwise do nothing to make sure that flushing resets
1756                 ucnv_resetToUnicode(utf8Cnv);
1757                 ucnv_resetFromUnicode(cnv);
1758             }
1759         }
1760     }
1761 
1762     // not a real loop, just a convenience for breaking out of the block
1763     while(ok && cc.finalFlush) {
1764         // test ucnv_fromUChars()
1765         memset(result, 0, sizeof(result));
1766 
1767         errorCode=U_ZERO_ERROR;
1768         resultLength=ucnv_fromUChars(cnv,
1769                         result, UPRV_LENGTHOF(result),
1770                         cc.unicode, cc.unicodeLength,
1771                         &errorCode);
1772         ok=checkFromUnicode(
1773                 cc, cnv, "fromUChars",
1774                 (uint8_t *)result, resultLength,
1775                 NULL,
1776                 errorCode);
1777         if(!ok) {
1778             break;
1779         }
1780 
1781         // test preflighting
1782         // keep the correct result for simple checking
1783         errorCode=U_ZERO_ERROR;
1784         resultLength=ucnv_fromUChars(cnv,
1785                         NULL, 0,
1786                         cc.unicode, cc.unicodeLength,
1787                         &errorCode);
1788         if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
1789             errorCode=U_ZERO_ERROR;
1790         }
1791         ok=checkFromUnicode(
1792                 cc, cnv, "preflight fromUChars",
1793                 (uint8_t *)result, resultLength,
1794                 NULL,
1795                 errorCode);
1796         break;
1797     }
1798 
1799     ucnv_close(cnv);
1800     return ok;
1801 }
1802 
1803 UBool
checkFromUnicode(ConversionCase & cc,UConverter * cnv,const char * name,const uint8_t * result,int32_t resultLength,const int32_t * resultOffsets,UErrorCode resultErrorCode)1804 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1805                                  const uint8_t *result, int32_t resultLength,
1806                                  const int32_t *resultOffsets,
1807                                  UErrorCode resultErrorCode) {
1808     UChar resultInvalidUChars[8];
1809     int8_t resultInvalidLength;
1810     UErrorCode errorCode;
1811 
1812     const char *msg;
1813 
1814     // reset the message; NULL will mean "ok"
1815     msg=NULL;
1816 
1817     errorCode=U_ZERO_ERROR;
1818     resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
1819     ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
1820     if(U_FAILURE(errorCode)) {
1821         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1822                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1823         return FALSE;
1824     }
1825 
1826     // check everything that might have gone wrong
1827     if(cc.bytesLength!=resultLength) {
1828         msg="wrong result length";
1829     } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
1830         msg="wrong result string";
1831     } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {
1832         msg="wrong offsets";
1833     } else if(cc.outErrorCode!=resultErrorCode) {
1834         msg="wrong error code";
1835     } else if(cc.invalidLength!=resultInvalidLength) {
1836         msg="wrong length of last invalid input";
1837     } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
1838         msg="wrong last invalid input";
1839     }
1840 
1841     if(msg==NULL) {
1842         return TRUE;
1843     } else {
1844         char buffer[2000]; // one buffer for all strings
1845         char *s, *unicodeString, *bytesString, *resultString,
1846             *offsetsString, *resultOffsetsString,
1847             *invalidCharsString, *resultInvalidUCharsString;
1848 
1849         unicodeString=s=buffer;
1850         s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
1851         s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
1852         s=printBytes(result, resultLength, resultString=s);
1853         s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
1854         s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1855         s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
1856         s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
1857 
1858         if((s-buffer)>(int32_t)sizeof(buffer)) {
1859             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1860                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1861             exit(1);
1862         }
1863 
1864         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1865               "  unicode <%s>[%d]\n"
1866               " expected <%s>[%d]\n"
1867               "  result  <%s>[%d]\n"
1868               " offsets         <%s>\n"
1869               "  result offsets <%s>\n"
1870               " error code expected %s got %s\n"
1871               "  invalidChars expected <%s> got <%s>\n",
1872               cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1873               unicodeString, cc.unicodeLength,
1874               bytesString, cc.bytesLength,
1875               resultString, resultLength,
1876               offsetsString,
1877               resultOffsetsString,
1878               u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1879               invalidCharsString, resultInvalidUCharsString);
1880 
1881         return FALSE;
1882     }
1883 }
1884 
1885 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1886