1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: convtest.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul15
16 * created by: Markus W. Scherer
17 *
18 * Test file for data-driven conversion tests.
19 */
20
21 #include "unicode/utypes.h"
22
23 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION is set
 * is slightly unnecessary - it removes tests for Unicode charsets
 * like UTF-8 that should work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to only exclude those.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
33
34 #include "unicode/ucnv.h"
35 #include "unicode/unistr.h"
36 #include "unicode/parsepos.h"
37 #include "unicode/uniset.h"
38 #include "unicode/ustring.h"
39 #include "unicode/ures.h"
40 #include "unicode/utf16.h"
41 #include "convtest.h"
42 #include "cmemory.h"
43 #include "unicode/tstdtmod.h"
44 #include <string.h>
45 #include <stdlib.h>
46
// Single-character codes that the test data uses to select a converter
// callback; the first character of a test case's "callback" field is
// compared against these.
enum {
    SUB_CB='?',     // substitute callback
    SKIP_CB='0',    // skip callback
    STOP_CB='.',    // stop callback
    ESC_CB='&'      // escape callback
};
54
ConversionTest()55 ConversionTest::ConversionTest() {
56 UErrorCode errorCode=U_ZERO_ERROR;
57 utf8Cnv=ucnv_open("UTF-8", &errorCode);
58 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
59 if(U_FAILURE(errorCode)) {
60 errln("unable to open UTF-8 converter");
61 }
62 }
63
~ConversionTest()64 ConversionTest::~ConversionTest() {
65 ucnv_close(utf8Cnv);
66 }
67
68 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)69 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
70 if (exec) logln("TestSuite ConversionTest: ");
71 TESTCASE_AUTO_BEGIN;
72 #if !UCONFIG_NO_FILE_IO
73 TESTCASE_AUTO(TestToUnicode);
74 TESTCASE_AUTO(TestFromUnicode);
75 TESTCASE_AUTO(TestGetUnicodeSet);
76 #endif
77 TESTCASE_AUTO(TestGetUnicodeSet2);
78 TESTCASE_AUTO(TestDefaultIgnorableCallback);
79 TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
80 TESTCASE_AUTO(TestUTF8ToUTF8Streaming);
81 TESTCASE_AUTO_END;
82 }
83
84 // test data interface ----------------------------------------------------- ***
85
86 void
TestToUnicode()87 ConversionTest::TestToUnicode() {
88 ConversionCase cc;
89 char charset[100], cbopt[4];
90 const char *option;
91 UnicodeString s, unicode;
92 int32_t offsetsLength;
93 UConverterToUCallback callback;
94
95 TestDataModule *dataModule;
96 TestData *testData;
97 const DataMap *testCase;
98 UErrorCode errorCode;
99 int32_t i;
100
101 errorCode=U_ZERO_ERROR;
102 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
103 if(U_SUCCESS(errorCode)) {
104 testData=dataModule->createTestData("toUnicode", errorCode);
105 if(U_SUCCESS(errorCode)) {
106 for(i=0; testData->nextCase(testCase, errorCode); ++i) {
107 if(U_FAILURE(errorCode)) {
108 errln("error retrieving conversion/toUnicode test case %d - %s",
109 i, u_errorName(errorCode));
110 errorCode=U_ZERO_ERROR;
111 continue;
112 }
113
114 cc.caseNr=i;
115
116 s=testCase->getString("charset", errorCode);
117 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
118 cc.charset=charset;
119
120 // BEGIN android-added
121 // To save space, Android does not build full ISO-2022-CN tables.
122 // We skip the TestGetKeywordValuesForLocale for counting available collations.
123 if (strlen(charset) >= 8 &&
124 strncmp(charset+4, "2022-CN", 4) == 0) {
125 continue;
126 }
127 // END android-added
128
129 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
130 unicode=testCase->getString("unicode", errorCode);
131 cc.unicode=unicode.getBuffer();
132 cc.unicodeLength=unicode.length();
133
134 offsetsLength=0;
135 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
136 if(offsetsLength==0) {
137 cc.offsets=NULL;
138 } else if(offsetsLength!=unicode.length()) {
139 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
140 i, unicode.length(), offsetsLength);
141 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 }
143
144 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
145 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
146
147 s=testCase->getString("errorCode", errorCode);
148 if(s==UNICODE_STRING("invalid", 7)) {
149 cc.outErrorCode=U_INVALID_CHAR_FOUND;
150 } else if(s==UNICODE_STRING("illegal", 7)) {
151 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
152 } else if(s==UNICODE_STRING("truncated", 9)) {
153 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
154 } else if(s==UNICODE_STRING("illesc", 6)) {
155 cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
156 } else if(s==UNICODE_STRING("unsuppesc", 9)) {
157 cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
158 } else {
159 cc.outErrorCode=U_ZERO_ERROR;
160 }
161
162 s=testCase->getString("callback", errorCode);
163 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
164 cc.cbopt=cbopt;
165 switch(cbopt[0]) {
166 case SUB_CB:
167 callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
168 break;
169 case SKIP_CB:
170 callback=UCNV_TO_U_CALLBACK_SKIP;
171 break;
172 case STOP_CB:
173 callback=UCNV_TO_U_CALLBACK_STOP;
174 break;
175 case ESC_CB:
176 callback=UCNV_TO_U_CALLBACK_ESCAPE;
177 break;
178 default:
179 callback=NULL;
180 break;
181 }
182 option=callback==NULL ? cbopt : cbopt+1;
183 if(*option==0) {
184 option=NULL;
185 }
186
187 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
188
189 if(U_FAILURE(errorCode)) {
190 errln("error parsing conversion/toUnicode test case %d - %s",
191 i, u_errorName(errorCode));
192 errorCode=U_ZERO_ERROR;
193 } else {
194 logln("TestToUnicode[%d] %s", i, charset);
195 ToUnicodeCase(cc, callback, option);
196 }
197 }
198 delete testData;
199 }
200 delete dataModule;
201 }
202 else {
203 dataerrln("Could not load test conversion data");
204 }
205 }
206
207 void
TestFromUnicode()208 ConversionTest::TestFromUnicode() {
209 ConversionCase cc;
210 char charset[100], cbopt[4];
211 const char *option;
212 UnicodeString s, unicode, invalidUChars;
213 int32_t offsetsLength, index;
214 UConverterFromUCallback callback;
215
216 TestDataModule *dataModule;
217 TestData *testData;
218 const DataMap *testCase;
219 const UChar *p;
220 UErrorCode errorCode;
221 int32_t i, length;
222
223 errorCode=U_ZERO_ERROR;
224 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
225 if(U_SUCCESS(errorCode)) {
226 testData=dataModule->createTestData("fromUnicode", errorCode);
227 if(U_SUCCESS(errorCode)) {
228 for(i=0; testData->nextCase(testCase, errorCode); ++i) {
229 if(U_FAILURE(errorCode)) {
230 errln("error retrieving conversion/fromUnicode test case %d - %s",
231 i, u_errorName(errorCode));
232 errorCode=U_ZERO_ERROR;
233 continue;
234 }
235
236 cc.caseNr=i;
237
238 s=testCase->getString("charset", errorCode);
239 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
240 cc.charset=charset;
241
242 // BEGIN android-added
243 // To save space, Android does not build full ISO-2022-CN tables.
244 // We skip the TestGetKeywordValuesForLocale for counting available collations.
245 if (strlen(charset) >= 8 &&
246 strncmp(charset+4, "2022-CN", 4) == 0) {
247 continue;
248 }
249 // END android-added
250
251 unicode=testCase->getString("unicode", errorCode);
252 cc.unicode=unicode.getBuffer();
253 cc.unicodeLength=unicode.length();
254 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
255
256 offsetsLength=0;
257 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
258 if(offsetsLength==0) {
259 cc.offsets=NULL;
260 } else if(offsetsLength!=cc.bytesLength) {
261 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
262 i, cc.bytesLength, offsetsLength);
263 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
264 }
265
266 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
267 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
268
269 s=testCase->getString("errorCode", errorCode);
270 if(s==UNICODE_STRING("invalid", 7)) {
271 cc.outErrorCode=U_INVALID_CHAR_FOUND;
272 } else if(s==UNICODE_STRING("illegal", 7)) {
273 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
274 } else if(s==UNICODE_STRING("truncated", 9)) {
275 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
276 } else {
277 cc.outErrorCode=U_ZERO_ERROR;
278 }
279
280 s=testCase->getString("callback", errorCode);
281 cc.setSub=0; // default: no subchar
282
283 if((index=s.indexOf((UChar)0))>0) {
284 // read NUL-separated subchar first, if any
285 // copy the subchar from Latin-1 characters
286 // start after the NUL
287 p=s.getTerminatedBuffer();
288 length=index+1;
289 p+=length;
290 length=s.length()-length;
291 if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
292 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
293 } else {
294 int32_t j;
295
296 for(j=0; j<length; ++j) {
297 cc.subchar[j]=(char)p[j];
298 }
299 // NUL-terminate the subchar
300 cc.subchar[j]=0;
301 cc.setSub=1;
302 }
303
304 // remove the NUL and subchar from s
305 s.truncate(index);
306 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
307 // read a substitution string, separated by an equal sign
308 p=s.getBuffer()+index+1;
309 length=s.length()-(index+1);
310 if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
311 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
312 } else {
313 u_memcpy(cc.subString, p, length);
314 // NUL-terminate the subString
315 cc.subString[length]=0;
316 cc.setSub=-1;
317 }
318
319 // remove the equal sign and subString from s
320 s.truncate(index);
321 }
322
323 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
324 cc.cbopt=cbopt;
325 switch(cbopt[0]) {
326 case SUB_CB:
327 callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
328 break;
329 case SKIP_CB:
330 callback=UCNV_FROM_U_CALLBACK_SKIP;
331 break;
332 case STOP_CB:
333 callback=UCNV_FROM_U_CALLBACK_STOP;
334 break;
335 case ESC_CB:
336 callback=UCNV_FROM_U_CALLBACK_ESCAPE;
337 break;
338 default:
339 callback=NULL;
340 break;
341 }
342 option=callback==NULL ? cbopt : cbopt+1;
343 if(*option==0) {
344 option=NULL;
345 }
346
347 invalidUChars=testCase->getString("invalidUChars", errorCode);
348 cc.invalidUChars=invalidUChars.getBuffer();
349 cc.invalidLength=invalidUChars.length();
350
351 if(U_FAILURE(errorCode)) {
352 errln("error parsing conversion/fromUnicode test case %d - %s",
353 i, u_errorName(errorCode));
354 errorCode=U_ZERO_ERROR;
355 } else {
356 logln("TestFromUnicode[%d] %s", i, charset);
357 FromUnicodeCase(cc, callback, option);
358 }
359 }
360 delete testData;
361 }
362 delete dataModule;
363 }
364 else {
365 dataerrln("Could not load test conversion data");
366 }
367 }
368
369 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
370
371 void
TestGetUnicodeSet()372 ConversionTest::TestGetUnicodeSet() {
373 char charset[100];
374 UnicodeString s, map, mapnot;
375 int32_t which;
376
377 ParsePosition pos;
378 UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
379 UnicodeSet *cnvSetPtr = &cnvSet;
380 LocalUConverterPointer cnv;
381
382 TestDataModule *dataModule;
383 TestData *testData;
384 const DataMap *testCase;
385 UErrorCode errorCode;
386 int32_t i;
387
388 errorCode=U_ZERO_ERROR;
389 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
390 if(U_SUCCESS(errorCode)) {
391 testData=dataModule->createTestData("getUnicodeSet", errorCode);
392 if(U_SUCCESS(errorCode)) {
393 for(i=0; testData->nextCase(testCase, errorCode); ++i) {
394 if(U_FAILURE(errorCode)) {
395 errln("error retrieving conversion/getUnicodeSet test case %d - %s",
396 i, u_errorName(errorCode));
397 errorCode=U_ZERO_ERROR;
398 continue;
399 }
400
401 s=testCase->getString("charset", errorCode);
402 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
403
404 // BEGIN android-added
405 // To save space, Android does not build full ISO-2022-CN tables.
406 // We skip the TestGetKeywordValuesForLocale for counting available collations.
407 if (strlen(charset) >= 8 &&
408 strncmp(charset+4, "2022-CN", 4) == 0) {
409 continue;
410 }
411 // END android-added
412
413 map=testCase->getString("map", errorCode);
414 mapnot=testCase->getString("mapnot", errorCode);
415
416 which=testCase->getInt28("which", errorCode);
417
418 if(U_FAILURE(errorCode)) {
419 errln("error parsing conversion/getUnicodeSet test case %d - %s",
420 i, u_errorName(errorCode));
421 errorCode=U_ZERO_ERROR;
422 continue;
423 }
424
425 // test this test case
426 mapSet.clear();
427 mapnotSet.clear();
428
429 pos.setIndex(0);
430 mapSet.applyPattern(map, pos, 0, NULL, errorCode);
431 if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {
432 errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
433 " error index %d index %d U+%04x",
434 i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
435 errorCode=U_ZERO_ERROR;
436 continue;
437 }
438
439 pos.setIndex(0);
440 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
441 if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {
442 errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
443 " error index %d index %d U+%04x",
444 i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
445 errorCode=U_ZERO_ERROR;
446 continue;
447 }
448
449 logln("TestGetUnicodeSet[%d] %s", i, charset);
450
451 cnv.adoptInstead(cnv_open(charset, errorCode));
452 if(U_FAILURE(errorCode)) {
453 errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
454 charset, i, u_errorName(errorCode));
455 errorCode=U_ZERO_ERROR;
456 continue;
457 }
458
459 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
460
461 if(U_FAILURE(errorCode)) {
462 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
463 charset, i, u_errorName(errorCode));
464 errorCode=U_ZERO_ERROR;
465 continue;
466 }
467
468 // are there items that must be in cnvSet but are not?
469 (diffSet=mapSet).removeAll(cnvSet);
470 if(!diffSet.isEmpty()) {
471 diffSet.toPattern(s, TRUE);
472 if(s.length()>100) {
473 s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
474 }
475 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
476 charset, i);
477 errln(s);
478 }
479
480 // are there items that must not be in cnvSet but are?
481 (diffSet=mapnotSet).retainAll(cnvSet);
482 if(!diffSet.isEmpty()) {
483 diffSet.toPattern(s, TRUE);
484 if(s.length()>100) {
485 s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
486 }
487 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
488 charset, i);
489 errln(s);
490 }
491 }
492 delete testData;
493 }
494 delete dataModule;
495 }
496 else {
497 dataerrln("Could not load test conversion data");
498 }
499 }
500
501 U_CDECL_BEGIN
502 static void U_CALLCONV
getUnicodeSetCallback(const void * context,UConverterFromUnicodeArgs *,const UChar *,int32_t,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)503 getUnicodeSetCallback(const void *context,
504 UConverterFromUnicodeArgs * /*fromUArgs*/,
505 const UChar* /*codeUnits*/,
506 int32_t /*length*/,
507 UChar32 codePoint,
508 UConverterCallbackReason reason,
509 UErrorCode *pErrorCode) {
510 if(reason<=UCNV_IRREGULAR) {
511 ((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point
512 *pErrorCode=U_ZERO_ERROR; // skip
513 } // else ignore the reset, close and clone calls.
514 }
515 U_CDECL_END
516
517 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
518 void
TestGetUnicodeSet2()519 ConversionTest::TestGetUnicodeSet2() {
520 // Build a string with all code points.
521 UChar32 cpLimit;
522 int32_t s0Length;
523 if(quick) {
524 cpLimit=s0Length=0x10000; // BMP only
525 } else {
526 cpLimit=0x110000;
527 s0Length=0x10000+0x200000; // BMP + surrogate pairs
528 }
529 UChar *s0=new UChar[s0Length];
530 if(s0==NULL) {
531 return;
532 }
533 UChar *s=s0;
534 UChar32 c;
535 UChar c2;
536 // low BMP
537 for(c=0; c<=0xd7ff; ++c) {
538 *s++=(UChar)c;
539 }
540 // trail surrogates
541 for(c=0xdc00; c<=0xdfff; ++c) {
542 *s++=(UChar)c;
543 }
544 // lead surrogates
545 // (after trails so that there is not even one surrogate pair in between)
546 for(c=0xd800; c<=0xdbff; ++c) {
547 *s++=(UChar)c;
548 }
549 // high BMP
550 for(c=0xe000; c<=0xffff; ++c) {
551 *s++=(UChar)c;
552 }
553 // supplementary code points = surrogate pairs
554 if(cpLimit==0x110000) {
555 for(c=0xd800; c<=0xdbff; ++c) {
556 for(c2=0xdc00; c2<=0xdfff; ++c2) {
557 *s++=(UChar)c;
558 *s++=c2;
559 }
560 }
561 }
562
563 static const char *const cnvNames[]={
564 "UTF-8",
565 "UTF-7",
566 "UTF-16",
567 "US-ASCII",
568 "ISO-8859-1",
569 "windows-1252",
570 "Shift-JIS",
571 "ibm-1390", // EBCDIC_STATEFUL table
572 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table
573 "HZ",
574 "ISO-2022-JP",
575 "JIS7",
576 "ISO-2022-CN",
577 "ISO-2022-CN-EXT",
578 "LMBCS"
579 };
580 LocalUConverterPointer cnv;
581 char buffer[1024];
582 int32_t i;
583 for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
584 UErrorCode errorCode=U_ZERO_ERROR;
585 cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
586 if(U_FAILURE(errorCode)) {
587 errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
588 continue;
589 }
590 UnicodeSet expected;
591 ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
592 if(U_FAILURE(errorCode)) {
593 errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
594 continue;
595 }
596 UConverterUnicodeSet which;
597 for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
598 if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
599 ucnv_setFallback(cnv.getAlias(), TRUE);
600 }
601 expected.add(0, cpLimit-1);
602 s=s0;
603 UBool flush;
604 do {
605 char *t=buffer;
606 flush=(UBool)(s==s0+s0Length);
607 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
608 if(U_FAILURE(errorCode)) {
609 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
610 errorCode=U_ZERO_ERROR;
611 continue;
612 } else {
613 break; // unexpected error, should not occur
614 }
615 }
616 } while(!flush);
617 UnicodeSet set;
618 ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
619 if(cpLimit<0x110000) {
620 set.remove(cpLimit, 0x10ffff);
621 }
622 if(which==UCNV_ROUNDTRIP_SET) {
623 // ignore PUA code points because they will be converted even if they
624 // are fallbacks and when other fallbacks are turned off,
625 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
626 expected.remove(0xe000, 0xf8ff);
627 expected.remove(0xf0000, 0xffffd);
628 expected.remove(0x100000, 0x10fffd);
629 set.remove(0xe000, 0xf8ff);
630 set.remove(0xf0000, 0xffffd);
631 set.remove(0x100000, 0x10fffd);
632 }
633 if(set!=expected) {
634 // First try to see if we have different sets because ucnv_getUnicodeSet()
635 // added strings: The above conversion method does not tell us what strings might be convertible.
636 // Remove strings from the set and compare again.
637 set.removeAllStrings();
638 }
639 if(set!=expected) {
640 UnicodeSet diffSet;
641 UnicodeString out;
642
643 // are there items that must be in the set but are not?
644 (diffSet=expected).removeAll(set);
645 if(!diffSet.isEmpty()) {
646 diffSet.toPattern(out, TRUE);
647 if(out.length()>100) {
648 out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
649 }
650 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
651 cnvNames[i], which);
652 errln(out);
653 }
654
655 // are there items that must not be in the set but are?
656 (diffSet=set).removeAll(expected);
657 if(!diffSet.isEmpty()) {
658 diffSet.toPattern(out, TRUE);
659 if(out.length()>100) {
660 out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
661 }
662 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
663 cnvNames[i], which);
664 errln(out);
665 }
666 }
667 }
668 }
669
670 delete [] s0;
671 }
672
673 // Test all codepoints which has the default ignorable Unicode property are ignored if they have no mapping
674 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
675 void
TestDefaultIgnorableCallback()676 ConversionTest::TestDefaultIgnorableCallback() {
677 UErrorCode status = U_ZERO_ERROR;
678 const char *cnv_name = "euc-jp-2007";
679 const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
680 const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
681
682 LocalPointer<UnicodeSet> set_ignorable(new UnicodeSet(pattern_ignorable, status));
683 if (U_FAILURE(status)) {
684 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
685 return;
686 }
687
688 LocalPointer<UnicodeSet> set_not_ignorable(new UnicodeSet(pattern_not_ignorable, status));
689 if (U_FAILURE(status)) {
690 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
691 return;
692 }
693
694 LocalUConverterPointer cnv(cnv_open(cnv_name, status));
695 if (U_FAILURE(status)) {
696 dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
697 return;
698 }
699
700 // set callback for the converter
701 ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
702
703 UChar32 input[1];
704 char output[10];
705 int32_t outputLength;
706
707 // test default ignorables are ignored
708 int size = set_ignorable->size();
709 for (int i = 0; i < size; i++) {
710 status = U_ZERO_ERROR;
711 outputLength= 0;
712
713 input[0] = set_ignorable->charAt(i);
714
715 outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
716 if (U_FAILURE(status) || outputLength != 0) {
717 errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
718 }
719 }
720
721 // test non-ignorables are not ignored
722 size = set_not_ignorable->size();
723 for (int i = 0; i < size; i++) {
724 status = U_ZERO_ERROR;
725 outputLength= 0;
726
727 input[0] = set_not_ignorable->charAt(i);
728
729 if (input[0] == 0) {
730 continue;
731 }
732
733 outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
734 if (U_FAILURE(status) || outputLength <= 0) {
735 errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
736 }
737 }
738 }
739
740 void
TestUTF8ToUTF8Overflow()741 ConversionTest::TestUTF8ToUTF8Overflow() {
742 IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
743 LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
744 LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
745 static const char *text = "aä"; // ä: 2 bytes
746 const char *source = text;
747 const char *sourceLimit = text + strlen(text);
748 char result[20];
749 char *target = result;
750 const char *targetLimit = result + sizeof(result);
751 UChar buffer16[20];
752 UChar *pivotSource = buffer16;
753 UChar *pivotTarget = buffer16;
754 const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
755 int32_t length;
756
757 // Convert with insufficient target capacity.
758 result[2] = 5;
759 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
760 &target, result + 2, &source, sourceLimit,
761 buffer16, &pivotSource, &pivotTarget, pivotLimit,
762 FALSE, FALSE, errorCode);
763 assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
764 length = (int32_t)(target - result);
765 assertEquals("number of bytes written", 2, length);
766 assertEquals("next byte not clobbered", 5, result[2]);
767
768 // Convert the rest and flush.
769 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
770 &target, targetLimit, &source, sourceLimit,
771 buffer16, &pivotSource, &pivotTarget, pivotLimit,
772 FALSE, TRUE, errorCode);
773
774 assertSuccess("UTF-8->UTF-8", errorCode);
775 length = (int32_t)(target - result);
776 assertEquals("3 bytes", 3, length);
777 if (length == 3) {
778 assertTrue("result same as input", memcmp(text, result, length) == 0);
779 }
780
781 ucnv_reset(cnv1.getAlias());
782 ucnv_reset(cnv2.getAlias());
783 memset(result, 0, sizeof(result));
784 static const char *text2 = "a"; // U+1F6B2 bicycle: 4 bytes
785 source = text2;
786 sourceLimit = text2 + strlen(text2);
787 target = result;
788 pivotSource = pivotTarget = buffer16;
789
790 // Convert with insufficient target capacity.
791 result[3] = 5;
792 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
793 &target, result + 3, &source, sourceLimit,
794 buffer16, &pivotSource, &pivotTarget, pivotLimit,
795 FALSE, FALSE, errorCode);
796 assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
797 length = (int32_t)(target - result);
798 assertEquals("text2 number of bytes written", 3, length);
799 assertEquals("text2 next byte not clobbered", 5, result[3]);
800
801 // Convert the rest and flush.
802 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
803 &target, targetLimit, &source, sourceLimit,
804 buffer16, &pivotSource, &pivotTarget, pivotLimit,
805 FALSE, TRUE, errorCode);
806
807 assertSuccess("text2 UTF-8->UTF-8", errorCode);
808 length = (int32_t)(target - result);
809 assertEquals("text2 5 bytes", 5, length);
810 if (length == 5) {
811 assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
812 }
813
814 ucnv_reset(cnv1.getAlias());
815 ucnv_reset(cnv2.getAlias());
816 memset(result, 0, sizeof(result));
817 static const char *illFormed = "\xf1\x91\x93\x96\x91\x94"; // U+514D6 + two more trail bytes
818 source = illFormed;
819 sourceLimit = illFormed + strlen(illFormed);
820 target = result;
821 pivotSource = pivotTarget = buffer16;
822
823 ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);
824
825 // Convert only two bytes and flush (but expect failure).
826 char errorBytes[10];
827 int8_t errorLength;
828 result[0] = 5;
829 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
830 &target, targetLimit, &source, source + 2,
831 buffer16, &pivotSource, &pivotTarget, pivotLimit,
832 FALSE, TRUE, errorCode);
833 assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
834 length = (int32_t)(target - result);
835 assertEquals("illFormed number of bytes written", 0, length);
836 errorLength = UPRV_LENGTHOF(errorBytes);
837 ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
838 assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
839 if (errorLength == 2) {
840 assertEquals("illFormed truncated errorBytes", 0xf191,
841 ((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
842 }
843
844 // Continue conversion starting with a trail byte.
845 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
846 &target, targetLimit, &source, sourceLimit,
847 buffer16, &pivotSource, &pivotTarget, pivotLimit,
848 FALSE, TRUE, errorCode);
849
850 assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
851 length = (int32_t)(target - result);
852 assertEquals("illFormed trail byte number of bytes written", 0, length);
853 errorLength = UPRV_LENGTHOF(errorBytes);
854 ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
855 assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
856 if (errorLength == 1) {
857 assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
858 }
859 }
860
861 void
TestUTF8ToUTF8Streaming()862 ConversionTest::TestUTF8ToUTF8Streaming() {
863 IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Streaming");
864 LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
865 LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
866
867 // UTF8 encoded cyrillic part of 'Lorem ipsum'
868 static const char* text =
869 "\xd0\xb5\xd1\x82\x20\xd1\x81\xd1\x86\xd0\xb0\xd0\xb5\xd0\xb2\xd0"
870 "\xbe\xd0\xbb\xd0\xb0\x20\xd1\x81\xd0\xb0\xd0\xb4\xd0\xb8\xd0\xbf"
871 "\xd1\x81\xd1\x86\xd0\xb8\xd0\xbd\xd0\xb3\x20\xd0\xb0\xd1\x86\xd1"
872 "\x86\xd0\xbe\xd0\xbc\xd0\xbc\xd0\xbe\xd0\xb4\xd0\xb0\xd1\x80\xd0"
873 "\xb5\x20\xd1\x85\xd0\xb0\xd1\x81";
874
875 int32_t chunk1 = 25; // partial lead at the end: 0xd0
876 int32_t chunk2 = 47; // partial tail at the beginning: 0xb0
877
878 char result[128];
879
880 int32_t sourceLen = (int32_t)strlen(text);
881 const char* source = text;
882 const char* sourceLimit = text + chunk1;
883
884 int32_t targetLen = sizeof(result);
885 char* target = result;
886 const char* targetLimit = result + targetLen;
887
888 UChar buffer16[20];
889 UChar* pivotSource = buffer16;
890 UChar* pivotTarget = buffer16;
891 const UChar* pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
892
893 int32_t length;
894 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
895 &target, result + targetLen, &source, sourceLimit,
896 buffer16, &pivotSource, &pivotTarget, pivotLimit,
897 FALSE, FALSE, errorCode);
898
899 length = (int32_t)(target - result);
900 targetLen -= length;
901 assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length);
902
903 source = text + chunk1;
904 sourceLimit = source + chunk2;
905
906 // Convert the rest and flush.
907 ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
908 &target, targetLimit, &source, sourceLimit,
909 buffer16, &pivotSource, &pivotTarget, pivotLimit,
910 FALSE, TRUE, errorCode);
911
912 length = (int32_t)(target - result - length);
913 targetLen -= length;
914 assertEquals("Second chunk + 2 doesn't match converted length", chunk2 + 1, length);
915
916 assertEquals("Full text length match", sourceLen, sizeof(result) - targetLen);
917 assertSuccess("UTF-8->UTF-8", errorCode);
918 }
919
920 // open testdata or ICU data converter ------------------------------------- ***
921
922 UConverter *
cnv_open(const char * name,UErrorCode & errorCode)923 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
924 if(name!=NULL && *name=='+') {
925 // Converter names that start with '+' are ignored in ICU4J tests.
926 ++name;
927 }
928 if(name!=NULL && *name=='*') {
929 /* loadTestData(): set the data directory */
930 return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
931 } else {
932 return ucnv_open(name, &errorCode);
933 }
934 }
935
936 // output helpers ---------------------------------------------------------- ***
937
// Returns the lowercase hexadecimal character for a value 0..15.
static inline char
hexDigit(uint8_t digit) {
    if(digit<=9) {
        return (char)('0'+digit);
    }
    return (char)('a'-10+digit);
}
942
943 static char *
printBytes(const uint8_t * bytes,int32_t length,char * out)944 printBytes(const uint8_t *bytes, int32_t length, char *out) {
945 uint8_t b;
946
947 if(length>0) {
948 b=*bytes++;
949 --length;
950 *out++=hexDigit((uint8_t)(b>>4));
951 *out++=hexDigit((uint8_t)(b&0xf));
952 }
953
954 while(length>0) {
955 b=*bytes++;
956 --length;
957 *out++=' ';
958 *out++=hexDigit((uint8_t)(b>>4));
959 *out++=hexDigit((uint8_t)(b&0xf));
960 }
961 *out++=0;
962 return out;
963 }
964
965 static char *
printUnicode(const UChar * unicode,int32_t length,char * out)966 printUnicode(const UChar *unicode, int32_t length, char *out) {
967 UChar32 c;
968 int32_t i;
969
970 for(i=0; i<length;) {
971 if(i>0) {
972 *out++=' ';
973 }
974 U16_NEXT(unicode, i, length, c);
975 // write 4..6 digits
976 if(c>=0x100000) {
977 *out++='1';
978 }
979 if(c>=0x10000) {
980 *out++=hexDigit((uint8_t)((c>>16)&0xf));
981 }
982 *out++=hexDigit((uint8_t)((c>>12)&0xf));
983 *out++=hexDigit((uint8_t)((c>>8)&0xf));
984 *out++=hexDigit((uint8_t)((c>>4)&0xf));
985 *out++=hexDigit((uint8_t)(c&0xf));
986 }
987 *out++=0;
988 return out;
989 }
990
/**
 * Writes each offset as exactly two characters, separated by single spaces,
 * followed by a NUL terminator:
 *   below -9 -> "-x",  -9..-1 -> "-d",  0..9 -> " d",  10..99 -> "dd",
 *   above 99 -> "xx".
 *
 * @param offsets offsets to print; NULL is treated as an empty list
 * @param length  number of offsets
 * @param out     destination; the caller must provide enough room
 * @return pointer just past the written NUL
 */
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count=(offsets==NULL) ? 0 : length;

    for(int32_t idx=0; idx<count; ++idx) {
        if(idx!=0) {
            *out++=' ';
        }

        const int32_t value=offsets[idx];
        char first, second;

        if(value>99) {
            first='x';
            second='x';
        } else if(value>=10) {
            first=(char)('0'+value/10);
            second=(char)('0'+value%10);
        } else if(value>=0) {
            first=' ';
            second=(char)('0'+value);
        } else if(value>=-9) {
            first='-';
            second=(char)('0'-value);
        } else /* value<-9 */ {
            first='-';
            second='x';
        }

        *out++=first;
        *out++=second;
    }
    *out++=0;
    return out;
}
1023
1024 // toUnicode test worker functions ----------------------------------------- ***
1025
1026 static int32_t
stepToUnicode(ConversionCase & cc,UConverter * cnv,UChar * result,int32_t resultCapacity,int32_t * resultOffsets,int32_t step,UErrorCode * pErrorCode)1027 stepToUnicode(ConversionCase &cc, UConverter *cnv,
1028 UChar *result, int32_t resultCapacity,
1029 int32_t *resultOffsets, /* also resultCapacity */
1030 int32_t step,
1031 UErrorCode *pErrorCode) {
1032 const char *source, *sourceLimit, *bytesLimit;
1033 UChar *target, *targetLimit, *resultLimit;
1034 UBool flush;
1035
1036 source=(const char *)cc.bytes;
1037 target=result;
1038 bytesLimit=source+cc.bytesLength;
1039 resultLimit=result+resultCapacity;
1040
1041 if(step>=0) {
1042 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
1043 // move only one buffer (in vs. out) at a time to be extra mean
1044 // step==0 performs bulk conversion and generates offsets
1045
1046 // initialize the partial limits for the loop
1047 if(step==0) {
1048 // use the entire buffers
1049 sourceLimit=bytesLimit;
1050 targetLimit=resultLimit;
1051 flush=cc.finalFlush;
1052 } else {
1053 // start with empty partial buffers
1054 sourceLimit=source;
1055 targetLimit=target;
1056 flush=FALSE;
1057
1058 // output offsets only for bulk conversion
1059 resultOffsets=NULL;
1060 }
1061
1062 for(;;) {
1063 // resetting the opposite conversion direction must not affect this one
1064 ucnv_resetFromUnicode(cnv);
1065
1066 // convert
1067 ucnv_toUnicode(cnv,
1068 &target, targetLimit,
1069 &source, sourceLimit,
1070 resultOffsets,
1071 flush, pErrorCode);
1072
1073 // check pointers and errors
1074 if(source>sourceLimit || target>targetLimit) {
1075 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1076 break;
1077 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1078 if(target!=targetLimit) {
1079 // buffer overflow must only be set when the target is filled
1080 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1081 break;
1082 } else if(targetLimit==resultLimit) {
1083 // not just a partial overflow
1084 break;
1085 }
1086
1087 // the partial target is filled, set a new limit, reset the error and continue
1088 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1089 *pErrorCode=U_ZERO_ERROR;
1090 } else if(U_FAILURE(*pErrorCode)) {
1091 // some other error occurred, done
1092 break;
1093 } else {
1094 if(source!=sourceLimit) {
1095 // when no error occurs, then the input must be consumed
1096 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1097 break;
1098 }
1099
1100 if(sourceLimit==bytesLimit) {
1101 // we are done
1102 break;
1103 }
1104
1105 // the partial conversion succeeded, set a new limit and continue
1106 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
1107 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
1108 }
1109 }
1110 } else /* step<0 */ {
1111 /*
1112 * step==-1: call only ucnv_getNextUChar()
1113 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
1114 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
1115 * else give it at most (-step-2)/2 bytes
1116 */
1117 UChar32 c;
1118
1119 // end the loop by getting an index out of bounds error
1120 for(;;) {
1121 // resetting the opposite conversion direction must not affect this one
1122 ucnv_resetFromUnicode(cnv);
1123
1124 // convert
1125 if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
1126 sourceLimit=source; // use sourceLimit not as a real limit
1127 // but to remember the pre-getNextUChar source pointer
1128 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
1129
1130 // check pointers and errors
1131 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
1132 if(source!=bytesLimit) {
1133 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1134 } else {
1135 *pErrorCode=U_ZERO_ERROR;
1136 }
1137 break;
1138 } else if(U_FAILURE(*pErrorCode)) {
1139 break;
1140 }
1141 // source may not move if c is from previous overflow
1142
1143 if(target==resultLimit) {
1144 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1145 break;
1146 }
1147 if(c<=0xffff) {
1148 *target++=(UChar)c;
1149 } else {
1150 *target++=U16_LEAD(c);
1151 if(target==resultLimit) {
1152 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1153 break;
1154 }
1155 *target++=U16_TRAIL(c);
1156 }
1157
1158 // alternate between -n-1 and -n but leave -1 alone
1159 if(step<-1) {
1160 ++step;
1161 }
1162 } else /* step is even */ {
1163 // allow only one UChar output
1164 targetLimit=target<resultLimit ? target+1 : resultLimit;
1165
1166 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
1167 // and never output offsets
1168 if(step==-2) {
1169 sourceLimit=bytesLimit;
1170 } else {
1171 sourceLimit=source+(-step-2)/2;
1172 if(sourceLimit>bytesLimit) {
1173 sourceLimit=bytesLimit;
1174 }
1175 }
1176
1177 ucnv_toUnicode(cnv,
1178 &target, targetLimit,
1179 &source, sourceLimit,
1180 NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
1181
1182 // check pointers and errors
1183 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1184 if(target!=targetLimit) {
1185 // buffer overflow must only be set when the target is filled
1186 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1187 break;
1188 } else if(targetLimit==resultLimit) {
1189 // not just a partial overflow
1190 break;
1191 }
1192
1193 // the partial target is filled, set a new limit and continue
1194 *pErrorCode=U_ZERO_ERROR;
1195 } else if(U_FAILURE(*pErrorCode)) {
1196 // some other error occurred, done
1197 break;
1198 } else {
1199 if(source!=sourceLimit) {
1200 // when no error occurs, then the input must be consumed
1201 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1202 break;
1203 }
1204
1205 // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
1206 }
1207
1208 --step;
1209 }
1210 }
1211 }
1212
1213 return (int32_t)(target-result);
1214 }
1215
1216 UBool
ToUnicodeCase(ConversionCase & cc,UConverterToUCallback callback,const char * option)1217 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
1218 // open the converter
1219 IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
1220 LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
1221 // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
1222 if(errorCode.isFailure()) {
1223 errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1224 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
1225 errorCode.reset();
1226 return FALSE;
1227 }
1228
1229 // set the callback
1230 if(callback!=NULL) {
1231 ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
1232 if(U_FAILURE(errorCode)) {
1233 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
1234 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1235 return FALSE;
1236 }
1237 }
1238
1239 int32_t resultOffsets[256];
1240 UChar result[256];
1241 int32_t resultLength;
1242 UBool ok;
1243
1244 static const struct {
1245 int32_t step;
1246 const char *name;
1247 } steps[]={
1248 { 0, "bulk" }, // must be first for offsets to be checked
1249 { 1, "step=1" },
1250 { 3, "step=3" },
1251 { 7, "step=7" },
1252 { -1, "getNext" },
1253 { -2, "toU(bulk)+getNext" },
1254 { -3, "getNext+toU(bulk)" },
1255 { -4, "toU(1)+getNext" },
1256 { -5, "getNext+toU(1)" },
1257 { -12, "toU(5)+getNext" },
1258 { -13, "getNext+toU(5)" },
1259 };
1260 int32_t i, step;
1261
1262 ok=TRUE;
1263 for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1264 step=steps[i].step;
1265 if(step<0 && !cc.finalFlush) {
1266 // skip ucnv_getNextUChar() if !finalFlush because
1267 // ucnv_getNextUChar() always implies flush
1268 continue;
1269 }
1270 if(step!=0) {
1271 // bulk test is first, then offsets are not checked any more
1272 cc.offsets=NULL;
1273 }
1274 else {
1275 for (int32_t i = 0; i < UPRV_LENGTHOF(resultOffsets); i++) {
1276 resultOffsets[i] = -1;
1277 }
1278 }
1279 for (int32_t i = 0; i < UPRV_LENGTHOF(result); i++) {
1280 result[i] = -1;
1281 }
1282 errorCode.reset();
1283 resultLength=stepToUnicode(cc, cnv.getAlias(),
1284 result, UPRV_LENGTHOF(result),
1285 step==0 ? resultOffsets : NULL,
1286 step, errorCode);
1287 ok=checkToUnicode(
1288 cc, cnv.getAlias(), steps[i].name,
1289 result, resultLength,
1290 cc.offsets!=NULL ? resultOffsets : NULL,
1291 errorCode);
1292 if(errorCode.isFailure() || !cc.finalFlush) {
1293 // reset if an error occurred or we did not flush
1294 // otherwise do nothing to make sure that flushing resets
1295 ucnv_resetToUnicode(cnv.getAlias());
1296 }
1297 if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
1298 errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1299 cc.caseNr, cc.charset, resultLength);
1300 }
1301 if (result[resultLength] != (UChar)-1) {
1302 errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
1303 cc.caseNr, cc.charset, resultLength);
1304 }
1305 }
1306
1307 // not a real loop, just a convenience for breaking out of the block
1308 while(ok && cc.finalFlush) {
1309 // test ucnv_toUChars()
1310 memset(result, 0, sizeof(result));
1311
1312 errorCode.reset();
1313 resultLength=ucnv_toUChars(cnv.getAlias(),
1314 result, UPRV_LENGTHOF(result),
1315 (const char *)cc.bytes, cc.bytesLength,
1316 errorCode);
1317 ok=checkToUnicode(
1318 cc, cnv.getAlias(), "toUChars",
1319 result, resultLength,
1320 NULL,
1321 errorCode);
1322 if(!ok) {
1323 break;
1324 }
1325
1326 // test preflighting
1327 // keep the correct result for simple checking
1328 errorCode.reset();
1329 resultLength=ucnv_toUChars(cnv.getAlias(),
1330 NULL, 0,
1331 (const char *)cc.bytes, cc.bytesLength,
1332 errorCode);
1333 if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
1334 errorCode.reset();
1335 }
1336 ok=checkToUnicode(
1337 cc, cnv.getAlias(), "preflight toUChars",
1338 result, resultLength,
1339 NULL,
1340 errorCode);
1341 break;
1342 }
1343
1344 errorCode.reset(); // all errors have already been reported
1345 return ok;
1346 }
1347
1348 UBool
checkToUnicode(ConversionCase & cc,UConverter * cnv,const char * name,const UChar * result,int32_t resultLength,const int32_t * resultOffsets,UErrorCode resultErrorCode)1349 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1350 const UChar *result, int32_t resultLength,
1351 const int32_t *resultOffsets,
1352 UErrorCode resultErrorCode) {
1353 char resultInvalidChars[8];
1354 int8_t resultInvalidLength;
1355 UErrorCode errorCode;
1356
1357 const char *msg;
1358
1359 // reset the message; NULL will mean "ok"
1360 msg=NULL;
1361
1362 errorCode=U_ZERO_ERROR;
1363 resultInvalidLength=sizeof(resultInvalidChars);
1364 ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
1365 if(U_FAILURE(errorCode)) {
1366 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
1367 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1368 return FALSE;
1369 }
1370
1371 // check everything that might have gone wrong
1372 if(cc.unicodeLength!=resultLength) {
1373 msg="wrong result length";
1374 } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
1375 msg="wrong result string";
1376 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {
1377 msg="wrong offsets";
1378 } else if(cc.outErrorCode!=resultErrorCode) {
1379 msg="wrong error code";
1380 } else if(cc.invalidLength!=resultInvalidLength) {
1381 msg="wrong length of last invalid input";
1382 } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
1383 msg="wrong last invalid input";
1384 }
1385
1386 if(msg==NULL) {
1387 return TRUE;
1388 } else {
1389 char buffer[2000]; // one buffer for all strings
1390 char *s, *bytesString, *unicodeString, *resultString,
1391 *offsetsString, *resultOffsetsString,
1392 *invalidCharsString, *resultInvalidCharsString;
1393
1394 bytesString=s=buffer;
1395 s=printBytes(cc.bytes, cc.bytesLength, bytesString);
1396 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
1397 s=printUnicode(result, resultLength, resultString=s);
1398 s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
1399 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1400 s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
1401 s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
1402
1403 if((s-buffer)>(int32_t)sizeof(buffer)) {
1404 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
1405 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1406 exit(1);
1407 }
1408
1409 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1410 " bytes <%s>[%d]\n"
1411 " expected <%s>[%d]\n"
1412 " result <%s>[%d]\n"
1413 " offsets <%s>\n"
1414 " result offsets <%s>\n"
1415 " error code expected %s got %s\n"
1416 " invalidChars expected <%s> got <%s>\n",
1417 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1418 bytesString, cc.bytesLength,
1419 unicodeString, cc.unicodeLength,
1420 resultString, resultLength,
1421 offsetsString,
1422 resultOffsetsString,
1423 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1424 invalidCharsString, resultInvalidCharsString);
1425
1426 return FALSE;
1427 }
1428 }
1429
1430 // fromUnicode test worker functions --------------------------------------- ***
1431
1432 static int32_t
stepFromUTF8(ConversionCase & cc,UConverter * utf8Cnv,UConverter * cnv,char * result,int32_t resultCapacity,int32_t step,UErrorCode * pErrorCode)1433 stepFromUTF8(ConversionCase &cc,
1434 UConverter *utf8Cnv, UConverter *cnv,
1435 char *result, int32_t resultCapacity,
1436 int32_t step,
1437 UErrorCode *pErrorCode) {
1438 const char *source, *sourceLimit, *utf8Limit;
1439 UChar pivotBuffer[32];
1440 UChar *pivotSource, *pivotTarget, *pivotLimit;
1441 char *target, *targetLimit, *resultLimit;
1442 UBool flush;
1443
1444 source=cc.utf8;
1445 pivotSource=pivotTarget=pivotBuffer;
1446 target=result;
1447 utf8Limit=source+cc.utf8Length;
1448 resultLimit=result+resultCapacity;
1449
1450 // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
1451 // move only one buffer (in vs. out) at a time to be extra mean
1452 // step==0 performs bulk conversion
1453
1454 // initialize the partial limits for the loop
1455 if(step==0) {
1456 // use the entire buffers
1457 sourceLimit=utf8Limit;
1458 targetLimit=resultLimit;
1459 flush=cc.finalFlush;
1460
1461 pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
1462 } else {
1463 // start with empty partial buffers
1464 sourceLimit=source;
1465 targetLimit=target;
1466 flush=FALSE;
1467
1468 // empty pivot is not allowed, make it of length step
1469 pivotLimit=pivotBuffer+step;
1470 }
1471
1472 for(;;) {
1473 // resetting the opposite conversion direction must not affect this one
1474 ucnv_resetFromUnicode(utf8Cnv);
1475 ucnv_resetToUnicode(cnv);
1476
1477 // convert
1478 ucnv_convertEx(cnv, utf8Cnv,
1479 &target, targetLimit,
1480 &source, sourceLimit,
1481 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
1482 FALSE, flush, pErrorCode);
1483
1484 // check pointers and errors
1485 if(source>sourceLimit || target>targetLimit) {
1486 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1487 break;
1488 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1489 if(target!=targetLimit) {
1490 // buffer overflow must only be set when the target is filled
1491 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1492 break;
1493 } else if(targetLimit==resultLimit) {
1494 // not just a partial overflow
1495 break;
1496 }
1497
1498 // the partial target is filled, set a new limit, reset the error and continue
1499 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1500 *pErrorCode=U_ZERO_ERROR;
1501 } else if(U_FAILURE(*pErrorCode)) {
1502 if(pivotSource==pivotBuffer) {
1503 // toUnicode error, should not occur
1504 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1505 break;
1506 } else {
1507 // fromUnicode error
1508 // some other error occurred, done
1509 break;
1510 }
1511 } else {
1512 if(source!=sourceLimit) {
1513 // when no error occurs, then the input must be consumed
1514 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1515 break;
1516 }
1517
1518 if(sourceLimit==utf8Limit) {
1519 // we are done
1520 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
1521 // ucnv_convertEx() warns about not terminating the output
1522 // but ucnv_fromUnicode() does not and so
1523 // checkFromUnicode() does not expect it
1524 *pErrorCode=U_ZERO_ERROR;
1525 }
1526 break;
1527 }
1528
1529 // the partial conversion succeeded, set a new limit and continue
1530 sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
1531 flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
1532 }
1533 }
1534
1535 return (int32_t)(target-result);
1536 }
1537
1538 static int32_t
stepFromUnicode(ConversionCase & cc,UConverter * cnv,char * result,int32_t resultCapacity,int32_t * resultOffsets,int32_t step,UErrorCode * pErrorCode)1539 stepFromUnicode(ConversionCase &cc, UConverter *cnv,
1540 char *result, int32_t resultCapacity,
1541 int32_t *resultOffsets, /* also resultCapacity */
1542 int32_t step,
1543 UErrorCode *pErrorCode) {
1544 const UChar *source, *sourceLimit, *unicodeLimit;
1545 char *target, *targetLimit, *resultLimit;
1546 UBool flush;
1547
1548 source=cc.unicode;
1549 target=result;
1550 unicodeLimit=source+cc.unicodeLength;
1551 resultLimit=result+resultCapacity;
1552
1553 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
1554 // move only one buffer (in vs. out) at a time to be extra mean
1555 // step==0 performs bulk conversion and generates offsets
1556
1557 // initialize the partial limits for the loop
1558 if(step==0) {
1559 // use the entire buffers
1560 sourceLimit=unicodeLimit;
1561 targetLimit=resultLimit;
1562 flush=cc.finalFlush;
1563 } else {
1564 // start with empty partial buffers
1565 sourceLimit=source;
1566 targetLimit=target;
1567 flush=FALSE;
1568
1569 // output offsets only for bulk conversion
1570 resultOffsets=NULL;
1571 }
1572
1573 for(;;) {
1574 // resetting the opposite conversion direction must not affect this one
1575 ucnv_resetToUnicode(cnv);
1576
1577 // convert
1578 ucnv_fromUnicode(cnv,
1579 &target, targetLimit,
1580 &source, sourceLimit,
1581 resultOffsets,
1582 flush, pErrorCode);
1583
1584 // check pointers and errors
1585 if(source>sourceLimit || target>targetLimit) {
1586 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1587 break;
1588 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1589 if(target!=targetLimit) {
1590 // buffer overflow must only be set when the target is filled
1591 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1592 break;
1593 } else if(targetLimit==resultLimit) {
1594 // not just a partial overflow
1595 break;
1596 }
1597
1598 // the partial target is filled, set a new limit, reset the error and continue
1599 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1600 *pErrorCode=U_ZERO_ERROR;
1601 } else if(U_FAILURE(*pErrorCode)) {
1602 // some other error occurred, done
1603 break;
1604 } else {
1605 if(source!=sourceLimit) {
1606 // when no error occurs, then the input must be consumed
1607 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1608 break;
1609 }
1610
1611 if(sourceLimit==unicodeLimit) {
1612 // we are done
1613 break;
1614 }
1615
1616 // the partial conversion succeeded, set a new limit and continue
1617 sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
1618 flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
1619 }
1620 }
1621
1622 return (int32_t)(target-result);
1623 }
1624
1625 UBool
FromUnicodeCase(ConversionCase & cc,UConverterFromUCallback callback,const char * option)1626 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
1627 UConverter *cnv;
1628 UErrorCode errorCode;
1629
1630 // open the converter
1631 errorCode=U_ZERO_ERROR;
1632 cnv=cnv_open(cc.charset, errorCode);
1633 if(U_FAILURE(errorCode)) {
1634 errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1635 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1636 return FALSE;
1637 }
1638 ucnv_resetToUnicode(utf8Cnv);
1639
1640 // set the callback
1641 if(callback!=NULL) {
1642 ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
1643 if(U_FAILURE(errorCode)) {
1644 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1645 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1646 ucnv_close(cnv);
1647 return FALSE;
1648 }
1649 }
1650
1651 // set the fallbacks flag
1652 // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1653 ucnv_setFallback(cnv, cc.fallbacks);
1654
1655 // set the subchar
1656 int32_t length;
1657
1658 if(cc.setSub>0) {
1659 length=(int32_t)strlen(cc.subchar);
1660 ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
1661 if(U_FAILURE(errorCode)) {
1662 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
1663 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1664 ucnv_close(cnv);
1665 return FALSE;
1666 }
1667 } else if(cc.setSub<0) {
1668 ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
1669 if(U_FAILURE(errorCode)) {
1670 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
1671 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1672 ucnv_close(cnv);
1673 return FALSE;
1674 }
1675 }
1676
1677 // convert unicode to utf8
1678 char utf8[256];
1679 cc.utf8=utf8;
1680 u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
1681 cc.unicode, cc.unicodeLength,
1682 &errorCode);
1683 if(U_FAILURE(errorCode)) {
1684 // skip UTF-8 testing of a string with an unpaired surrogate,
1685 // or of one that's too long
1686 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1687 cc.utf8Length=-1;
1688 }
1689
1690 int32_t resultOffsets[256];
1691 char result[256];
1692 int32_t resultLength;
1693 UBool ok;
1694
1695 static const struct {
1696 int32_t step;
1697 const char *name, *utf8Name;
1698 } steps[]={
1699 { 0, "bulk", "utf8" }, // must be first for offsets to be checked
1700 { 1, "step=1", "utf8 step=1" },
1701 { 3, "step=3", "utf8 step=3" },
1702 { 7, "step=7", "utf8 step=7" }
1703 };
1704 int32_t i, step;
1705
1706 ok=TRUE;
1707 for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1708 step=steps[i].step;
1709 for (int32_t i = 0; i < UPRV_LENGTHOF(resultOffsets); i++) {
1710 resultOffsets[i] = -1;
1711 }
1712 for (int32_t i = 0; i < UPRV_LENGTHOF(result); i++) {
1713 result[i] = -1;
1714 }
1715 errorCode=U_ZERO_ERROR;
1716 resultLength=stepFromUnicode(cc, cnv,
1717 result, UPRV_LENGTHOF(result),
1718 step==0 ? resultOffsets : NULL,
1719 step, &errorCode);
1720 ok=checkFromUnicode(
1721 cc, cnv, steps[i].name,
1722 (uint8_t *)result, resultLength,
1723 cc.offsets!=NULL ? resultOffsets : NULL,
1724 errorCode);
1725 if(U_FAILURE(errorCode) || !cc.finalFlush) {
1726 // reset if an error occurred or we did not flush
1727 // otherwise do nothing to make sure that flushing resets
1728 ucnv_resetFromUnicode(cnv);
1729 }
1730 if (resultOffsets[resultLength] != -1) {
1731 errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1732 cc.caseNr, cc.charset, resultLength);
1733 }
1734 if (result[resultLength] != (char)-1) {
1735 errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
1736 cc.caseNr, cc.charset, resultLength);
1737 }
1738
1739 // bulk test is first, then offsets are not checked any more
1740 cc.offsets=NULL;
1741
1742 // test direct conversion from UTF-8
1743 if(cc.utf8Length>=0) {
1744 errorCode=U_ZERO_ERROR;
1745 resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
1746 result, UPRV_LENGTHOF(result),
1747 step, &errorCode);
1748 ok=checkFromUnicode(
1749 cc, cnv, steps[i].utf8Name,
1750 (uint8_t *)result, resultLength,
1751 NULL,
1752 errorCode);
1753 if(U_FAILURE(errorCode) || !cc.finalFlush) {
1754 // reset if an error occurred or we did not flush
1755 // otherwise do nothing to make sure that flushing resets
1756 ucnv_resetToUnicode(utf8Cnv);
1757 ucnv_resetFromUnicode(cnv);
1758 }
1759 }
1760 }
1761
1762 // not a real loop, just a convenience for breaking out of the block
1763 while(ok && cc.finalFlush) {
1764 // test ucnv_fromUChars()
1765 memset(result, 0, sizeof(result));
1766
1767 errorCode=U_ZERO_ERROR;
1768 resultLength=ucnv_fromUChars(cnv,
1769 result, UPRV_LENGTHOF(result),
1770 cc.unicode, cc.unicodeLength,
1771 &errorCode);
1772 ok=checkFromUnicode(
1773 cc, cnv, "fromUChars",
1774 (uint8_t *)result, resultLength,
1775 NULL,
1776 errorCode);
1777 if(!ok) {
1778 break;
1779 }
1780
1781 // test preflighting
1782 // keep the correct result for simple checking
1783 errorCode=U_ZERO_ERROR;
1784 resultLength=ucnv_fromUChars(cnv,
1785 NULL, 0,
1786 cc.unicode, cc.unicodeLength,
1787 &errorCode);
1788 if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
1789 errorCode=U_ZERO_ERROR;
1790 }
1791 ok=checkFromUnicode(
1792 cc, cnv, "preflight fromUChars",
1793 (uint8_t *)result, resultLength,
1794 NULL,
1795 errorCode);
1796 break;
1797 }
1798
1799 ucnv_close(cnv);
1800 return ok;
1801 }
1802
1803 UBool
checkFromUnicode(ConversionCase & cc,UConverter * cnv,const char * name,const uint8_t * result,int32_t resultLength,const int32_t * resultOffsets,UErrorCode resultErrorCode)1804 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1805 const uint8_t *result, int32_t resultLength,
1806 const int32_t *resultOffsets,
1807 UErrorCode resultErrorCode) {
1808 UChar resultInvalidUChars[8];
1809 int8_t resultInvalidLength;
1810 UErrorCode errorCode;
1811
1812 const char *msg;
1813
1814 // reset the message; NULL will mean "ok"
1815 msg=NULL;
1816
1817 errorCode=U_ZERO_ERROR;
1818 resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
1819 ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
1820 if(U_FAILURE(errorCode)) {
1821 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1822 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1823 return FALSE;
1824 }
1825
1826 // check everything that might have gone wrong
1827 if(cc.bytesLength!=resultLength) {
1828 msg="wrong result length";
1829 } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
1830 msg="wrong result string";
1831 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {
1832 msg="wrong offsets";
1833 } else if(cc.outErrorCode!=resultErrorCode) {
1834 msg="wrong error code";
1835 } else if(cc.invalidLength!=resultInvalidLength) {
1836 msg="wrong length of last invalid input";
1837 } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
1838 msg="wrong last invalid input";
1839 }
1840
1841 if(msg==NULL) {
1842 return TRUE;
1843 } else {
1844 char buffer[2000]; // one buffer for all strings
1845 char *s, *unicodeString, *bytesString, *resultString,
1846 *offsetsString, *resultOffsetsString,
1847 *invalidCharsString, *resultInvalidUCharsString;
1848
1849 unicodeString=s=buffer;
1850 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
1851 s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
1852 s=printBytes(result, resultLength, resultString=s);
1853 s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
1854 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1855 s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
1856 s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
1857
1858 if((s-buffer)>(int32_t)sizeof(buffer)) {
1859 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1860 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1861 exit(1);
1862 }
1863
1864 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1865 " unicode <%s>[%d]\n"
1866 " expected <%s>[%d]\n"
1867 " result <%s>[%d]\n"
1868 " offsets <%s>\n"
1869 " result offsets <%s>\n"
1870 " error code expected %s got %s\n"
1871 " invalidChars expected <%s> got <%s>\n",
1872 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1873 unicodeString, cc.unicodeLength,
1874 bytesString, cc.bytesLength,
1875 resultString, resultLength,
1876 offsetsString,
1877 resultOffsetsString,
1878 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1879 invalidCharsString, resultInvalidUCharsString);
1880
1881 return FALSE;
1882 }
1883 }
1884
1885 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1886