1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ***************************************************************************/
6 /*****************************************************************************
7 *
* File NCNVFBTS
9 *
10 * Modification History:
11 * Name Date Description
* Madhu Katragadda 06/23/2000 Tests for Converter FallBack API and Functionality
13 ******************************************************************************
14 */
15 #include <stdio.h>
16 #include "unicode/uloc.h"
17 #include "unicode/ucnv.h"
18 #include "unicode/ucnv_err.h"
19 #include "cintltst.h"
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
22 #include "ncnvfbts.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25
26 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Capacity, in elements, of the scratch output and offset buffers used by the tests. */
#define NEW_MAX_BUFFER 999


/*
 * Yields the smaller of x and y.
 * Every use of an argument is parenthesized so that operands containing
 * lower-precedence operators (e.g. a | b) are compared as a whole.
 * Each argument may be evaluated twice, so avoid side effects in operands.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/* Input/output chunk sizes for the current run; set by TestConvertFallBackWithBufferSizes(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Human-readable description of the current sub-test; filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
35
my_ucnv_open(const char * cnv,UErrorCode * err)36 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
37 {
38 if(cnv && cnv[0] == '@') {
39 return ucnv_openPackage("testdata", cnv+1, err);
40 } else {
41 return ucnv_open(cnv, err);
42 }
43 }
44
45
/* Writes len bytes of a to the verbose log as "{0xNN 0xNN ... }" followed by a newline. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
54
/* Writes len UChars of a to the verbose log as "{U+0xNNNN 0xNNNN ... }" followed by a newline. */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
63
/* Writes len bytes of a to stderr as "{0xNN 0xNN ... }" followed by a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
72
/* Writes len UChars of a to stderr as "{U+0xNNNN 0xNNNN ... }" followed by a newline. */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
81
TestConverterFallBack(void)82 static void TestConverterFallBack(void)
83 {
84 TestConvertFallBackWithBufferSizes(10,10);
85 TestConvertFallBackWithBufferSizes(2,3);
86 TestConvertFallBackWithBufferSizes(3,2);
87 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,1);
88 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,2);
89 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,3);
90 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,4);
91 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,5);
92 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,6);
93 TestConvertFallBackWithBufferSizes(1,NEW_MAX_BUFFER);
94 TestConvertFallBackWithBufferSizes(2,NEW_MAX_BUFFER);
95 TestConvertFallBackWithBufferSizes(3,NEW_MAX_BUFFER);
96 TestConvertFallBackWithBufferSizes(4,NEW_MAX_BUFFER);
97 TestConvertFallBackWithBufferSizes(5,NEW_MAX_BUFFER);
98 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
99
100 }
101
102
103 void addTestConverterFallBack(TestNode** root);
104
addTestConverterFallBack(TestNode ** root)105 void addTestConverterFallBack(TestNode** root)
106 {
107 #if !UCONFIG_NO_FILE_IO
108 addTest(root, &TestConverterFallBack, "tsconv/ncnvfbts/TestConverterFallBack");
109 #endif
110
111 }
112
113
114 /* Note that this test already makes use of statics, so it's not really
115 multithread safe.
116 This convenience function lets us make the error messages actually useful.
117 */
118
setNuConvTestName(const char * codepage,const char * direction)119 static void setNuConvTestName(const char *codepage, const char *direction)
120 {
121 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
122 codepage,
123 direction,
124 (int)gInBufferSize,
125 (int)gOutBufferSize);
126 }
127
128
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UBool fallback,const int32_t * expectOffsets)129 static UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
130 const char *codepage, UBool fallback, const int32_t *expectOffsets)
131 {
132
133
134 UErrorCode status = U_ZERO_ERROR;
135 UConverter *conv = 0;
136 char junkout[NEW_MAX_BUFFER]; /* FIX */
137 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
138 const UChar *src;
139 char *end;
140 char *targ;
141 int32_t *offs;
142 int i;
143 int32_t realBufferSize;
144 char *realBufferEnd;
145 const UChar *realSourceEnd;
146 const UChar *sourceLimit;
147 UBool checkOffsets = TRUE;
148 UBool doFlush;
149 UBool action=FALSE;
150 char *p;
151
152
153 for(i=0;i<NEW_MAX_BUFFER;i++)
154 junkout[i] = (char)0xF0;
155 for(i=0;i<NEW_MAX_BUFFER;i++)
156 junokout[i] = 0xFF;
157 setNuConvTestName(codepage, "FROM");
158
159 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
160 gOutBufferSize);
161
162 conv = my_ucnv_open(codepage, &status);
163 if(U_FAILURE(status))
164 {
165 log_data_err("Couldn't open converter %s\n",codepage);
166 return TRUE;
167 }
168
169 log_verbose("Converter opened..\n");
170 /*----setting the callback routine----*/
171 ucnv_setFallback (conv, fallback);
172 action = ucnv_usesFallback(conv);
173 if(action != fallback){
174 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
175 }
176 /*------------------------*/
177 src = source;
178 targ = junkout;
179 offs = junokout;
180
181 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
182 realBufferEnd = junkout + realBufferSize;
183 realSourceEnd = source + sourceLen;
184
185 if ( gOutBufferSize != realBufferSize )
186 checkOffsets = FALSE;
187
188 if( gInBufferSize != NEW_MAX_BUFFER )
189 checkOffsets = FALSE;
190
191 do
192 {
193 end = nct_min(targ + gOutBufferSize, realBufferEnd);
194 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
195
196 doFlush = (UBool)(sourceLimit == realSourceEnd);
197
198 if(targ == realBufferEnd)
199 {
200 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
201 return FALSE;
202 }
203 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
204
205
206 status = U_ZERO_ERROR;
207
208 ucnv_fromUnicode (conv,
209 (char **)&targ,
210 (const char *)end,
211 &src,
212 sourceLimit,
213 checkOffsets ? offs : NULL,
214 doFlush, /* flush if we're at the end of the input data */
215 &status);
216
217 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (sourceLimit < realSourceEnd) );
218
219 if(U_FAILURE(status))
220 {
221 log_err("Problem doing toUnicode, errcode %d %s\n", myErrorName(status), gNuConvTestName);
222 return FALSE;
223 }
224
225 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
226 sourceLen, targ-junkout);
227 if(getTestOption(VERBOSITY_OPTION))
228 {
229 char junk[9999];
230 char offset_str[9999];
231
232 junk[0] = 0;
233 offset_str[0] = 0;
234 for(p = junkout;p<targ;p++)
235 {
236 sprintf(junk + uprv_strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
237 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
238 }
239
240 log_verbose(junk);
241 printSeq((const unsigned char*)expect, expectLen);
242 if ( checkOffsets )
243 {
244 log_verbose("\nOffsets:");
245 log_verbose(offset_str);
246 }
247 log_verbose("\n");
248 }
249 ucnv_close(conv);
250
251
252 if(expectLen != targ-junkout)
253 {
254 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
255 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
256 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
257 printSeqErr((const unsigned char*)expect, expectLen);
258 return FALSE;
259 }
260
261 if (checkOffsets && (expectOffsets != 0) )
262 {
263 log_verbose("\ncomparing %d offsets..\n", targ-junkout);
264 if(uprv_memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
265 log_err("\ndid not get the expected offsets while %s \n", gNuConvTestName);
266 log_err("Got : ");
267 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
268 for(p=junkout;p<targ;p++)
269 log_err("%d, ", junokout[p-junkout]);
270 log_err("\nExpected: ");
271 for(i=0; i<(targ-junkout); i++)
272 log_err("%d,", expectOffsets[i]);
273 }
274 }
275
276 log_verbose("\n\ncomparing..\n");
277 if(!memcmp(junkout, expect, expectLen))
278 {
279 log_verbose("Matches!\n");
280 return TRUE;
281 }
282 else
283 {
284 log_err("String does not match. %s\n", gNuConvTestName);
285 log_verbose("String does not match. %s\n", gNuConvTestName);
286 printSeqErr((const unsigned char*)junkout, expectLen);
287 printSeqErr((const unsigned char*)expect, expectLen);
288 return FALSE;
289 }
290 }
291
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UBool fallback,const int32_t * expectOffsets)292 static UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
293 const char *codepage, UBool fallback, const int32_t *expectOffsets)
294 {
295 UErrorCode status = U_ZERO_ERROR;
296 UConverter *conv = 0;
297 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
298 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
299 const char *src;
300 const char *realSourceEnd;
301 const char *srcLimit;
302 UChar *targ;
303 UChar *end;
304 int32_t *offs;
305 int i;
306 UBool checkOffsets = TRUE;
307 char junk[9999];
308 char offset_str[9999];
309 UChar *p;
310 UBool action;
311
312 int32_t realBufferSize;
313 UChar *realBufferEnd;
314
315
316 for(i=0;i<NEW_MAX_BUFFER;i++)
317 junkout[i] = 0xFFFE;
318
319 for(i=0;i<NEW_MAX_BUFFER;i++)
320 junokout[i] = -1;
321
322 setNuConvTestName(codepage, "TO");
323
324 log_verbose("\n========= %s\n", gNuConvTestName);
325
326 conv = my_ucnv_open(codepage, &status);
327 if(U_FAILURE(status))
328 {
329 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
330 return TRUE; /* because it has been logged */
331 }
332
333 log_verbose("Converter opened..\n");
334
335 src = (const char *)source;
336 targ = junkout;
337 offs = junokout;
338
339 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
340 realBufferEnd = junkout + realBufferSize;
341 realSourceEnd = src + sourcelen;
342 /*----setting the fallback routine----*/
343 ucnv_setFallback (conv, fallback);
344 action = ucnv_usesFallback(conv);
345 if(action != fallback){
346 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
347 }
348 /*-------------------------------------*/
349 if ( gOutBufferSize != realBufferSize )
350 checkOffsets = FALSE;
351
352 if( gInBufferSize != NEW_MAX_BUFFER )
353 checkOffsets = FALSE;
354
355 do
356 {
357 end = nct_min( targ + gOutBufferSize, realBufferEnd);
358 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
359
360 if(targ == realBufferEnd)
361 {
362 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
363 return FALSE;
364 }
365 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
366
367
368
369 status = U_ZERO_ERROR;
370
371 ucnv_toUnicode (conv,
372 &targ,
373 end,
374 (const char **)&src,
375 (const char *)srcLimit,
376 checkOffsets ? offs : NULL,
377 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
378 &status);
379 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (srcLimit < realSourceEnd) ); /* while we just need another buffer */
380
381
382 if(U_FAILURE(status))
383 {
384 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
385 return FALSE;
386 }
387
388 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
389 sourcelen, targ-junkout);
390 if(getTestOption(VERBOSITY_OPTION))
391 {
392
393 junk[0] = 0;
394 offset_str[0] = 0;
395
396 for(p = junkout;p<targ;p++)
397 {
398 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
399 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
400 }
401
402 log_verbose(junk);
403 printUSeq(expect, expectlen);
404 if ( checkOffsets )
405 {
406 log_verbose("\nOffsets:");
407 log_verbose(offset_str);
408 }
409 log_verbose("\n");
410 }
411 ucnv_close(conv);
412
413 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
414
415 if (checkOffsets && (expectOffsets != 0))
416 {
417 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
418 {
419 log_err("\n\ndid not get the expected offsets while %s \n", gNuConvTestName);
420 log_err("\nGot : ");
421 for(p=junkout;p<targ;p++)
422 log_err("%d, ", junokout[p-junkout]);
423 log_err("\nExpected: ");
424 for(i=0; i<(targ-junkout); i++)
425 log_err("%d,", expectOffsets[i]);
426 log_err("");
427 for(i=0; i<(targ-junkout); i++)
428 log_err("0x%04X,", junkout[i]);
429 log_err("");
430 for(i=0; i<(src-(const char *)source); i++)
431 log_err("0x%04X,", (unsigned char)source[i]);
432 }
433 }
434
435 if(!memcmp(junkout, expect, expectlen*2))
436 {
437 log_verbose("Matches!\n");
438 return TRUE;
439 }
440 else
441 {
442 log_err("String does not match. %s\n", gNuConvTestName);
443 log_verbose("String does not match. %s\n", gNuConvTestName);
444 printUSeqErr(junkout, expectlen);
445 printf("\n");
446 printUSeqErr(expect, expectlen);
447 return FALSE;
448 }
449 }
450
451
452
TestConvertFallBackWithBufferSizes(int32_t outsize,int32_t insize)453 static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
454 {
455
456 static const UChar SBCSText[] =
457 { 0x0021, 0xFF01, 0x0022, 0xFF02, 0x0023, 0xFF03, 0x003A, 0xFF1A, 0x003B, 0xFF1B, 0x003C, 0xFF1C };
458 /* 21, ?, 22, ?, 23, ?, 3a, ?, 3b, ?, 3c, ? SBCS*/
459 static const uint8_t expectedNative[] =
460 { 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3c};
461 static const UChar retrievedSBCSText[]=
462 { 0x0021, 0x0021, 0x0022, 0x0022, 0x0023, 0x0023, 0x003A, 0x003A, 0x003B, 0x003B, 0x003C, 0x003C };
463 static const int32_t toNativeOffs [] =
464 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b};
465 static const int32_t fromNativeoffs [] =
466 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
467
468
469 /* 1363 isn't DBCS, but it has the DBCS section */
470 static const UChar DBCSText[] =
471 { 0x00a1, 0x00ad, 0x2010, 0x00b7, 0x30fb};
472 static const uint8_t expectedIBM1363_DBCS[] =
473 { 0xa2, 0xae, 0xa1 ,0xa9, 0xa1, 0xa9,0xa1 ,0xa4, 0xa1, 0xa4};
474 static const UChar retrievedDBCSText[]=
475 { 0x00a1, 0x2010, 0x2010, 0x30fb, 0x30fb };
476 static const int32_t toIBM1363Offs_DBCS[] =
477 { 0x00, 0x00, 0x01,0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04};
478 static const int32_t fromIBM1363offs_DBCS[] =
479 { 0, 2, 4, 6, 8};
480
481
482 static const UChar MBCSText[] =
483 { 0x0001, 0x263a, 0x2013, 0x2014, 0x263b, 0x0002};
484 static const uint8_t expectedIBM950[] =
485 { 0x01, 0x01, 0xa1, 0x56, 0xa1, 0x56, 0x02, 0x02};
486 static const UChar retrievedMBCSText[]=
487 { 0x0001, 0x0001, 0x2014, 0x2014, 0x0002, 0x0002};
488 static const int32_t toIBM950Offs [] =
489 { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05};
490 static const int32_t fromIBM950offs [] =
491 { 0, 1, 2, 4, 6, 7};
492
493 static const UChar MBCSText1363[] =
494 { 0x0005,
495 0xffe8,
496 0x0007,
497 0x2022,
498 0x005c,
499 0x00b7,
500 0x3016,
501 0x30fb,
502 0x9a36};
503 static const uint8_t expectedIBM1363[] =
504 { 0x05,
505 0x05,
506 0x07,
507 0x07,
508 0x7f,
509 0xa1, 0xa4,
510 0xa1, 0xe0,
511 0xa1, 0xa4,
512 0xf5, 0xe2};
513 static const UChar retrievedMBCSText1363[]=
514 { 0x0005, 0x0005, 0x0007, 0x0007, 0x001a, 0x30fb, 0x25a1, 0x30fb, 0x9a36};
515 static const int32_t toIBM1363Offs [] =
516 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08};
517 static const int32_t fromIBM1363offs [] =
518 { 0, 1, 2, 3, 4, 5, 7, 9, 11};
519
520
521
522 static const char* nativeCodePage[]={
523 /*NLCS Mapping*/
524 "ibm-437",
525 "ibm-850",
526 "ibm-878",
527 "ibm-923",
528 "ibm-1051",
529 "ibm-1089",
530 "ibm-1250",
531 "ibm-1251",
532 "ibm-1253",
533 "ibm-1254",
534 "ibm-1255",
535 "ibm-1256",
536 "ibm-1257",
537 "ibm-1258",
538 "ibm-1276"
539 };
540
541 int32_t i=0;
542 gInBufferSize = insize;
543 gOutBufferSize = outsize;
544
545 for(i=0; i<sizeof(nativeCodePage)/sizeof(nativeCodePage[0]); i++){
546 log_verbose("Testing %s\n", nativeCodePage[i]);
547 if(!testConvertFromUnicode(SBCSText, sizeof(SBCSText)/sizeof(SBCSText[0]),
548 expectedNative, sizeof(expectedNative), nativeCodePage[i], TRUE, toNativeOffs ))
549 log_err("u-> %s(SBCS) with FallBack did not match.\n", nativeCodePage[i]);
550
551 if(!testConvertToUnicode(expectedNative, sizeof(expectedNative),
552 retrievedSBCSText, sizeof(retrievedSBCSText)/sizeof(retrievedSBCSText[0]), nativeCodePage[i], TRUE, fromNativeoffs ))
553 log_err("%s->u(SBCS) with Fallback did not match.\n", nativeCodePage[i]);
554 }
555
556 /*DBCS*/
557 if(!testConvertFromUnicode(DBCSText, sizeof(DBCSText)/sizeof(DBCSText[0]),
558 expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), "ibm-1363", TRUE, toIBM1363Offs_DBCS ))
559 log_err("u-> ibm-1363(DBCS portion) with FallBack did not match.\n");
560
561 if(!testConvertToUnicode(expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS),
562 retrievedDBCSText, sizeof(retrievedDBCSText)/sizeof(retrievedDBCSText[0]),"ibm-1363", TRUE, fromIBM1363offs_DBCS ))
563 log_err("ibm-1363->u(DBCS portion) with Fallback did not match.\n");
564
565
566 /*MBCS*/
567 if(!testConvertFromUnicode(MBCSText, sizeof(MBCSText)/sizeof(MBCSText[0]),
568 expectedIBM950, sizeof(expectedIBM950), "ibm-950", TRUE, toIBM950Offs ))
569 log_err("u-> ibm-950(MBCS) with FallBack did not match.\n");
570
571 if(!testConvertToUnicode(expectedIBM950, sizeof(expectedIBM950),
572 retrievedMBCSText, sizeof(retrievedMBCSText)/sizeof(retrievedMBCSText[0]),"ibm-950", TRUE, fromIBM950offs ))
573 log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
574
575 /*commented untill data table is available*/
576 log_verbose("toUnicode fallback with fallback data for MBCS\n");
577 {
578 const uint8_t IBM950input[] = {
579 0xf4, 0x87, 0xa4, 0x4a, 0xf4, 0x88, 0xa4, 0x4b,
580 0xf9, 0x92, 0xdc, 0xb0, };
581 UChar expectedUnicodeText[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9};
582 int32_t fromIBM950inputOffs [] = { 0, 2, 4, 6, 8, 10};
583 /* for testing reverse fallback behavior */
584 UChar expectedFallbackFalse[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9};
585
586 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
587 expectedUnicodeText, sizeof(expectedUnicodeText)/sizeof(expectedUnicodeText[0]),"ibm-950", TRUE, fromIBM950inputOffs ))
588 log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
589 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
590 expectedFallbackFalse, sizeof(expectedFallbackFalse)/sizeof(expectedFallbackFalse[0]),"ibm-950", FALSE, fromIBM950inputOffs ))
591 log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
592
593 }
594 log_verbose("toUnicode fallback with fallback data for euc-tw\n");
595 {
596 const uint8_t euc_tw_input[] = {
597 0xA7, 0xCC, 0x8E, 0xA2, 0xA1, 0xAB,
598 0xA8, 0xC7, 0xC8, 0xDE,
599 0xA8, 0xCD, 0x8E, 0xA2, 0xA2, 0xEA,};
600 UChar expectedUnicodeText[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};
601 int32_t from_euc_tw_offs [] = { 0, 2, 6, 8, 10, 12};
602 /* for testing reverse fallback behavior */
603 UChar expectedFallbackFalse[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};
604
605 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
606 expectedUnicodeText, sizeof(expectedUnicodeText)/sizeof(expectedUnicodeText[0]),"euc-tw", TRUE, from_euc_tw_offs ))
607 log_err("from euc-tw->u with Fallback did not match.\n");
608
609 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
610 expectedFallbackFalse, sizeof(expectedFallbackFalse)/sizeof(expectedFallbackFalse[0]),"euc-tw", FALSE, from_euc_tw_offs ))
611 log_err("from euc-tw->u with Fallback false did not match.\n");
612
613
614 }
615 log_verbose("fromUnicode to euc-tw with fallback data euc-tw\n");
616 {
617 UChar inputText[]= { 0x0001, 0x008e, 0x203e, 0x2223, 0xff5c, 0x5296,
618 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278, 0xEDEC};
619 const uint8_t expected_euc_tw[] = {
620 0x01, 0x1a, 0xa2, 0xa3,
621 0xa2, 0xde, 0xa2, 0xde,
622 0x8e, 0xa2, 0xe5, 0xb9,
623 0x8e, 0xa2, 0xa1, 0xab, 0x8e, 0xa2, 0xa1, 0xab,
624 0xc8, 0xde, 0xc8, 0xde,
625 0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xa2, 0xa2, 0xea,
626 0x8e, 0xac, 0xc6, 0xf7};
627 int32_t to_euc_tw_offs [] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6,
628 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12};
629
630 if(!testConvertFromUnicode(inputText, sizeof(inputText)/sizeof(inputText[0]),
631 expected_euc_tw, sizeof(expected_euc_tw), "euc-tw", TRUE, to_euc_tw_offs ))
632 log_err("u-> euc-tw with FallBack did not match.\n");
633
634 }
635
636 /*MBCS 1363*/
637 if(!testConvertFromUnicode(MBCSText1363, sizeof(MBCSText1363)/sizeof(MBCSText1363[0]),
638 expectedIBM1363, sizeof(expectedIBM1363), "ibm-1363", TRUE, toIBM1363Offs ))
639 log_err("u-> ibm-1363(MBCS) with FallBack did not match.\n");
640
641 if(!testConvertToUnicode(expectedIBM1363, sizeof(expectedIBM1363),
642 retrievedMBCSText1363, sizeof(retrievedMBCSText1363)/sizeof(retrievedMBCSText1363[0]),"ibm-1363", TRUE, fromIBM1363offs ))
643 log_err("ibm-1363->u(MBCS) with Fallback did not match.\n");
644
645
646 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
647 which is test file for MBCS conversion with single-byte codepage data.*/
648 {
649
650 /* MBCS with single byte codepage data test1.ucm*/
651 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x0003};
652 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0x08, 0xff,};
653 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, 7};
654
655 const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
656 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd, 0xfffe};
657 int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4,5};
658
659 /*from Unicode*/
660 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
661 expectedtest1, sizeof(expectedtest1), "@test1", TRUE, totest1Offs ))
662 log_err("u-> test1(MBCS conversion with single-byte) did not match.\n");
663
664 /*to Unicode*/
665 if(!testConvertToUnicode(test1input, sizeof(test1input),
666 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", TRUE, fromtest1Offs ))
667 log_err("test1(MBCS conversion with single-byte) -> u did not match.\n");
668
669 }
670
671 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
672 which is test file for MBCS conversion with three-byte codepage data.*/
673 {
674
675 /* MBCS with three byte codepage data test3.ucm*/
676 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x000b, 0xd84d, 0xdc56, 0x000e, 0x0003, };
677 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x07, 0xff, 0x01, 0x02, 0x0b, 0x01, 0x02, 0x0a, 0xff, 0xff,};
678 int32_t totest3Offs[] = { 0, 1, 2, 3, 5, 7, 7, 7, 8, 8, 8, 10, 11};
679
680 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a,
681 0x01, 0x02, 0x0e, 0x01, 0x02, 0x0d, 0x03, 0x01, 0x02, 0x0f,};
682 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56,
683 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd };
684 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10, 13, 13, 16, 17};
685
686 /*from Unicode*/
687 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
688 expectedtest3, sizeof(expectedtest3), "@test3", TRUE, totest3Offs ))
689 log_err("u-> test3(MBCS conversion with three-byte) did not match.\n");
690
691 /*to Unicode*/
692 if(!testConvertToUnicode(test3input, sizeof(test3input),
693 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", TRUE, fromtest3Offs ))
694 log_err("test3(MBCS conversion with three-byte) -> u did not match.\n");
695
696 }
697
698 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
699 which is test file for MBCS conversion with four-byte codepage data.*/
700 {
701
702 /* MBCS with three byte codepage data test4.ucm*/
703 const UChar unicodeInput[] =
704 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
705 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x000f};
706 const uint8_t expectedtest4[] =
707 { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0xff,
708 0x01, 0x02, 0x03, 0x0a, 0xff, 0xff, 0xff};
709 int32_t totest4Offs[] =
710 { 0, 1, 2, 3, 3, 3, 3, 4, 6, 8, 8, 8, 8, 10, 11, 13};
711
712 const uint8_t test4input[] =
713 { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x08,
714 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,};
715 const UChar expectedUnicode[] =
716 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
717 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd};
718 int32_t fromtest4Offs[] =
719 { 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,};
720
721 /*from Unicode*/
722 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
723 expectedtest4, sizeof(expectedtest4), "@test4", TRUE, totest4Offs ))
724 log_err("u-> test4(MBCS conversion with four-byte) did not match.\n");
725
726 /*to Unicode*/
727 if(!testConvertToUnicode(test4input, sizeof(test4input),
728 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", TRUE, fromtest4Offs ))
729 log_err("test4(MBCS conversion with four-byte) -> u did not match.\n");
730
731 }
732 /* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/
733 {
734 const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E };
735 const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f };
736 int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 };
737 const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 };
738 const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c };
739 int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 };
740 /*from Unicode*/
741 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
742 expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs ))
743 log_err("u-> ibm-1371(MBCS conversion with single-byte) did not match.,\n");
744 /*to Unicode*/
745 if(!testConvertToUnicode(test1input, sizeof(test1input),
746 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "ibm-1371", TRUE, fromtest1Offs ))
747 log_err("ibm-1371(MBCS conversion with single-byte) -> u did not match.,\n");
748 }
749
750 }
751 #endif
752