1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*****************************************************************************
9 *
10 * File ncnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
15 ******************************************************************************
16 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "unicode/uset.h"
25 #include "unicode/utf8.h"
26 #include "unicode/utf16.h"
27 #include "cintltst.h"
28 #include "cmemory.h"
29 
#define MAX_LENGTH 999

/* Upper bound of the Unicode code space and the bounds of the surrogate range. */
#define UNICODE_LIMIT 0x10FFFF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_LOW_END       0xDFFF

/* Buffer sizes that setNuConvTestName() reports in the test label. */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Receives the label formatted by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/*
 * Yields the smaller of x and y.
 * Every use of an argument is parenthesized so that an operand containing a
 * lower-precedence operator (for example "a & b") is compared as a whole.
 * Each argument may still be evaluated twice, so do not pass expressions
 * with side effects.
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
41 
42 static void printSeq(const unsigned char* a, int len);
43 static void printSeqErr(const unsigned char* a, int len);
44 static void printUSeq(const UChar* a, int len);
45 static void printUSeqErr(const UChar* a, int len);
46 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
47                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
48 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
49                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
50 
51 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
52                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
53 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
54                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
55 
setNuConvTestName(const char * codepage,const char * direction)56 static void setNuConvTestName(const char *codepage, const char *direction)
57 {
58     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
59         codepage,
60         direction,
61         (int)gInBufferSize,
62         (int)gOutBufferSize);
63 }
64 
65 
66 static void TestSurrogateBehaviour(void);
67 static void TestErrorBehaviour(void);
68 
69 #if !UCONFIG_NO_LEGACY_CONVERSION
70 static void TestToUnicodeErrorBehaviour(void);
71 static void TestGetNextErrorBehaviour(void);
72 #endif
73 
74 static void TestRegressionUTF8(void);
75 static void TestRegressionUTF32(void);
76 static void TestAvailableConverters(void);
77 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
78 static void TestResetBehaviour(void);
79 static void TestTruncated(void);
80 static void TestUnicodeSet(void);
81 
82 static void TestWithBufferSize(int32_t osize, int32_t isize);
83 
84 
/*
 * Writes the first len bytes of a through log_verbose as
 * "{0xNN 0xNN ... }", each byte as two upper-case hex digits.
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X ", a[idx]);
    }
    log_verbose("}\n");
}
93 
/*
 * Writes the first len UTF-16 code units of a through log_verbose as
 * "{0xNNNN 0xNNNN ... }", each unit as four upper-case hex digits.
 * The output layout matches printUSeqErr().
 */
static void printUSeq(const UChar* a, int len)
{
    int i;

    log_verbose("\n{");
    for (i = 0; i < len; i++) {
        /* "0x" is literal text; %04X zero-pads the code unit to four digits. */
        log_verbose("0x%04X ", a[i]);
    }
    log_verbose("}\n");
}
102 
/*
 * Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }",
 * each byte as two upper-case hex digits.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02X ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
110 
/*
 * Writes the first len UTF-16 code units of a to stderr as
 * "{0xNNNN 0xNNNN ... }", each unit as four upper-case hex digits.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04X ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
119 
120 void addExtraTests(TestNode** root);
121 
/*
 * Adds this file's tests to the tree at root, all under the
 * "tsconv/ncnvtst/" path.  The two legacy-conversion tests are added
 * only when UCONFIG_NO_LEGACY_CONVERSION is off.
 */
void addExtraTests(TestNode** root)
{
    addTest(root, TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour");
    addTest(root, TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
    addTest(root, TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour");
#endif

    addTest(root, TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters");
    addTest(root, TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer");
    addTest(root, TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour");
    addTest(root, TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8");
    addTest(root, TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32");
    addTest(root, TestTruncated, "tsconv/ncnvtst/TestTruncated");
    addTest(root, TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet");
}
140 
141 /*test surrogate behaviour*/
TestSurrogateBehaviour()142 static void TestSurrogateBehaviour(){
143     log_verbose("Testing for SBCS and LATIN_1\n");
144     {
145         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
146         const uint8_t expected[] = {0x31, 0x1a, 0x32};
147 
148 #if !UCONFIG_NO_LEGACY_CONVERSION
149         /*SBCS*/
150         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
151                 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
152             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
153 #endif
154 
155         /*LATIN_1*/
156         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
157                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
158             log_err("u-> LATIN_1 not match.\n");
159 
160     }
161 
162 #if !UCONFIG_NO_LEGACY_CONVERSION
163     log_verbose("Testing for DBCS and MBCS\n");
164     {
165         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
166         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
167         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
168 
169         /*DBCS*/
170         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
171                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
172             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
173         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
174                 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
175             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
176         /*MBCS*/
177         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
178                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
179             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
180         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
181                 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
182             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
183     }
184 
185     log_verbose("Testing for ISO-2022-jp\n");
186     {
187         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
188 
189         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
190                                     0x31,0x1A, 0x32};
191 
192 
193         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
194 
195         /*iso-2022-jp*/
196         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
197                 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
198             log_err("u-> not match.\n");
199         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
200                 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
201             log_err("u->  not match.\n");
202     }
203 
204    /* BEGIN android-removed */
205    /* To save space, Android does not build full ISO-2022-CN tables.
206       We skip the tests for ISO-2022-CN. */
207    /*
208     log_verbose("Testing for ISO-2022-cn\n");
209     {
210         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
211 
212         static const uint8_t expected[] = {
213                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
214                                     0x36, 0x21,
215                                     0x0F, 0x31,
216                                     0x1A,
217                                     0x32
218                                     };
219 
220 
221 
222         static const int32_t offsets[] = {
223                                     0,    0,    0,    0,    0,    0,    0,
224                                     1,    1,
225                                     2,    2,
226                                     3,
227                                     5,  };
228 
229         // iso-2022-CN  android-change
230         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
231                 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
232             log_err("u-> not match.\n");
233         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
234                 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
235             log_err("u-> not match.\n");
236     }
237     */
238     /* END android-removed */
239 
240         log_verbose("Testing for ISO-2022-kr\n");
241     {
242         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
243 
244         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
245                                     0x0E, 0x6C, 0x69,
246                                     0x0f, 0x1A,
247                                     0x0e, 0x6F, 0x4B,
248                                     0x0F, 0x31,
249                                     0x1A,
250                                     0x32 };
251 
252         static const int32_t offsets[] = {-1, -1, -1, -1,
253                               0, 0, 0,
254                               1, 1,
255                               3, 3, 3,
256                               4, 4,
257                               5,
258                               7,
259                             };
260 
261         /*iso-2022-kr*/
262         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
263                 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
264             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
265         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
266                 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
267             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
268     }
269 
270         log_verbose("Testing for HZ\n");
271     {
272         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
273 
274         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
275                                     0x7E, 0x7D, 0x1A,
276                                     0x7E, 0x7B, 0x36, 0x21,
277                                     0x7E, 0x7D, 0x31,
278                                     0x1A,
279                                     0x32 };
280 
281 
282         static const int32_t offsets[] = {0,0,0,0,
283                              1,1,1,
284                              3,3,3,3,
285                              4,4,4,
286                              5,
287                              7,};
288 
289         /*hz*/
290         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
291                 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
292             log_err("u-> HZ not match.\n");
293         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
294                 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
295             log_err("u-> HZ not match.\n");
296     }
297 #endif
298 
299     /*UTF-8*/
300      log_verbose("Testing for UTF8\n");
301     {
302         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
303         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
304                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
305                            0x04, 0x06 };
306         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
307             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
308 
309 
310         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
311         /*UTF-8*/
312         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
313             expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
314             log_err("u-> UTF8 with offsets and flush true did not match.\n");
315         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
316             expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
317             log_err("u-> UTF8 with offsets and flush true did not match.\n");
318         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
319             expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
320             log_err("u-> UTF8 with offsets and flush true did not match.\n");
321         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
322             expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
323             log_err("u-> UTF8 with offsets and flush true did not match.\n");
324 
325         if(!convertToU(expected, sizeof(expected),
326             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, TRUE, U_ZERO_ERROR ))
327             log_err("UTF8 -> u did not match.\n");
328         if(!convertToU(expected, sizeof(expected),
329             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, FALSE, U_ZERO_ERROR ))
330             log_err("UTF8 -> u did not match.\n");
331         if(!convertToU(expected, sizeof(expected),
332             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
333             log_err("UTF8 ->u  did not match.\n");
334         if(!convertToU(expected, sizeof(expected),
335             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
336             log_err("UTF8 -> u did not match.\n");
337 
338     }
339 }
340 
341 /*test various error behaviours*/
TestErrorBehaviour()342 static void TestErrorBehaviour(){
343     log_verbose("Testing for SBCS and LATIN_1\n");
344     {
345         static const UChar    sampleText[] =   { 0x0031, 0xd801};
346         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
347         static const uint8_t expected0[] =          { 0x31};
348         static const uint8_t expected[] =          { 0x31, 0x1a};
349         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
350 
351 #if !UCONFIG_NO_LEGACY_CONVERSION
352         /*SBCS*/
353         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
354                 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
355             log_err("u-> ibm-920 [UCNV_SBCS] \n");
356         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
357                 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
358             log_err("u-> ibm-920 [UCNV_SBCS] \n");
359         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
360                 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
361             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
362 #endif
363 
364         /*LATIN_1*/
365         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
366                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
367             log_err("u-> LATIN_1 is supposed to fail\n");
368         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
369                 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
370             log_err("u-> LATIN_1 is supposed to fail\n");
371 
372         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
373                 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
374             log_err("u-> LATIN_1 did not match\n");
375     }
376 
377 #if !UCONFIG_NO_LEGACY_CONVERSION
378     log_verbose("Testing for DBCS and MBCS\n");
379     {
380         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
381         static const uint8_t expected[] = { 0xa2, 0xae};
382         static const int32_t offsets[]        = { 0x00, 0x00};
383         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
384         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
385 
386         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
387         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
388         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
389 
390         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
391         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
392         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
393 
394         static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
395         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
396         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
397 
398         /*DBCS*/
399         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
400                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
401             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
402         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
403                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
404             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
405 
406         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
407                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
408             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
409         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
410                 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
411             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
412 
413 
414         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
415                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
416             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
417         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
418                 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
419             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
420 
421         /*MBCS*/
422         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
423                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
424             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
425         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
426                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
427             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
428 
429         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
430                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
431             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
432         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
433                 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
434             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
435         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
436                 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
437             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
438 
439         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
440                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
441             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
442         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
443                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
444             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
445 
446         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
447                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR))
448             log_err("u-> euc-jp [UCNV_MBCS] \n");
449         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
450                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR))
451             log_err("u-> euc-jp [UCNV_MBCS] \n");
452     }
453 
454     /*iso-2022-jp*/
455     log_verbose("Testing for iso-2022-jp\n");
456     {
457         static const UChar    sampleText[]    = { 0x0031, 0xd801};
458         static const uint8_t expected[] = {  0x31};
459         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
460         static const int32_t offsets[]        = { 0x00, 1};
461 
462         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
463         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
464         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
465 
466         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
467         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
468         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
469         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
470                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
471             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
472         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
473                 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
474             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
475 
476         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
477                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
478             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
479         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
480                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
481             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
482         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
483                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
484             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
485 
486         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
487                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
488             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
489         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
490                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
491             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
492     }
493 
494     /* BEGIN android-removed */
495     /* To save space, Android does not build full ISO-2022-CN tables.
496        We skip the tests for ISO-2022-CN. */
497     /*iso-2022-cn*/
498     /*
499     log_verbose("Testing for iso-2022-cn\n");
500     {
501         static const UChar    sampleText[]    = { 0x0031, 0xd801};
502         static const uint8_t expected[] = { 0x31};
503         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
504         static const int32_t offsets[]        = { 0x00, 1};
505 
506         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
507         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
508         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
509 
510         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
511         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
512         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
513 
514         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
515         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
516         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
517         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
518                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
519             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
520         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
521                 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
522             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
523 
524         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
525                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
526             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
527         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
528                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
529             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
530         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
531                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
532             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
533 
534         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
535                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
536             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
537         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
538                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
539             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
540 
541         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
542                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
543             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
544         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
545                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
546             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
547     }
548     */
549     /* END android-removed */
550 
551     /*iso-2022-kr*/
552     log_verbose("Testing for iso-2022-kr\n");
553     {
554         static const UChar    sampleText[]    = { 0x0031, 0xd801};
555         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
556         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
557         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
558 
559         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
560         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
561         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
562 
563         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
564         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
565         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
566 
567         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
568                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
569             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
570         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
571                 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
572             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
573 
574         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
575                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
576             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
577         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
578                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
579             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
580         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
581                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
582             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
583 
584         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
585                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
586             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
587         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
588                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
589             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
590     }
591 
592     /*HZ*/
593     log_verbose("Testing for HZ\n");
594     {
595         static const UChar    sampleText[]    = { 0x0031, 0xd801};
596         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
597         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
598         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
599 
600         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
601         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
602         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
603 
604         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
605         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
606         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
607 
608         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
609         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
610         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
611         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
612                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
613             log_err("u-> HZ [UCNV_MBCS] \n");
614         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
615                 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
616             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
617 
618         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
619                 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
620             log_err("u->HZ[UCNV_DBCS] did not match\n");
621         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
622                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
623             log_err("u-> HZ [UCNV_DBCS] did not match\n");
624         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
625                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
626             log_err("u-> HZ [UCNV_DBCS] did not match\n");
627 
628         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
629                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
630             log_err("u->HZ [UCNV_MBCS] \n");
631         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
632                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
633             log_err("u-> HZ[UCNV_MBCS] \n");
634 
635         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
636                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
637             log_err("u-> HZ [UCNV_MBCS] \n");
638         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
639                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
640             log_err("u-> HZ [UCNV_MBCS] \n");
641     }
642 #endif
643 }
644 
645 #if !UCONFIG_NO_LEGACY_CONVERSION
646 /*test different convertToUnicode error behaviours*/
TestToUnicodeErrorBehaviour()647 static void TestToUnicodeErrorBehaviour()
648 {
649     log_verbose("Testing error conditions for DBCS\n");
650     {
651         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
652         const UChar expected[] = { 0x00a1 };
653 
654         if(!convertToU(sampleText, sizeof(sampleText),
655                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING ))
656             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
657         if(!convertToU(sampleText, sizeof(sampleText),
658                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING ))
659             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
660     }
661     log_verbose("Testing error conditions for SBCS\n");
662     {
663         uint8_t sampleText[] = { 0xa2, 0xFF};
664         const UChar expected[] = { 0x00c2 };
665 
666       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
667         const UChar expected2[] = { 0x0073 };*/
668 
669         if(!convertToU(sampleText, sizeof(sampleText),
670                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
671             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
672         if(!convertToU(sampleText, sizeof(sampleText),
673                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
674             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
675 
676     }
677 }
678 
TestGetNextErrorBehaviour()679 static void TestGetNextErrorBehaviour(){
680    /*Test for unassigned character*/
681 #define INPUT_SIZE 1
682     static const char input1[INPUT_SIZE]={ 0x70 };
683     const char* source=(const char*)input1;
684     UErrorCode err=U_ZERO_ERROR;
685     UChar32 c=0;
686     UConverter *cnv=ucnv_open("ibm-424", &err);
687     if(U_FAILURE(err)) {
688         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
689         return;
690     }
691     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
692     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
693         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
694     }
695     ucnv_close(cnv);
696 }
697 #endif
698 
699 #define MAX_UTF16_LEN 2
700 #define MAX_UTF8_LEN 4
701 
702 /*Regression test for utf8 converter*/
TestRegressionUTF8()703 static void TestRegressionUTF8(){
704     UChar32 currCh = 0;
705     int32_t offset8;
706     int32_t offset16;
707     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
708     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
709 
710     while (currCh <= UNICODE_LIMIT) {
711         offset16 = 0;
712         offset8 = 0;
713         while(currCh <= UNICODE_LIMIT
714             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
715             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
716         {
717             if (currCh == SURROGATE_HIGH_START) {
718                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
719             }
720             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
721             U8_APPEND_UNSAFE(utf8, offset8, currCh);
722             currCh++;
723         }
724         if(!convertFromU(standardForm, offset16,
725             utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
726             log_err("Unicode->UTF8 did not match.\n");
727         }
728         if(!convertToU(utf8, offset8,
729             standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
730             log_err("UTF8->Unicode did not match.\n");
731         }
732     }
733 
734     free(standardForm);
735     free(utf8);
736 
737     {
738         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
739         static const UChar expected[] = { 0x0301, 0x0300 };
740         UConverter *conv8;
741         UErrorCode err = U_ZERO_ERROR;
742         UChar pivotBuffer[100];
743         const UChar* const pivEnd = pivotBuffer + 100;
744         const char* srcBeg;
745         const char* srcEnd;
746         UChar* pivBeg;
747 
748         conv8 = ucnv_open("UTF-8", &err);
749 
750         srcBeg = src8;
751         pivBeg = pivotBuffer;
752         srcEnd = src8 + 3;
753         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
754         if (srcBeg != srcEnd) {
755             log_err("Did not consume whole buffer on first call.\n");
756         }
757 
758         srcEnd = src8 + 4;
759         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
760         if (srcBeg != srcEnd) {
761             log_err("Did not consume whole buffer on second call.\n");
762         }
763 
764         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
765             log_err("Did not get expected results for UTF-8.\n");
766         }
767         ucnv_close(conv8);
768     }
769 }
770 
771 #define MAX_UTF32_LEN 1
772 
TestRegressionUTF32()773 static void TestRegressionUTF32(){
774 #if !UCONFIG_ONLY_HTML_CONVERSION
775     UChar32 currCh = 0;
776     int32_t offset32;
777     int32_t offset16;
778     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
779     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
780 
781     while (currCh <= UNICODE_LIMIT) {
782         offset16 = 0;
783         offset32 = 0;
784         while(currCh <= UNICODE_LIMIT
785             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
786             && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
787         {
788             if (currCh == SURROGATE_HIGH_START) {
789                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
790             }
791             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
792             utf32[offset32++] = currCh;
793             currCh++;
794         }
795         if(!convertFromU(standardForm, offset16,
796             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
797             log_err("Unicode->UTF32 did not match.\n");
798         }
799         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
800             standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
801             log_err("UTF32->Unicode did not match.\n");
802         }
803     }
804     free(standardForm);
805     free(utf32);
806 
807     {
808         /* Check for lone surrogate error handling. */
809         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
810         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
811         static const uint8_t expectedUTF32BE[] = {
812             0x00, 0x00, 0x00, 0x31,
813             0x00, 0x00, 0xff, 0xfd,
814             0x00, 0x00, 0x00, 0x32
815         };
816         static const uint8_t expectedUTF32LE[] = {
817             0x31, 0x00, 0x00, 0x00,
818             0xfd, 0xff, 0x00, 0x00,
819             0x32, 0x00, 0x00, 0x00
820         };
821         static const int32_t offsetsUTF32[] = {
822             0x00, 0x00, 0x00, 0x00,
823             0x01, 0x01, 0x01, 0x01,
824             0x02, 0x02, 0x02, 0x02
825         };
826 
827         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
828                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
829             log_err("u->UTF-32BE\n");
830         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
831                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
832             log_err("u->UTF-32BE\n");
833 
834         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
835                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
836             log_err("u->UTF-32LE\n");
837         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
838                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
839             log_err("u->UTF-32LE\n");
840     }
841 
842     {
843         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
844         static const UChar expected[] = { 0x0031, 0x0030 };
845         UConverter *convBE;
846         UErrorCode err = U_ZERO_ERROR;
847         UChar pivotBuffer[100];
848         const UChar* const pivEnd = pivotBuffer + 100;
849         const char* srcBeg;
850         const char* srcEnd;
851         UChar* pivBeg;
852 
853         convBE = ucnv_open("UTF-32BE", &err);
854 
855         srcBeg = srcBE;
856         pivBeg = pivotBuffer;
857         srcEnd = srcBE + 5;
858         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
859         if (srcBeg != srcEnd) {
860             log_err("Did not consume whole buffer on first call.\n");
861         }
862 
863         srcEnd = srcBE + 8;
864         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
865         if (srcBeg != srcEnd) {
866             log_err("Did not consume whole buffer on second call.\n");
867         }
868 
869         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
870             log_err("Did not get expected results for UTF-32BE.\n");
871         }
872         ucnv_close(convBE);
873     }
874     {
875         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
876         static const UChar expected[] = { 0x0031, 0x0030 };
877         UConverter *convLE;
878         UErrorCode err = U_ZERO_ERROR;
879         UChar pivotBuffer[100];
880         const UChar* const pivEnd = pivotBuffer + 100;
881         const char* srcBeg;
882         const char* srcEnd;
883         UChar* pivBeg;
884 
885         convLE = ucnv_open("UTF-32LE", &err);
886 
887         srcBeg = srcLE;
888         pivBeg = pivotBuffer;
889         srcEnd = srcLE + 5;
890         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
891         if (srcBeg != srcEnd) {
892             log_err("Did not consume whole buffer on first call.\n");
893         }
894 
895         srcEnd = srcLE + 8;
896         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
897         if (srcBeg != srcEnd) {
898             log_err("Did not consume whole buffer on second call.\n");
899         }
900 
901         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
902             log_err("Did not get expected results for UTF-32LE.\n");
903         }
904         ucnv_close(convLE);
905     }
906 #endif
907 }
908 
909 /*Walk through the available converters*/
TestAvailableConverters()910 static void TestAvailableConverters(){
911     UErrorCode status=U_ZERO_ERROR;
912     UConverter *conv=NULL;
913     int32_t i=0;
914     for(i=0; i < ucnv_countAvailable(); i++){
915         status=U_ZERO_ERROR;
916         conv=ucnv_open(ucnv_getAvailableName(i), &status);
917         if(U_FAILURE(status)){
918             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
919                         ucnv_getAvailableName(i), myErrorName(status));
920             continue;
921         }
922         ucnv_close(conv);
923     }
924 
925 }
926 
TestFlushInternalBuffer()927 static void TestFlushInternalBuffer(){
928     TestWithBufferSize(MAX_LENGTH, 1);
929     TestWithBufferSize(1, 1);
930     TestWithBufferSize(1, MAX_LENGTH);
931     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
932 }
933 
/*
 * Runs a fixed set of callback conversions with the given chunk sizes.
 *
 * insize and outsize are stored in the globals gInBufferSize and
 * gOutBufferSize, which testConvertFromU()/testConvertToU() read to decide
 * how much input and output space to hand to the converter per call.
 *
 * Cases covered:
 *   - UTF-16 -> UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE
 *   - UTF-16 -> ibm-943 with UCNV_FROM_U_CALLBACK_ESCAPE   (legacy builds only)
 *   - UTF-8  -> UTF-16 with UCNV_TO_U_CALLBACK_SUBSTITUTE
 *   - ibm-943 -> UTF-16 with UCNV_TO_U_CALLBACK_ESCAPE     (legacy builds only)
 */
static void TestWithBufferSize(int32_t insize, int32_t outsize){

    gInBufferSize =insize;
    gOutBufferSize = outsize;

    log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
    {
        UChar    sampleText[] =
            { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
        const uint8_t expectedUTF8[] =
            { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
        /* One entry per output byte: index of the source UChar it came from. */
        int32_t  toUTF8Offs[] =
            { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};

        /*UTF-8*/
        if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
            expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
             log_err("u-> UTF8 did not match.\n");
    }

#if !UCONFIG_NO_LEGACY_CONVERSION
    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
    {
        UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
        /* 'a', then "%UD801", "%UDC01", "%UD801" in ASCII, then 'a'. */
        const uint8_t toIBM943[]= { 0x61,
            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
            0x61 };
        int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};

        if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest),
                toIBM943, sizeof(toIBM943), "ibm-943",
                (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
            log_err("u-> ibm-943 with subst with value did not match.\n");
    }
#endif

    log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    {
        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
            0xe0, 0x80,  0x61};
        UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
        int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};

        if(!testConvertToU(sampleText1, sizeof(sampleText1),
                 expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
            log_err("utf8->u with substitute did not match.\n");
    }

#if !UCONFIG_NO_LEGACY_CONVERSION
    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
    /*to Unicode*/
    {
        const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
            0x81, 0xad, /*unassigned*/
            0x89, 0xd3 };
        /* The middle eight units spell "%X81%XAD" in ASCII. */
        UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
            0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
            0x7B87};
        int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};

        if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
                 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
                (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
            log_err("ibm-943->u with substitute with value did not match.\n");

    }
#endif
}
1006 
convertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool doFlush,UErrorCode expectedStatus)1007 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1008                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1009 {
1010 
1011     int32_t i=0;
1012     char *p=0;
1013     const UChar *src;
1014     char buffer[MAX_LENGTH];
1015     int32_t offsetBuffer[MAX_LENGTH];
1016     int32_t *offs=0;
1017     char *targ;
1018     char *targetLimit;
1019     UChar *sourceLimit=0;
1020     UErrorCode status = U_ZERO_ERROR;
1021     UConverter *conv = 0;
1022     conv = ucnv_open(codepage, &status);
1023     if(U_FAILURE(status))
1024     {
1025         log_data_err("Couldn't open converter %s\n",codepage);
1026         return TRUE;
1027     }
1028     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1029 
1030     for(i=0; i<MAX_LENGTH; i++){
1031         buffer[i]=(char)0xF0;
1032         offsetBuffer[i]=0xFF;
1033     }
1034 
1035     src=source;
1036     sourceLimit=(UChar*)src+(sourceLen);
1037     targ=buffer;
1038     targetLimit=targ+MAX_LENGTH;
1039     offs=offsetBuffer;
1040     ucnv_fromUnicode (conv,
1041                   (char **)&targ,
1042                   (const char *)targetLimit,
1043                   &src,
1044                   sourceLimit,
1045                   expectOffsets ? offs : NULL,
1046                   doFlush,
1047                   &status);
1048     ucnv_close(conv);
1049     if(status != expectedStatus){
1050           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1051           return FALSE;
1052     }
1053 
1054     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1055         sourceLen, targ-buffer);
1056 
1057     if(expectLen != targ-buffer)
1058     {
1059         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1060         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1061         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
1062         printSeqErr((const unsigned char*)expect, expectLen);
1063         return FALSE;
1064     }
1065 
1066     if(memcmp(buffer, expect, expectLen)){
1067         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
1068         log_info("\nGot:");
1069         printSeqErr((const unsigned char *)buffer, expectLen);
1070         log_info("\nExpected:");
1071         printSeqErr((const unsigned char *)expect, expectLen);
1072         return FALSE;
1073     }
1074     else {
1075         log_verbose("Matches!\n");
1076     }
1077 
1078     if (expectOffsets != 0){
1079         log_verbose("comparing %d offsets..\n", targ-buffer);
1080         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
1081             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
1082             log_info("\nGot  : ");
1083             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
1084             for(p=buffer;p<targ;p++)
1085                 log_info("%d, ", offsetBuffer[p-buffer]);
1086             log_info("\nExpected: ");
1087             for(i=0; i< (targ-buffer); i++)
1088                 log_info("%d,", expectOffsets[i]);
1089         }
1090     }
1091 
1092     return TRUE;
1093 }
1094 
1095 
convertToU(const uint8_t * source,int sourceLen,const UChar * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool doFlush,UErrorCode expectedStatus)1096 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
1097                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1098 {
1099     UErrorCode status = U_ZERO_ERROR;
1100     UConverter *conv = 0;
1101     int32_t i=0;
1102     UChar *p=0;
1103     const char* src;
1104     UChar buffer[MAX_LENGTH];
1105     int32_t offsetBuffer[MAX_LENGTH];
1106     int32_t *offs=0;
1107     UChar *targ;
1108     UChar *targetLimit;
1109     uint8_t *sourceLimit=0;
1110 
1111 
1112 
1113     conv = ucnv_open(codepage, &status);
1114     if(U_FAILURE(status))
1115     {
1116         log_data_err("Couldn't open converter %s\n",codepage);
1117         return TRUE;
1118     }
1119     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1120 
1121 
1122 
1123     for(i=0; i<MAX_LENGTH; i++){
1124         buffer[i]=0xFFFE;
1125         offsetBuffer[i]=-1;
1126     }
1127 
1128     src=(const char *)source;
1129     sourceLimit=(uint8_t*)(src+(sourceLen));
1130     targ=buffer;
1131     targetLimit=targ+MAX_LENGTH;
1132     offs=offsetBuffer;
1133 
1134 
1135 
1136     ucnv_toUnicode (conv,
1137                 &targ,
1138                 targetLimit,
1139                 (const char **)&src,
1140                 (const char *)sourceLimit,
1141                 expectOffsets ? offs : NULL,
1142                 doFlush,
1143                 &status);
1144 
1145     ucnv_close(conv);
1146     if(status != expectedStatus){
1147           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1148           return FALSE;
1149     }
1150     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1151         sourceLen, targ-buffer);
1152 
1153 
1154 
1155 
1156     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
1157 
1158     if (expectOffsets != 0) {
1159         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
1160 
1161             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
1162             log_info("\nGot : ");
1163             for(p=buffer;p<targ;p++)
1164                 log_info("%d, ", offsetBuffer[p-buffer]);
1165             log_info("\nExpected: ");
1166             for(i=0; i<(targ-buffer); i++)
1167                 log_info("%d, ", expectOffsets[i]);
1168             log_info("\nGot result:");
1169             for(i=0; i<(targ-buffer); i++)
1170                 log_info("0x%04X,", buffer[i]);
1171             log_info("\nFrom Input:");
1172             for(i=0; i<(src-(const char *)source); i++)
1173                 log_info("0x%02X,", (unsigned char)source[i]);
1174             log_info("\n");
1175         }
1176     }
1177     if(memcmp(buffer, expect, expectLen*2)){
1178         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
1179         log_info("\nGot:");
1180         printUSeqErr(buffer, expectLen);
1181         log_info("\nExpected:");
1182         printUSeqErr(expect, expectLen);
1183         return FALSE;
1184     }
1185     else {
1186         log_verbose("Matches!\n");
1187     }
1188 
1189     return TRUE;
1190 }
1191 
1192 
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,UBool testReset)1193 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1194                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
1195 {
1196     UErrorCode status = U_ZERO_ERROR;
1197     UConverter *conv = 0;
1198     char    junkout[MAX_LENGTH]; /* FIX */
1199     int32_t    junokout[MAX_LENGTH]; /* FIX */
1200     char *p;
1201     const UChar *src;
1202     char *end;
1203     char *targ;
1204     int32_t *offs;
1205     int i;
1206     int32_t   realBufferSize;
1207     char *realBufferEnd;
1208     const UChar *realSourceEnd;
1209     const UChar *sourceLimit;
1210     UBool checkOffsets = TRUE;
1211     UBool doFlush;
1212 
1213     UConverterFromUCallback oldAction = NULL;
1214     const void* oldContext = NULL;
1215 
1216     for(i=0;i<MAX_LENGTH;i++)
1217         junkout[i] = (char)0xF0;
1218     for(i=0;i<MAX_LENGTH;i++)
1219         junokout[i] = 0xFF;
1220 
1221     setNuConvTestName(codepage, "FROM");
1222 
1223     log_verbose("\n=========  %s\n", gNuConvTestName);
1224 
1225     conv = ucnv_open(codepage, &status);
1226     if(U_FAILURE(status))
1227     {
1228         log_data_err("Couldn't open converter %s\n",codepage);
1229         return TRUE;
1230     }
1231 
1232     log_verbose("Converter opened..\n");
1233     /*----setting the callback routine----*/
1234     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1235     if (U_FAILURE(status)) {
1236         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1237     }
1238     /*------------------------*/
1239 
1240     src = source;
1241     targ = junkout;
1242     offs = junokout;
1243 
1244     realBufferSize = UPRV_LENGTHOF(junkout);
1245     realBufferEnd = junkout + realBufferSize;
1246     realSourceEnd = source + sourceLen;
1247 
1248     if ( gOutBufferSize != realBufferSize )
1249       checkOffsets = FALSE;
1250 
1251     if( gInBufferSize != MAX_LENGTH )
1252       checkOffsets = FALSE;
1253 
1254     do
1255     {
1256         end = nct_min(targ + gOutBufferSize, realBufferEnd);
1257         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
1258 
1259         doFlush = (UBool)(sourceLimit == realSourceEnd);
1260 
1261         if(targ == realBufferEnd)
1262           {
1263         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
1264         return FALSE;
1265           }
1266         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
1267 
1268 
1269         status = U_ZERO_ERROR;
1270         if(gInBufferSize ==999 && gOutBufferSize==999)
1271             doFlush = FALSE;
1272         ucnv_fromUnicode (conv,
1273                   (char **)&targ,
1274                   (const char *)end,
1275                   &src,
1276                   sourceLimit,
1277                   offs,
1278                   doFlush, /* flush if we're at the end of the input data */
1279                   &status);
1280         if(testReset)
1281             ucnv_resetToUnicode(conv);
1282         if(gInBufferSize ==999 && gOutBufferSize==999)
1283             ucnv_resetToUnicode(conv);
1284 
1285       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
1286 
1287     if(U_FAILURE(status)) {
1288         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1289         return FALSE;
1290       }
1291 
1292     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1293         sourceLen, targ-junkout);
1294     if(getTestOption(VERBOSITY_OPTION))
1295     {
1296         char junk[999];
1297         char offset_str[999];
1298         char *ptr;
1299 
1300         junk[0] = 0;
1301         offset_str[0] = 0;
1302         for(ptr = junkout;ptr<targ;ptr++)
1303         {
1304             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
1305             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
1306         }
1307 
1308         log_verbose(junk);
1309         printSeq((const unsigned char *)expect, expectLen);
1310         if ( checkOffsets )
1311           {
1312             log_verbose("\nOffsets:");
1313             log_verbose(offset_str);
1314           }
1315         log_verbose("\n");
1316     }
1317     ucnv_close(conv);
1318 
1319 
1320     if(expectLen != targ-junkout)
1321     {
1322         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1323         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1324         log_info("\nGot:");
1325         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1326         log_info("\nExpected:");
1327         printSeqErr((const unsigned char*)expect, expectLen);
1328         return FALSE;
1329     }
1330 
1331     if (checkOffsets && (expectOffsets != 0) )
1332     {
1333         log_verbose("comparing %d offsets..\n", targ-junkout);
1334         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
1335             log_err("did not get the expected offsets. %s", gNuConvTestName);
1336             log_err("Got  : ");
1337             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1338             for(p=junkout;p<targ;p++)
1339                 log_err("%d, ", junokout[p-junkout]);
1340             log_err("\nExpected: ");
1341             for(i=0; i<(targ-junkout); i++)
1342                 log_err("%d,", expectOffsets[i]);
1343         }
1344     }
1345 
1346     log_verbose("comparing..\n");
1347     if(!memcmp(junkout, expect, expectLen))
1348     {
1349         log_verbose("Matches!\n");
1350         return TRUE;
1351     }
1352     else
1353     {
1354         log_err("String does not match. %s\n", gNuConvTestName);
1355         printUSeqErr(source, sourceLen);
1356         log_info("\nGot:");
1357         printSeqErr((const unsigned char *)junkout, expectLen);
1358         log_info("\nExpected:");
1359         printSeqErr((const unsigned char *)expect, expectLen);
1360 
1361         return FALSE;
1362     }
1363 }
1364 
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,UBool testReset)1365 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
1366                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
1367 {
1368     UErrorCode status = U_ZERO_ERROR;
1369     UConverter *conv = 0;
1370     UChar    junkout[MAX_LENGTH]; /* FIX */
1371     int32_t    junokout[MAX_LENGTH]; /* FIX */
1372     const char *src;
1373     const char *realSourceEnd;
1374     const char *srcLimit;
1375     UChar *p;
1376     UChar *targ;
1377     UChar *end;
1378     int32_t *offs;
1379     int i;
1380     UBool   checkOffsets = TRUE;
1381     int32_t   realBufferSize;
1382     UChar *realBufferEnd;
1383     UBool doFlush;
1384 
1385     UConverterToUCallback oldAction = NULL;
1386     const void* oldContext = NULL;
1387 
1388 
1389     for(i=0;i<MAX_LENGTH;i++)
1390         junkout[i] = 0xFFFE;
1391 
1392     for(i=0;i<MAX_LENGTH;i++)
1393         junokout[i] = -1;
1394 
1395     setNuConvTestName(codepage, "TO");
1396 
1397     log_verbose("\n=========  %s\n", gNuConvTestName);
1398 
1399     conv = ucnv_open(codepage, &status);
1400     if(U_FAILURE(status))
1401     {
1402         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
1403         return TRUE;
1404     }
1405 
1406     log_verbose("Converter opened..\n");
1407      /*----setting the callback routine----*/
1408     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1409     if (U_FAILURE(status)) {
1410         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1411     }
1412     /*-------------------------------------*/
1413 
1414     src = (const char *)source;
1415     targ = junkout;
1416     offs = junokout;
1417 
1418     realBufferSize = UPRV_LENGTHOF(junkout);
1419     realBufferEnd = junkout + realBufferSize;
1420     realSourceEnd = src + sourcelen;
1421 
1422     if ( gOutBufferSize != realBufferSize )
1423       checkOffsets = FALSE;
1424 
1425     if( gInBufferSize != MAX_LENGTH )
1426       checkOffsets = FALSE;
1427 
1428     do
1429       {
1430         end = nct_min( targ + gOutBufferSize, realBufferEnd);
1431         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
1432 
1433         if(targ == realBufferEnd)
1434         {
1435             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
1436             return FALSE;
1437         }
1438         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
1439 
1440         /* oldTarg = targ; */
1441 
1442         status = U_ZERO_ERROR;
1443         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
1444 
1445         ucnv_toUnicode (conv,
1446                 &targ,
1447                 end,
1448                 (const char **)&src,
1449                 (const char *)srcLimit,
1450                 offs,
1451                 doFlush, /* flush if we're at the end of hte source data */
1452                 &status);
1453         if(testReset)
1454             ucnv_resetFromUnicode(conv);
1455         if(gInBufferSize ==999 && gOutBufferSize==999)
1456             ucnv_resetToUnicode(conv);
1457         /*        offs += (targ-oldTarg); */
1458 
1459       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
1460 
1461     if(U_FAILURE(status))
1462     {
1463         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1464         return FALSE;
1465     }
1466 
1467     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
1468         sourcelen, targ-junkout);
1469     if(getTestOption(VERBOSITY_OPTION))
1470     {
1471         char junk[999];
1472         char offset_str[999];
1473 
1474         UChar *ptr;
1475 
1476         junk[0] = 0;
1477         offset_str[0] = 0;
1478 
1479         for(ptr = junkout;ptr<targ;ptr++)
1480         {
1481             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
1482             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
1483         }
1484 
1485         log_verbose(junk);
1486 
1487         if ( checkOffsets )
1488           {
1489             log_verbose("\nOffsets:");
1490             log_verbose(offset_str);
1491           }
1492         log_verbose("\n");
1493     }
1494     ucnv_close(conv);
1495 
1496     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
1497 
1498     if (checkOffsets && (expectOffsets != 0))
1499     {
1500         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
1501 
1502             log_err("did not get the expected offsets. %s",gNuConvTestName);
1503             for(p=junkout;p<targ;p++)
1504                 log_err("%d, ", junokout[p-junkout]);
1505             log_err("\nExpected: ");
1506             for(i=0; i<(targ-junkout); i++)
1507                 log_err("%d,", expectOffsets[i]);
1508             log_err("");
1509             for(i=0; i<(targ-junkout); i++)
1510                 log_err("%X,", junkout[i]);
1511             log_err("");
1512             for(i=0; i<(src-(const char *)source); i++)
1513                 log_err("%X,", (unsigned char)source[i]);
1514         }
1515     }
1516 
1517     if(!memcmp(junkout, expect, expectlen*2))
1518     {
1519         log_verbose("Matches!\n");
1520         return TRUE;
1521     }
1522     else
1523     {
1524         log_err("String does not match. %s\n", gNuConvTestName);
1525         log_verbose("String does not match. %s\n", gNuConvTestName);
1526         log_info("\nGot:");
1527         printUSeq(junkout, expectlen);
1528         log_info("\nExpected:");
1529         printUSeq(expect, expectlen);
1530         return FALSE;
1531     }
1532 }
1533 
1534 
TestResetBehaviour(void)1535 static void TestResetBehaviour(void){
1536 #if !UCONFIG_NO_LEGACY_CONVERSION
1537     log_verbose("Testing Reset for DBCS and MBCS\n");
1538     {
1539         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
1540         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
1541         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
1542 
1543 
1544         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
1545         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
1546         static const int32_t offsets1[] =  { 0,2,4,6};
1547 
1548         /*DBCS*/
1549         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1550                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1551             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1552         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1553                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1554             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1555 
1556         if(!testConvertToU(expected1, sizeof(expected1),
1557                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1558                 offsets1, TRUE))
1559            log_err("ibm-1363 -> did not match.\n");
1560         /*MBCS*/
1561         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1562                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1563             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1564         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1565                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1566             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1567 
1568         if(!testConvertToU(expected1, sizeof(expected1),
1569                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1570                 offsets1, TRUE))
1571            log_err("ibm-1363 -> did not match.\n");
1572 
1573     }
1574 
1575     log_verbose("Testing Reset for ISO-2022-jp\n");
1576     {
1577         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1578 
1579         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1580                                     0x31,0x1A, 0x32};
1581 
1582 
1583         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
1584 
1585 
1586         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1587         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1588                                     0x31,0x1A, 0x32};
1589         static const int32_t offsets1[] =  { 3,5,10,11,12};
1590 
1591         /*iso-2022-jp*/
1592         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1593                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1594             log_err("u-> not match.\n");
1595         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1596                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1597             log_err("u->  not match.\n");
1598 
1599         if(!testConvertToU(expected1, sizeof(expected1),
1600                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1601                 offsets1, TRUE))
1602            log_err("iso-2022-jp -> did not match.\n");
1603 
1604     }
1605 
1606     /* BEGIN android-removed */
1607     /* To save space, Android does not build full ISO-2022-CN tables.
1608        We skip the tests for ISO-2022-CN. */
1609     /*
1610     log_verbose("Testing Reset for ISO-2022-cn\n");
1611     {
1612         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1613 
1614         static const uint8_t expected[] = {
1615                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1616                                     0x36, 0x21,
1617                                     0x0f, 0x31,
1618                                     0x1A,
1619                                     0x32
1620                                     };
1621 
1622 
1623         static const int32_t offsets[] = {
1624                                     0,    0,    0,    0,    0,    0,    0,
1625                                     1,    1,
1626                                     2,    2,
1627                                     3,
1628                                     5,  };
1629 
1630         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1631         static const uint8_t expected1[] = {
1632                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1633                                     0x36, 0x21,
1634                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
1635                                     0x0f, 0x1A,
1636                                     0x32
1637                                     };
1638         static const int32_t offsets1[] =  { 5,7,13,16,17};
1639 
1640         // iso-2022-CN  android-change
1641         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1642                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1643             log_err("u-> not match.\n");
1644         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1645                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1646             log_err("u-> not match.\n");
1647 
1648         if(!testConvertToU(expected1, sizeof(expected1),
1649                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1650                 offsets1, TRUE))
1651            log_err("iso-2022-cn -> did not match.\n");
1652     }
1653     */
1654     /* END android-removed */
1655 
1656         log_verbose("Testing Reset for ISO-2022-kr\n");
1657     {
1658         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1659 
1660         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
1661                                     0x0E, 0x6C, 0x69,
1662                                     0x0f, 0x1A,
1663                                     0x0e, 0x6F, 0x4B,
1664                                     0x0F, 0x31,
1665                                     0x1A,
1666                                     0x32 };
1667 
1668         static const int32_t offsets[] = {-1, -1, -1, -1,
1669                               0, 0, 0,
1670                               1, 1,
1671                               3, 3, 3,
1672                               4, 4,
1673                               5,
1674                               7,
1675                             };
1676         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
1677 
1678         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
1679                                     0x0E, 0x6C, 0x69,
1680                                     0x0f, 0x41,
1681                                     0x0e, 0x6F, 0x4B,
1682                                     0x0F, 0x31,
1683                                     0x42,
1684                                     0x32 };
1685 
1686         static const int32_t offsets1[] = {
1687                               5, 8, 10,
1688                               13, 14, 15
1689 
1690                             };
1691         /*iso-2022-kr*/
1692         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1693                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1694             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1695         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1696                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1697             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1698         if(!testConvertToU(expected1, sizeof(expected1),
1699                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1700                 offsets1, TRUE))
1701            log_err("iso-2022-kr -> did not match.\n");
1702     }
1703 
1704         log_verbose("Testing Reset for HZ\n");
1705     {
1706         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1707 
1708         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
1709                                     0x7E, 0x7D, 0x1A,
1710                                     0x7E, 0x7B, 0x36, 0x21,
1711                                     0x7E, 0x7D, 0x31,
1712                                     0x1A,
1713                                     0x32 };
1714 
1715 
1716         static const int32_t offsets[] = {0,0,0,0,
1717                              1,1,1,
1718                              3,3,3,3,
1719                              4,4,4,
1720                              5,
1721                              7,};
1722         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
1723 
1724         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
1725                                     0x7E, 0x7D, 0x35,
1726                                     0x7E, 0x7B, 0x36, 0x21,
1727                                     0x7E, 0x7D, 0x31,
1728                                     0x41,
1729                                     0x32 };
1730 
1731 
1732         static const int32_t offsets1[] = {2,6,9,13,14,15
1733                             };
1734 
1735         /*hz*/
1736         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1737                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1738             log_err("u->  not match.\n");
1739         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1740                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1741             log_err("u->  not match.\n");
1742         if(!testConvertToU(expected1, sizeof(expected1),
1743                 sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1744                 offsets1, TRUE))
1745            log_err("hz -> did not match.\n");
1746     }
1747 #endif
1748 
1749     /*UTF-8*/
1750      log_verbose("Testing for UTF8\n");
1751     {
1752         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
1753         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
1754                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
1755                            0x04, 0x06 };
1756         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
1757             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
1758 
1759 
1760         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
1761         /*UTF-8*/
1762         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1763             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1764             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1765         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1766             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1767             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1768         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1769             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1770             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1771         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1772             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1773             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1774         if(!testConvertToU(expected, sizeof(expected),
1775             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1776             log_err("UTF8 -> did not match.\n");
1777         if(!testConvertToU(expected, sizeof(expected),
1778             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1779             log_err("UTF8 -> did not match.\n");
1780         if(!testConvertToU(expected, sizeof(expected),
1781             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1782             log_err("UTF8 -> did not match.\n");
1783         if(!testConvertToU(expected, sizeof(expected),
1784             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1785             log_err("UTF8 -> did not match.\n");
1786 
1787     }
1788 
1789 }
1790 
1791 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
1792 static void
doTestTruncated(const char * cnvName,const uint8_t * bytes,int32_t length)1793 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
1794     UConverter *cnv;
1795 
1796     UChar buffer[2];
1797     UChar *target, *targetLimit;
1798     const char *source, *sourceLimit;
1799 
1800     UErrorCode errorCode;
1801 
1802     errorCode=U_ZERO_ERROR;
1803     cnv=ucnv_open(cnvName, &errorCode);
1804     if(U_FAILURE(errorCode)) {
1805         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
1806         return;
1807     }
1808     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1809     if(U_FAILURE(errorCode)) {
1810         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
1811                     cnvName, u_errorName(errorCode));
1812         ucnv_close(cnv);
1813         return;
1814     }
1815 
1816     source=(const char *)bytes;
1817     sourceLimit=source+length;
1818     target=buffer;
1819     targetLimit=buffer+UPRV_LENGTHOF(buffer);
1820 
1821     /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
1822     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
1823     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
1824         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
1825                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1826     }
1827 
1828     errorCode=U_ZERO_ERROR;
1829     source=sourceLimit;
1830     target=buffer;
1831     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1832     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1833         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
1834                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
1835     }
1836 
1837     /* 2. input bytes with flush=TRUE */
1838     ucnv_resetToUnicode(cnv);
1839 
1840     errorCode=U_ZERO_ERROR;
1841     source=(const char *)bytes;
1842     target=buffer;
1843     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1844     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
1845         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
1846                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1847     }
1848 
1849 
1850     ucnv_close(cnv);
1851 }
1852 
1853 static void
TestTruncated()1854 TestTruncated() {
1855     static const struct {
1856         const char *cnvName;
1857         uint8_t bytes[8]; /* partial input bytes resulting in no output */
1858         int32_t length;
1859     } testCases[]={
1860         { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
1861         { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
1862         { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
1863         { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */
1864 
1865         { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
1866         { "UTF-8",      { 0xd1 }, 1 },
1867 
1868         { "UTF-16BE",   { 0x4e }, 1 },
1869         { "UTF-16LE",   { 0x4e }, 1 },
1870         { "UTF-16",     { 0x4e }, 1 },
1871         { "UTF-16",     { 0xff }, 1 },
1872         { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },
1873 
1874         { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
1875         { "UTF-32LE",   { 0x4e }, 1 },
1876         { "UTF-32",     { 0, 0, 0x4e }, 3 },
1877         { "UTF-32",     { 0xff }, 1 },
1878         { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
1879         { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */
1880 
1881 #if !UCONFIG_NO_LEGACY_CONVERSION
1882         { "BOCU-1",     { 0xd5 }, 1 },
1883 
1884         { "Shift-JIS",  { 0xe0 }, 1 },
1885 
1886         { "ibm-939",    { 0x0e, 0x41 }, 2 } /* SO 0x41 */
1887 #else
1888         { "BOCU-1",     { 0xd5 }, 1 ,}
1889 #endif
1890     };
1891     int32_t i;
1892 
1893     for(i=0; i<UPRV_LENGTHOF(testCases); ++i) {
1894         doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
1895     }
1896 }
1897 
1898 typedef struct NameRange {
1899     const char *name;
1900     UChar32 start, end, start2, end2, notStart, notEnd;
1901 } NameRange;
1902 
1903 static void
TestUnicodeSet()1904 TestUnicodeSet() {
1905     UErrorCode errorCode;
1906     UConverter *cnv;
1907     USet *set;
1908     const char *name;
1909     int32_t i, count;
1910 
1911     static const char *const completeSetNames[]={
1912         "UTF-7",
1913         "UTF-8",
1914         "UTF-16",
1915         "UTF-16BE",
1916         "UTF-16LE",
1917         "UTF-32",
1918         "UTF-32BE",
1919         "UTF-32LE",
1920         "SCSU",
1921         "BOCU-1",
1922         "CESU-8",
1923 #if !UCONFIG_NO_LEGACY_CONVERSION
1924         "gb18030",
1925 #endif
1926         "IMAP-mailbox-name"
1927     };
1928 #if !UCONFIG_NO_LEGACY_CONVERSION
1929     static const char *const lmbcsNames[]={
1930         "LMBCS-1",
1931         "LMBCS-2",
1932         "LMBCS-3",
1933         "LMBCS-4",
1934         "LMBCS-5",
1935         "LMBCS-6",
1936         "LMBCS-8",
1937         "LMBCS-11",
1938         "LMBCS-16",
1939         "LMBCS-17",
1940         "LMBCS-18",
1941         "LMBCS-19"
1942     };
1943 #endif
1944 
1945     static const NameRange nameRanges[]={
1946         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1947 #if !UCONFIG_NO_LEGACY_CONVERSION
1948         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1949 #endif
1950         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
1951 #if !UCONFIG_NO_LEGACY_CONVERSION
1952         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
1953         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
1954         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
1955         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
1956 #else
1957         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
1958 #endif
1959     };
1960 
1961     /* open an empty set */
1962     set=uset_open(1, 0);
1963 
1964     count=ucnv_countAvailable();
1965     for(i=0; i<count; ++i) {
1966         errorCode=U_ZERO_ERROR;
1967         name=ucnv_getAvailableName(i);
1968         cnv=ucnv_open(name, &errorCode);
1969         if(U_FAILURE(errorCode)) {
1970             log_data_err("error: unable to open converter %s - %s\n",
1971                     name, u_errorName(errorCode));
1972             continue;
1973         }
1974 
1975         uset_clear(set);
1976         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1977         if(U_FAILURE(errorCode)) {
1978             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1979                     name, u_errorName(errorCode));
1980         } else if(uset_size(set)==0) {
1981             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
1982         }
1983 
1984         ucnv_close(cnv);
1985     }
1986 
1987     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
1988     for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) {
1989         errorCode=U_ZERO_ERROR;
1990         name=completeSetNames[i];
1991         cnv=ucnv_open(name, &errorCode);
1992         if(U_FAILURE(errorCode)) {
1993             log_data_err("error: unable to open converter %s - %s\n",
1994                     name, u_errorName(errorCode));
1995             continue;
1996         }
1997 
1998         uset_clear(set);
1999         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2000         if(U_FAILURE(errorCode)) {
2001             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2002                     name, u_errorName(errorCode));
2003         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
2004             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
2005         }
2006 
2007         ucnv_close(cnv);
2008     }
2009 
2010 #if !UCONFIG_NO_LEGACY_CONVERSION
2011     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
2012     for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) {
2013         errorCode=U_ZERO_ERROR;
2014         name=lmbcsNames[i];
2015         cnv=ucnv_open(name, &errorCode);
2016         if(U_FAILURE(errorCode)) {
2017             log_data_err("error: unable to open converter %s - %s\n",
2018                     name, u_errorName(errorCode));
2019             continue;
2020         }
2021 
2022         uset_clear(set);
2023         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2024         if(U_FAILURE(errorCode)) {
2025             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2026                     name, u_errorName(errorCode));
2027         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
2028             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
2029         }
2030 
2031         ucnv_close(cnv);
2032     }
2033 #endif
2034 
2035     /* test specific sets */
2036     for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) {
2037         errorCode=U_ZERO_ERROR;
2038         name=nameRanges[i].name;
2039         cnv=ucnv_open(name, &errorCode);
2040         if(U_FAILURE(errorCode)) {
2041             log_data_err("error: unable to open converter %s - %s\n",
2042                          name, u_errorName(errorCode));
2043             continue;
2044         }
2045 
2046         uset_clear(set);
2047         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2048         if(U_FAILURE(errorCode)) {
2049             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2050                     name, u_errorName(errorCode));
2051         } else if(
2052             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
2053             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
2054         ) {
2055             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
2056         } else if(nameRanges[i].notStart>=0) {
2057             /* simulate containsAny() with the C API */
2058             uset_complement(set);
2059             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
2060                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
2061             }
2062         }
2063 
2064         ucnv_close(cnv);
2065     }
2066 
2067     errorCode = U_ZERO_ERROR;
2068     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
2069     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2070         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2071     }
2072     errorCode = U_PARSE_ERROR;
2073     /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
2074     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
2075     if (errorCode != U_PARSE_ERROR) {
2076         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2077     }
2078 
2079     uset_close(set);
2080 }
2081