1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 *        Name                            Description
14 *    Madhu Katragadda     7/21/1999      Testing error callback routines
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include "cmemory.h"
22 #include "cstring.h"
23 #include "unicode/uloc.h"
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_err.h"
26 #include "cintltst.h"
27 #include "unicode/utypes.h"
28 #include "unicode/ustring.h"
29 #include "nccbtst.h"
30 #include "unicode/ucnv_cb.h"
31 #include "unicode/utf16.h"
32 
/*
 * "Large" chunk size handed to the conversion tests by the *CallBack drivers
 * in this file; the other chunk size they use is 1.
 */
#define NEW_MAX_BUFFER 999

/*
 * Yields the smaller of x and y.
 * Every use of an argument, and the comparison itself, is parenthesized so
 * that arguments containing operators of lower precedence than '<'
 * (for example `a | b` or `c ? d : e`) are compared as a whole.
 * NOTE: the selected argument is still evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Input/output chunk sizes of the test currently running; set by the Test* functions. */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Label describing the current test configuration; filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];
40 
/*
 * Writes the first `len` bytes of `a` through log_verbose() as a
 * brace-enclosed, comma-separated list of two-digit uppercase hex values.
 * The list is preceded by a newline; nothing is listed when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
49 
/*
 * Writes the first `len` UChar code units of `a` through log_verbose() as a
 * brace-enclosed, comma-separated list of four-digit lowercase hex values.
 * Nothing is listed when len <= 0.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("  0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
58 
/*
 * Writes the first `len` bytes of `a` to stderr as a brace-enclosed,
 * comma-separated list of two-digit lowercase hex values.
 * Nothing is listed when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "  0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
67 
/*
 * Writes the first `len` UChar code units of `a` to stderr as a
 * brace-enclosed, comma-separated list of four-digit lowercase hex values.
 * Nothing is listed when len <= 0.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
76 
setNuConvTestName(const char * codepage,const char * direction)77 static void setNuConvTestName(const char *codepage, const char *direction)
78 {
79     sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
80             codepage,
81             direction,
82             (int)gInBufferSize,
83             (int)gOutBufferSize);
84 }
85 
86 
87 static void TestCallBackFailure(void);
88 
89 void addTestConvertErrorCallBack(TestNode** root);
90 
/*
 * Registers this file's error-callback tests with addTest(), each under a
 * path beginning with "tsconv/nccbtst/".
 */
void addTestConvertErrorCallBack(TestNode** root)
{
    /* Skip / stop / substitute callback tests. */
    addTest(root, TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
    addTest(root, TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
    addTest(root, TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
    /* BEGIN android-removed
       To save space, Android does not build complete CJK conversion tables.
       We skip the test here.
    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
       END android-removed */

#if !UCONFIG_NO_LEGACY_CONVERSION
    /* These two tests are only compiled when legacy conversion is enabled. */
    addTest(root, TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
    addTest(root, TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
#endif

    addTest(root, TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
}
109 
TestSkipCallBack()110 static void TestSkipCallBack()
111 {
112     TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
113     TestSkip(1,NEW_MAX_BUFFER);
114     TestSkip(1,1);
115     TestSkip(NEW_MAX_BUFFER, 1);
116 }
117 
TestStopCallBack()118 static void TestStopCallBack()
119 {
120     TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
121     TestStop(1,NEW_MAX_BUFFER);
122     TestStop(1,1);
123     TestStop(NEW_MAX_BUFFER, 1);
124 }
125 
TestSubCallBack()126 static void TestSubCallBack()
127 {
128     TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
129     TestSub(1,NEW_MAX_BUFFER);
130     TestSub(1,1);
131     TestSub(NEW_MAX_BUFFER, 1);
132 
133 #if !UCONFIG_NO_LEGACY_CONVERSION
134     TestEBCDIC_STATEFUL_Sub(1, 1);
135     TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
136     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
137     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
138 #endif
139 }
140 
TestSubWithValueCallBack()141 static void TestSubWithValueCallBack()
142 {
143     TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
144     TestSubWithValue(1,NEW_MAX_BUFFER);
145     TestSubWithValue(1,1);
146     TestSubWithValue(NEW_MAX_BUFFER, 1);
147 }
148 
149 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()150 static void TestLegalAndOtherCallBack()
151 {
152     TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
153     TestLegalAndOthers(1,NEW_MAX_BUFFER);
154     TestLegalAndOthers(1,1);
155     TestLegalAndOthers(NEW_MAX_BUFFER, 1);
156 }
157 
TestSingleByteCallBack()158 static void TestSingleByteCallBack()
159 {
160     TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
161     TestSingleByte(1,NEW_MAX_BUFFER);
162     TestSingleByte(1,1);
163     TestSingleByte(NEW_MAX_BUFFER, 1);
164 }
165 #endif
166 
TestSkip(int32_t inputsize,int32_t outputsize)167 static void TestSkip(int32_t inputsize, int32_t outputsize)
168 {
169     static const uint8_t expskipIBM_949[]= {
170         0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
171 
172     static const uint8_t expskipIBM_943[] = {
173         0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
174 
175     static const uint8_t expskipIBM_930[] = {
176         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
177 
178     gInBufferSize = inputsize;
179     gOutBufferSize = outputsize;
180 
181     /*From Unicode*/
182     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
183 
184 #if !UCONFIG_NO_LEGACY_CONVERSION
185     {
186         static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
187         static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
188 
189         static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
190         static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
191 
192         if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
193                 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
194                 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
195             log_err("u-> ibm-949 with skip did not match.\n");
196         if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
197                 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
198                 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
199             log_err("u-> ibm-943 with skip did not match.\n");
200     }
201 
202     {
203         static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
204         static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
205         static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
206 
207         /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
208         if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
209                                    fromUBytes, UPRV_LENGTHOF(fromUBytes),
210                                    "ibm-930",
211                                    UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
212                                    NULL, 0)
213         ) {
214             log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
215         }
216     }
217 #endif
218 
219     {
220         static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
221         static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
222         static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
223 
224         static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
225         static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
226         static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
227 
228         /* US-ASCII */
229         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
230                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
231                                    "US-ASCII",
232                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
233                                    NULL, 0)
234         ) {
235             log_err("u->US-ASCII with skip did not match.\n");
236         }
237 
238 #if !UCONFIG_NO_LEGACY_CONVERSION
239         /* SBCS NLTC codepage 367 for US-ASCII */
240         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
241                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
242                                    "ibm-367",
243                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
244                                    NULL, 0)
245         ) {
246             log_err("u->ibm-367 with skip did not match.\n");
247         }
248 #endif
249 
250         /* ISO-Latin-1 */
251         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
252                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
253                                    "LATIN_1",
254                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
255                                    NULL, 0)
256         ) {
257             log_err("u->LATIN_1 with skip did not match.\n");
258         }
259 
260 #if !UCONFIG_NO_LEGACY_CONVERSION
261         /* windows-1252 */
262         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
263                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
264                                    "windows-1252",
265                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
266                                    NULL, 0)
267         ) {
268             log_err("u->windows-1252 with skip did not match.\n");
269         }
270     }
271 
272     {
273         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
274         static const uint8_t toIBM943[]= { 0x61, 0x61 };
275         static const int32_t offset[]= {0, 4};
276 
277          /* EUC_JP*/
278         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
279         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
280             0x61, 0x8e, 0xe0,
281         };
282         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
283 
284         /*EUC_TW*/
285         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
286         static const uint8_t to_euc_tw[]={
287             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
288             0x61, 0xe6, 0xca, 0x8a,
289         };
290         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
291 
292         /*ISO-2022-JP*/
293         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
294         static const uint8_t to_iso_2022_jp[]={
295             0x41,
296             0x42,
297 
298         };
299         static const int32_t from_iso_2022_jpOffs [] ={0,2};
300 
301         /*ISO-2022-JP*/
302         UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
303         static const uint8_t to_iso_2022_jp2[]={
304             0x41,
305             0x43,
306 
307         };
308         static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
309 
310         /*ISO-2022-cn*/
311         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
312         static const uint8_t to_iso_2022_cn[]={
313             0x41, 0x42
314         };
315         static const int32_t from_iso_2022_cnOffs [] ={
316             0, 2
317         };
318 
319         /*ISO-2022-CN*/
320         static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
321         static const uint8_t to_iso_2022_cn1[]={
322             0x41, 0x43
323 
324         };
325         static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
326 
327         /*ISO-2022-kr*/
328         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
329         static const uint8_t to_iso_2022_kr[]={
330             0x1b,   0x24,   0x29,   0x43,
331             0x41,
332             0x0e,   0x25,   0x50,
333             0x25,   0x50,
334             0x0f,   0x42,
335         };
336         static const int32_t from_iso_2022_krOffs [] ={
337             -1,-1,-1,-1,
338             0,
339             1,1,1,
340             3,3,
341             4,4
342         };
343 
344         /*ISO-2022-kr*/
345         static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
346         static const uint8_t to_iso_2022_kr1[]={
347             0x1b,   0x24,   0x29,   0x43,
348             0x41,
349             0x0e,   0x25,   0x50,
350             0x25,   0x50,
351 
352         };
353         static const int32_t from_iso_2022_krOffs1 [] ={
354             -1,-1,-1,-1,
355             0,
356             1,1,1,
357             3,3,
358 
359         };
360         /* HZ encoding */
361         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
362 
363         static const uint8_t to_hz[]={
364             0x7e,   0x7d,   0x41,
365             0x7e,   0x7b,   0x26,   0x30,
366             0x26,   0x30,
367             0x7e,   0x7d,   0x42,
368 
369         };
370         static const int32_t from_hzOffs [] ={
371             0,0,0,
372             1,1,1,1,
373             3,3,
374             4,4,4,4
375         };
376 
377         static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
378 
379         static const uint8_t to_hz1[]={
380             0x7e,   0x7d,   0x41,
381             0x7e,   0x7b,   0x26,   0x30,
382             0x26,   0x30,
383 
384 
385         };
386         static const int32_t from_hzOffs1 [] ={
387             0,0,0,
388             1,1,1,1,
389             3,3,
390 
391         };
392 
393 #endif
394 
395         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
396 
397         static const uint8_t to_SCSU[]={
398             0x41,
399             0x42
400 
401 
402         };
403         static const int32_t from_SCSUOffs [] ={
404             0,
405             2,
406 
407         };
408 
409 #if !UCONFIG_NO_LEGACY_CONVERSION
410         /* ISCII */
411         static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
412         static const uint8_t to_iscii[]={
413             0x41,
414             0x42,
415         };
416         static const int32_t from_isciiOffs [] ={
417             0,2,
418 
419         };
420         /*ISCII*/
421         static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
422         static const uint8_t to_iscii1[]={
423             0x44,
424             0x43,
425 
426         };
427         static const int32_t from_isciiOffs1 [] ={0,2};
428 
429         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
430                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
431                 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
432             log_err("u-> ibm-943 with skip did not match.\n");
433 
434         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
435                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
436                 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
437             log_err("u-> euc-jp with skip did not match.\n");
438 
439         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
440                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
441                 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
442             log_err("u-> euc-tw with skip did not match.\n");
443 
444         /*iso_2022_jp*/
445         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
446                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
447                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
448             log_err("u-> iso-2022-jp with skip did not match.\n");
449 
450         /* with context */
451         if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
452                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
453                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
454             log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
455 
456         /*iso_2022_cn*/
457         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
458                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
459                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
460             log_err("u-> iso-2022-cn with skip did not match.\n");
461         /*with context*/
462         if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
463                 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
464                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
465             log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
466 
467         /*iso_2022_kr*/
468         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
469                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
470                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
471             log_err("u-> iso-2022-kr with skip did not match.\n");
472           /*with context*/
473         if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
474                 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
475                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
476             log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
477 
478         /*hz*/
479         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
480                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
481                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
482             log_err("u-> HZ with skip did not match.\n");
483           /*with context*/
484         if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
485                 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
486                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
487             log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
488 #endif
489 
490         /*SCSU*/
491         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
492                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
493                 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
494             log_err("u-> SCSU with skip did not match.\n");
495 
496 #if !UCONFIG_NO_LEGACY_CONVERSION
497         /*ISCII*/
498         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
499                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
500                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
501             log_err("u-> iscii with skip did not match.\n");
502         /*with context*/
503         if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
504                 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
505                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
506             log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
507 #endif
508     }
509 
510     log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
511     {
512         static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
513             0xFB, 0xEE, 0x28,       /* from source offset 0 */
514             0x24, 0x1E, 0x52,
515             0xB2,
516             0x20,
517             0xB3,
518             0xB1,
519             0x0D,
520             0x0A,
521 
522             0x20,                   /* from 8 */
523             0x00,
524             0xD0, 0x6C,
525             0xB6,
526             0xD8, 0xA5,
527             0x20,
528             0x68,
529             0x59,
530 
531             0xF9, 0x28,             /* from 16 */
532             0x6D,
533             0x20,
534             0x73,
535             0xE0, 0x2D,
536             0xDE, 0x43,
537             0xD0, 0x33,
538             0x20,
539 
540             0xFA, 0x83,             /* from 24 */
541             0x25, 0x01,
542             0xFB, 0x16, 0x87,
543             0x4B, 0x16,
544             0x20,
545             0xE6, 0xBD,
546             0xEB, 0x5B,
547             0x4B, 0xCC,
548 
549             0xF9, 0xA2,             /* from 32 */
550             0xFC, 0x10, 0x3E,
551             0xFE, 0x16, 0x3A, 0x8C,
552             0x20,
553             0xFC, 0x03, 0xAC,
554 
555             0x01,                   /* from 41 */
556             0xDE, 0x83,
557             0x20,
558             0x09
559         };
560         static const UChar expected[]={
561             0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
562             0x0063, 0x0061, 0x000D, 0x000A,
563 
564             0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
565             0x0930, 0x0020, 0x0918, 0x0909,
566 
567             0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
568             0x4000, 0x4E00, 0x7777, 0x0020,
569 
570             0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
571             0x0020, 0xD7A3, 0xDC00, 0xD800,
572 
573             0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
574             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
575 
576             0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
577             0x0009
578         };
579         static const int32_t offsets[]={
580             0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
581             8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
582             16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
583             24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
584             32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
585             41, 42, 42, 43, 44
586         };
587 
588         /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
589         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
590                                  sampleText, UPRV_LENGTHOF(sampleText),
591                                  "BOCU-1",
592                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
593         ) {
594             log_err("u->BOCU-1 with skip did not match.\n");
595         }
596     }
597 
598     log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
599     {
600         const uint8_t sampleText[]={
601             0x61,                               /* 'a' */
602             0xc4, 0xb5,                         /* U+0135 */
603             0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
604             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
605             0xee, 0x80, 0x80,                   /* PUA U+e000 */
606             0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
607             0x62,                               /* 'b' */
608             0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
609             0xd0, 0x80                          /* U+0400 */
610         };
611         UChar expected[]={
612             0x0061,
613             0x0135,
614             0xd020,
615             0xd801, 0xdc01,
616             0xe000,
617             0xdc01,
618             0x0062,
619             0xd801,
620             0x0400
621         };
622         int32_t offsets[]={
623             0,
624             1, 1,
625             2, 2, 2,
626             3, 3, 3, 4, 4, 4,
627             5, 5, 5,
628             6, 6, 6,
629             7,
630             8, 8, 8,
631             9, 9
632         };
633 
634         /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
635 
636         /* without offsets */
637         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
638                                  sampleText, UPRV_LENGTHOF(sampleText),
639                                  "CESU-8",
640                                  UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
641         ) {
642             log_err("u->CESU-8 with skip did not match.\n");
643         }
644 
645         /* with offsets */
646         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
647                                  sampleText, UPRV_LENGTHOF(sampleText),
648                                  "CESU-8",
649                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
650         ) {
651             log_err("u->CESU-8 with skip did not match.\n");
652         }
653     }
654 
655     /*to Unicode*/
656     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
657 
658 #if !UCONFIG_NO_LEGACY_CONVERSION
659     {
660 
661         static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
662         static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
663         static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
664 
665         static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
666         static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
667         static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
668 
669         if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
670                  IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
671                 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
672             log_err("ibm-949->u with skip did not match.\n");
673         if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
674                  IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
675                 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
676             log_err("ibm-943->u with skip did not match.\n");
677 
678 
679         if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
680                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
681                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
682             log_err("ibm-930->u with skip did not match.\n");
683 
684 
685         if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
686                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
687                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
688             log_err("ibm-930->u with skip did not match.\n");
689     }
690 #endif
691 
692     {
693         static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
694         static const UChar usasciiToU[] = { 0x61, 0x31 };
695         static const int32_t usasciiToUOffsets[] = { 0, 2 };
696 
697         static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
698         static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
699         static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
700 
701         /* US-ASCII */
702         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
703                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
704                                  "US-ASCII",
705                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
706                                  NULL, 0)
707         ) {
708             log_err("US-ASCII->u with skip did not match.\n");
709         }
710 
711 #if !UCONFIG_NO_LEGACY_CONVERSION
712         /* SBCS NLTC codepage 367 for US-ASCII */
713         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
714                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
715                                  "ibm-367",
716                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
717                                  NULL, 0)
718         ) {
719             log_err("ibm-367->u with skip did not match.\n");
720         }
721 #endif
722 
723         /* ISO-Latin-1 */
724         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
725                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
726                                  "LATIN_1",
727                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
728                                  NULL, 0)
729         ) {
730             log_err("LATIN_1->u with skip did not match.\n");
731         }
732 
733 #if !UCONFIG_NO_LEGACY_CONVERSION
734         /* windows-1252 */
735         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
736                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
737                                  "windows-1252",
738                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
739                                  NULL, 0)
740         ) {
741             log_err("windows-1252->u with skip did not match.\n");
742         }
743 #endif
744     }
745 
746 #if !UCONFIG_NO_LEGACY_CONVERSION
747     {
748         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
749             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
750         };
751         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
752         };
753         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
754 
755 
756          /* euc-jp*/
757         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
758             0x8f, 0xda, 0xa1,  /*unassigned*/
759            0x8e, 0xe0,
760         };
761         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
762         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
763 
764          /*EUC_TW*/
765         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
766             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
767            0xe6, 0xca, 0x8a,
768         };
769         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
770         static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
771                 /*iso-2022-jp*/
772         static const uint8_t sampleTxt_iso_2022_jp[]={
773             0x41,
774             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
775             0x1b,   0x28,   0x42,   0x42,
776 
777         };
778         static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
779         static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
780 
781         /*iso-2022-cn*/
782         static const uint8_t sampleTxt_iso_2022_cn[]={
783             0x0f,   0x41,   0x44,
784             0x1B,   0x24,   0x29,   0x47,
785             0x0E,   0x40,   0x6f, /*unassigned*/
786             0x0f,   0x42,
787 
788         };
789 
790         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
791         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
792 
793         /*iso-2022-kr*/
794         static const uint8_t sampleTxt_iso_2022_kr[]={
795           0x1b, 0x24, 0x29,  0x43,
796           0x41,
797           0x0E, 0x7f, 0x1E,
798           0x0e, 0x25, 0x50,
799           0x0f, 0x51,
800           0x42, 0x43,
801 
802         };
803         static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
804         static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
805 
806         /*hz*/
807         static const uint8_t sampleTxt_hz[]={
808             0x41,
809             0x7e,   0x7b,   0x26,   0x30,
810             0x7f,   0x1E, /*unassigned*/
811             0x26,   0x30,
812             0x7e,   0x7d,   0x42,
813             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
814             0x7e,   0x7d,   0x42,
815         };
816         static const UChar hztoUnicode[]={
817             0x41,
818             0x03a0,
819             0x03A0,
820             0x42,
821             0x42,};
822 
823         static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
824 
825         /*ISCII*/
826         static const uint8_t sampleTxt_iscii[]={
827             0x41,
828             0xa1,
829             0xEB,    /*unassigned*/
830             0x26,
831             0x30,
832             0xa2,
833             0xEC,    /*unassigned*/
834             0x42,
835         };
836         static const UChar isciitoUnicode[]={
837             0x41,
838             0x0901,
839             0x26,
840             0x30,
841             0x0902,
842             0x42,
843             };
844 
845         static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
846 
847         /*LMBCS*/
848         static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
849             0x12, 0x92, 0xa0, /*unassigned*/
850             0x12, 0x92, 0xA1,
851         };
852         static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
853         static const int32_t fromLMBCS[] = {0, 6};
854 
855         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
856              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
857             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
858         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
859 
860         if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
861              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
862             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
863         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
864 
865         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
866                  euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
867                 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
868             log_err("euc-jp->u with skip did not match.\n");
869 
870 
871 
872         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
873                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
874                 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
875             log_err("euc-tw->u with skip did not match.\n");
876 
877 
878         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
879                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
880                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
881             log_err("iso-2022-jp->u with skip did not match.\n");
882 
883         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
884                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
885                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
886             log_err("iso-2022-cn->u with skip did not match.\n");
887 
888         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
889                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
890                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
891             log_err("iso-2022-kr->u with skip did not match.\n");
892 
893         if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
894                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
895                 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
896             log_err("HZ->u with skip did not match.\n");
897 
898         if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
899                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
900                 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
901             log_err("iscii->u with skip did not match.\n");
902 
903         if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
904                 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
905                 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
906             log_err("LMBCS->u with skip did not match.\n");
907 
908     }
909 #endif
910 
911     log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
912     {
913         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
914             0xe0, 0x80,  0x61,};
915         UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
916         int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
917 
918         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
919                  expected1, UPRV_LENGTHOF(expected1),"utf8",
920                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
921             log_err("utf8->u with skip did not match.\n");;
922     }
923 
924     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
925     {
926         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
927         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
928         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
929 
930         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
931                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
932                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
933             log_err("scsu->u with skip did not match.\n");
934     }
935 
936     log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
937     {
938         const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
939             0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
940             0x24, 0x1E, 0x52,       /* 3 */
941             0xB2,                   /* 6 */
942             0x20,                   /* 7 */
943             0x40, 0x07,             /* 8 - wrong trail byte */
944             0xB3,                   /* 10 */
945             0xB1,                   /* 11 */
946             0xD0, 0x20,             /* 12 - wrong trail byte */
947             0x0D,                   /* 14 */
948             0x0A,                   /* 15 */
949             0x20,                   /* 16 */
950             0x00,                   /* 17 */
951             0xD0, 0x6C,             /* 18 */
952             0xB6,                   /* 20 */
953             0xD8, 0xA5,             /* 21 */
954             0x20,                   /* 23 */
955             0x68,                   /* 24 */
956             0x59,                   /* 25 */
957             0xF9, 0x28,             /* 26 */
958             0x6D,                   /* 28 */
959             0x20,                   /* 29 */
960             0x73,                   /* 30 */
961             0xE0, 0x2D,             /* 31 */
962             0xDE, 0x43,             /* 33 */
963             0xD0, 0x33,             /* 35 */
964             0x20,                   /* 37 */
965             0xFA, 0x83,             /* 38 */
966             0x25, 0x01,             /* 40 */
967             0xFB, 0x16, 0x87,       /* 42 */
968             0x4B, 0x16,             /* 45 */
969             0x20,                   /* 47 */
970             0xE6, 0xBD,             /* 48 */
971             0xEB, 0x5B,             /* 50 */
972             0x4B, 0xCC,             /* 52 */
973             0xF9, 0xA2,             /* 54 */
974             0xFC, 0x10, 0x3E,       /* 56 */
975             0xFE, 0x16, 0x3A, 0x8C, /* 59 */
976             0x20,                   /* 63 */
977             0xFC, 0x03, 0xAC,       /* 64 */
978             0xFF,                   /* 67 - FF just resets the state without encoding anything */
979             0x01,                   /* 68 */
980             0xDE, 0x83,             /* 69 */
981             0x20,                   /* 71 */
982             0x09                    /* 72 */
983         };
984         UChar expected[]={
985             0xFEFF, 0x0061, 0x0062, 0x0020,
986             0x0063, 0x0061, 0x000D, 0x000A,
987             0x0020, 0x0000, 0x00DF, 0x00E6,
988             0x0930, 0x0020, 0x0918, 0x0909,
989             0x3086, 0x304D, 0x0020, 0x3053,
990             0x4000, 0x4E00, 0x7777, 0x0020,
991             0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
992             0x0020, 0xD7A3, 0xDC00, 0xD800,
993             0xD800, 0xDC00, 0xD845, 0xDDDD,
994             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
995             0xDFFF, 0x0001, 0x0E40, 0x0020,
996             0x0009
997         };
998         int32_t offsets[]={
999             0, 3, 6, 7, /* skip 8, */
1000             10, 11, /* skip 12, */
1001             14, 15, 16, 17, 18,
1002             20, 21, 23, 24, 25, 26, 28, 29,
1003             30, 31, 33, 35, 37, 38,
1004             40, 42, 45, 47, 48,
1005             50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1006             63, 64, /* trail */ 64, /* reset only 67, */
1007             68, 69,
1008             71, 72
1009         };
1010 
1011         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1012                                  expected, UPRV_LENGTHOF(expected), "BOCU-1",
1013                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1014         ) {
1015             log_err("BOCU-1->u with skip did not match.\n");
1016         }
1017     }
1018 
1019     log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1020     {
1021         const uint8_t sampleText[]={
1022             0x61,                               /* 0  'a' */
1023             0xc0, 0x80,                         /* 1  non-shortest form */
1024             0xc4, 0xb5,                         /* 3  U+0135 */
1025             0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1026             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1027             0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1028             0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1029             0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1030             0x62,                               /* 24 'b' */
1031             0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1032             0xed, 0xa0,                         /* 28 incomplete sequence */
1033             0xd0, 0x80                          /* 30 U+0400 */
1034         };
1035         UChar expected[]={
1036             0x0061,
1037             /* skip */
1038             0x0135,
1039             0xd020,
1040             0xd801, 0xdc01,
1041             0xe000,
1042             0xdc01,
1043             /* skip */
1044             0x0062,
1045             0xd801,
1046             0x0400
1047         };
1048         int32_t offsets[]={
1049             0,
1050             /* skip 1, */
1051             3,
1052             5,
1053             8, 11,
1054             14,
1055             17,
1056             /* skip 20, 20, */
1057             24,
1058             25,
1059             /* skip 28 */
1060             30
1061         };
1062 
1063         /* without offsets */
1064         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1065                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1066                                  UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1067         ) {
1068             log_err("CESU-8->u with skip did not match.\n");
1069         }
1070 
1071         /* with offsets */
1072         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1073                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1074                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1075         ) {
1076             log_err("CESU-8->u with skip did not match.\n");
1077         }
1078     }
1079 }
1080 
TestStop(int32_t inputsize,int32_t outputsize)1081 static void TestStop(int32_t inputsize, int32_t outputsize)
1082 {
1083     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1084     static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1085 
1086     static const uint8_t expstopIBM_949[]= {
1087         0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1088 
1089     static const uint8_t expstopIBM_943[] = {
1090         0x9f, 0xaf, 0x9f, 0xb1};
1091 
1092     static const uint8_t expstopIBM_930[] = {
1093         0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1094 
1095     static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1096     static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1097     static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1098 
1099 
1100     static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1101     static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1102     static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1103 
1104     static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1105     static const int32_t  fromIBM943Offs [] = { 0, 2};
1106     static const int32_t  fromIBM930Offs [] = { 1, 3};
1107 
1108     gInBufferSize = inputsize;
1109     gOutBufferSize = outputsize;
1110 
1111     /*From Unicode*/
1112 
1113 #if !UCONFIG_NO_LEGACY_CONVERSION
1114     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1115             expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1116             UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1117         log_err("u-> ibm-949 with stop did not match.\n");
1118     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1119             expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1120             UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1121         log_err("u-> ibm-943 with stop did not match.\n");
1122     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1123             expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1124             UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1125         log_err("u-> ibm-930 with stop did not match.\n");
1126 
1127     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1128     {
1129         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1130         static const uint8_t toIBM943[]= { 0x61,};
1131         static const int32_t offset[]= {0,} ;
1132 
1133          /*EUC_JP*/
1134         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1135         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1136         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1137 
1138         /*EUC_TW*/
1139         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1140         static const uint8_t to_euc_tw[]={
1141             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1142         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1143 
1144         /*ISO-2022-JP*/
1145         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1146         static const uint8_t to_iso_2022_jp[]={
1147              0x41,
1148 
1149         };
1150         static const int32_t from_iso_2022_jpOffs [] ={0,};
1151 
1152         /*ISO-2022-cn*/
1153         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1154         static const uint8_t to_iso_2022_cn[]={
1155             0x41,
1156 
1157         };
1158         static const int32_t from_iso_2022_cnOffs [] ={
1159             0,0,
1160             2,2,
1161         };
1162 
1163         /*ISO-2022-kr*/
1164         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1165         static const uint8_t to_iso_2022_kr[]={
1166             0x1b,   0x24,   0x29,   0x43,
1167             0x41,
1168             0x0e,   0x25,   0x50,
1169         };
1170         static const int32_t from_iso_2022_krOffs [] ={
1171             -1,-1,-1,-1,
1172              0,
1173             1,1,1,
1174         };
1175 
1176         /* HZ encoding */
1177         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1178 
1179         static const uint8_t to_hz[]={
1180             0x7e,   0x7d, 0x41,
1181             0x7e,   0x7b,   0x26,   0x30,
1182 
1183         };
1184         static const int32_t from_hzOffs [] ={
1185             0, 0,0,
1186             1,1,1,1,
1187         };
1188 
1189         /*ISCII*/
1190         static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1191         static const uint8_t to_iscii[]={
1192             0x41,
1193         };
1194         static const int32_t from_isciiOffs [] ={
1195             0,
1196         };
1197 
1198         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1199                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1200                 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1201             log_err("u-> ibm-943 with stop did not match.\n");
1202 
1203         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1204                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1205                 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1206             log_err("u-> euc-jp with stop did not match.\n");
1207 
1208         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1209                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1210                 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1211             log_err("u-> euc-tw with stop did not match.\n");
1212 
1213         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1214                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1215                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1216             log_err("u-> iso-2022-jp with stop did not match.\n");
1217 
1218         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1219                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1220                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1221             log_err("u-> iso-2022-jp with stop did not match.\n");
1222 
1223         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1224                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1225                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1226             log_err("u-> iso-2022-cn with stop did not match.\n");
1227 
1228         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1229                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1230                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1231             log_err("u-> iso-2022-kr with stop did not match.\n");
1232 
1233         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1234                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1235                 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1236             log_err("u-> HZ with stop did not match.\n");\
1237 
1238         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1239                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1240                 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1241             log_err("u-> iscii with stop did not match.\n");
1242 
1243 
1244     }
1245 #endif
1246 
1247     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1248     {
1249         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1250 
1251         static const uint8_t to_SCSU[]={
1252             0x41,
1253 
1254         };
1255         int32_t from_SCSUOffs [] ={
1256             0,
1257 
1258         };
1259         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1260                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1261                 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1262             log_err("u-> SCSU with skip did not match.\n");
1263 
1264     }
1265 
1266     /*to Unicode*/
1267 
1268 #if !UCONFIG_NO_LEGACY_CONVERSION
1269     if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1270              IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1271             UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1272         log_err("ibm-949->u with stop did not match.\n");
1273     if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1274              IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1275             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1276         log_err("ibm-943->u with stop did not match.\n");
1277     if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1278              IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1279             UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1280         log_err("ibm-930->u with stop did not match.\n");
1281 
1282     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1283     {
1284 
1285         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1286             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1287         };
1288         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1289         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1290 
1291 
1292          /*EUC-JP*/
1293         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1294             0x8f, 0xda, 0xa1,  /*unassigned*/
1295            0x8e, 0xe0,
1296         };
1297         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1298         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1299 
1300           /*EUC_TW*/
1301         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1302             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1303            0xe6, 0xca, 0x8a,
1304         };
1305         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1306         int32_t from_euc_twOffs [] ={ 0, 1, 3};
1307 
1308 
1309 
1310          if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1311              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1312             UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1313         log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1314 
1315         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1316              euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1317             UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1318         log_err("euc-jp->u with stop did not match.\n");
1319 
1320         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1321                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1322                 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1323             log_err("euc-tw->u with stop did not match.\n");
1324     }
1325 #endif
1326 
1327     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1328     {
1329         static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1330             0xe0, 0x80,  0x61,};
1331         static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1332         static const int32_t offsets1[] = {   0x0000, 0x0001};
1333 
1334         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1335                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1336                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1337             log_err("utf8->u with stop did not match.\n");;
1338     }
1339     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1340     {
1341         static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1342         static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1343         static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1344 
1345         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1346                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1347                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1348             log_err("scsu->u with stop did not match.\n");;
1349     }
1350 
1351 }
1352 
TestSub(int32_t inputsize,int32_t outputsize)1353 static void TestSub(int32_t inputsize, int32_t outputsize)
1354 {
1355     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1356     static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1357 
1358     static const uint8_t expsubIBM_949[] =
1359      { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1360 
1361     static const uint8_t expsubIBM_943[] = {
1362         0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1363 
1364     static const uint8_t expsubIBM_930[] = {
1365         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1366 
1367     static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1368     static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1369     static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1370 
1371     static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1372     static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1373     static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1374 
1375     static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1376     static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1377     static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1378 
1379     gInBufferSize = inputsize;
1380     gOutBufferSize = outputsize;
1381 
1382     /*from unicode*/
1383 
1384 #if !UCONFIG_NO_LEGACY_CONVERSION
1385     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1386             expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1387             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1388         log_err("u-> ibm-949 with subst did not match.\n");
1389     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1390             expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1391             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1392         log_err("u-> ibm-943 with subst did not match.\n");
1393     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1394             expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1395             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1396         log_err("u-> ibm-930 with subst did not match.\n");
1397 
1398     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1399     {
1400         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1401         static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1402         static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1403 
1404 
1405         /* EUC_JP*/
1406         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1407         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1408             0xf4, 0xfe, 0xf4, 0xfe,
1409             0x61, 0x8e, 0xe0,
1410         };
1411         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1412 
1413         /*EUC_TW*/
1414         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1415         static const uint8_t to_euc_tw[]={
1416             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1417             0xfd, 0xfe, 0xfd, 0xfe,
1418             0x61, 0xe6, 0xca, 0x8a,
1419         };
1420 
1421         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1422 
1423         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1424                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1425                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1426             log_err("u-> ibm-943 with substitute did not match.\n");
1427 
1428         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1429                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1430                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1431             log_err("u-> euc-jp with substitute did not match.\n");
1432 
1433         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1434                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1435                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1436             log_err("u-> euc-tw with substitute did not match.\n");
1437     }
1438 #endif
1439 
1440     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1441     {
1442         UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1443 
1444         const uint8_t to_SCSU[]={
1445             0x41,
1446             0x0e, 0xff,0xfd,
1447             0x42
1448 
1449 
1450         };
1451         int32_t from_SCSUOffs [] ={
1452             0,
1453             1,1,1,
1454             2,
1455 
1456         };
1457         const uint8_t to_SCSU_1[]={
1458             0x41,
1459 
1460         };
1461         int32_t from_SCSUOffs_1 [] ={
1462             0,
1463 
1464         };
1465         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1466                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1467                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1468             log_err("u-> SCSU with substitute did not match.\n");
1469 
1470         if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1471                 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1472                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1473             log_err("u-> SCSU with substitute did not match.\n");
1474     }
1475 
1476     log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1477     {
1478         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1479         static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1480                            0xf0, 0x90, 0x90, 0x81,
1481                            0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1482                            0xef, 0xbf, 0xbf, 0x61,
1483 
1484         };
1485         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1486         if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1487                 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1488                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1489             log_err("u-> utf8 with substitute did not match.\n");
1490         }
1491     }
1492 
1493     log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1494     {
1495         static const UChar in[]={ 0x0041, 0xfeff };
1496 
1497         static const uint8_t out[]={
1498 #if U_IS_BIG_ENDIAN
1499             0xfe, 0xff,
1500             0x00, 0x41,
1501             0xfe, 0xff
1502 #else
1503             0xff, 0xfe,
1504             0x41, 0x00,
1505             0xff, 0xfe
1506 #endif
1507         };
1508         static const int32_t offsets[]={
1509             -1, -1, 0, 0, 1, 1
1510         };
1511 
1512         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1513                                    out, UPRV_LENGTHOF(out), "UTF-16",
1514                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1515         ) {
1516             log_err("u->UTF-16 with substitute did not match.\n");
1517         }
1518     }
1519 
1520     log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1521     {
1522         static const UChar in[]={ 0x0041, 0xfeff };
1523 
1524         static const uint8_t out[]={
1525 #if U_IS_BIG_ENDIAN
1526             0x00, 0x00, 0xfe, 0xff,
1527             0x00, 0x00, 0x00, 0x41,
1528             0x00, 0x00, 0xfe, 0xff
1529 #else
1530             0xff, 0xfe, 0x00, 0x00,
1531             0x41, 0x00, 0x00, 0x00,
1532             0xff, 0xfe, 0x00, 0x00
1533 #endif
1534         };
1535         static const int32_t offsets[]={
1536             -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1537         };
1538 
1539         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1540                                    out, UPRV_LENGTHOF(out), "UTF-32",
1541                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1542         ) {
1543             log_err("u->UTF-32 with substitute did not match.\n");
1544         }
1545     }
1546 
1547     /*to unicode*/
1548 
1549 #if !UCONFIG_NO_LEGACY_CONVERSION
1550     if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1551              IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1552             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1553         log_err("ibm-949->u with substitute did not match.\n");
1554     if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1555              IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1556             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1557         log_err("ibm-943->u with substitute did not match.\n");
1558     if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1559              IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1560             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1561         log_err("ibm-930->u with substitute did not match.\n");
1562 
1563     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1564     {
1565 
1566         const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1567             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1568         };
1569         UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1570         };
1571         int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1572 
1573 
1574         /* EUC_JP*/
1575         const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1576             0x8f, 0xda, 0xa1,  /*unassigned*/
1577            0x8e, 0xe0, 0x8a
1578         };
1579         UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1580         int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1581 
1582         /*EUC_TW*/
1583         const uint8_t sampleTxt_euc_tw[]={
1584             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1585             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1586             0xe6, 0xca, 0x8a,
1587         };
1588         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1589         int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1590 
1591 
1592         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1593            EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1594           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1595             log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1596 
1597 
1598         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1599            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1600           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1601             log_err("euc-jp->u with substitute did not match.\n");
1602 
1603 
1604         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1605            euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1606           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1607             log_err("euc-tw->u with substitute  did not match.\n");
1608 
1609 
1610         if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1611            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1612           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1613             log_err("euc-jp->u with substitute did not match.\n");
1614     }
1615 #endif
1616 
1617     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1618     {
1619         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1620             0xe0, 0x80,  0x61,};
1621         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1622         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1623 
1624         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1625                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1626                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1627             log_err("utf8->u with substitute did not match.\n");;
1628     }
1629     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1630     {
1631         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1632         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1633         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1634 
1635         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1636                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1637                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1638             log_err("scsu->u with stop did not match.\n");;
1639     }
1640 
1641 #if !UCONFIG_NO_LEGACY_CONVERSION
1642     log_verbose("Testing ibm-930 subchar/subchar1\n");
1643     {
1644         static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1645         static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1646         static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1647 
1648         static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1649         static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1650         static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1651 
1652         if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1653                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1654         ) {
1655             log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1656         }
1657 
1658         if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1659                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1660         ) {
1661             log_err("ibm-930->u subchar/subchar1 did not match.\n");
1662         }
1663     }
1664 
1665     log_verbose("Testing GB 18030 with substitute callbacks\n");
1666     {
1667         static const UChar u2[]={
1668             0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1669         static const uint8_t gb2[]={
1670             0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1671         static const int32_t offsets2[]={
1672             0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1673 
1674         if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1675                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1676         ) {
1677             log_err("gb18030->u with substitute did not match.\n");
1678         }
1679     }
1680 #endif
1681 
1682     log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1683     {
1684         static const uint8_t utf7[]={
1685          /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1686             0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1687         };
1688         static const UChar unicode[]={
1689             0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1690         };
1691         static const int32_t offsets[]={
1692             0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1693         };
1694 
1695         if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1696                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1697         ) {
1698             log_err("UTF-7->u with substitute did not match.\n");
1699         }
1700     }
1701 
1702     log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1703     {
1704         static const uint8_t
1705             in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1706             in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1707             in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1708 
1709         static const UChar
1710             out1[]={ 0x4e00, 0xfeff },
1711             out2[]={ 0x004e, 0xfffe },
1712             out3[]={ 0xfefd, 0x4e00, 0xfeff };
1713 
1714         static const int32_t
1715             offsets1[]={ 2, 4 },
1716             offsets2[]={ 2, 4 },
1717             offsets3[]={ 0, 2, 4 };
1718 
1719         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1720                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1721         ) {
1722             log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1723         }
1724 
1725         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1726                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1727         ) {
1728             log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1729         }
1730 
1731         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1732                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1733         ) {
1734             log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1735         }
1736     }
1737 
1738     log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1739     {
1740         static const uint8_t
1741             in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1742             in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1743             in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1744             in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1745 
1746         static const UChar
1747             out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1748             out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1749             out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1750             out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1751 
1752         static const int32_t
1753             offsets1[]={ 4, 4, 8 },
1754             offsets2[]={ 4, 4, 8 },
1755             offsets3[]={ 0, 4, 4, 8, 12 },
1756             offsets4[]={ 0, 0, 4, 8 };
1757 
1758         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1759                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1760         ) {
1761             log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1762         }
1763 
1764         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1765                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1766         ) {
1767             log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1768         }
1769 
1770         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1771                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1772         ) {
1773             log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1774         }
1775 
1776         if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1777                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1778         ) {
1779             log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1780         }
1781     }
1782 }
1783 
TestSubWithValue(int32_t inputsize,int32_t outputsize)1784 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1785 {
1786     UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1787     UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1788 
1789     const uint8_t expsubwvalIBM_949[]= {
1790         0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1791         0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1792 
1793     const uint8_t expsubwvalIBM_943[]= {
1794         0x9f, 0xaf, 0x9f, 0xb1,
1795         0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1796 
1797     const uint8_t expsubwvalIBM_930[] = {
1798         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1799 
1800     int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1801     int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1802     int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1803 
1804     gInBufferSize = inputsize;
1805     gOutBufferSize = outputsize;
1806 
1807     /*from Unicode*/
1808 
1809 #if !UCONFIG_NO_LEGACY_CONVERSION
1810     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1811             expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1812             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1813         log_err("u-> ibm-949 with subst with value did not match.\n");
1814 
1815     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1816             expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1817             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1818         log_err("u-> ibm-943 with sub with value did not match.\n");
1819 
1820     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1821             expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1822             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1823         log_err("u-> ibm-930 with subst with value did not match.\n");
1824 
1825 
1826     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1827     {
1828         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1829         static const uint8_t toIBM943[]= { 0x61,
1830             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1831             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1832             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1833             0x61 };
1834         static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1835 
1836 
1837          /* EUC_JP*/
1838         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1839         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1840             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1842             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1843             0x61, 0x8e, 0xe0,
1844         };
1845         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1846             3, 3, 3, 3, 3, 3,
1847             3, 3, 3, 3, 3, 3,
1848             5, 5, 5, 5, 5, 5,
1849             6, 7, 7,
1850         };
1851 
1852         /*EUC_TW*/
1853         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1854         static const uint8_t to_euc_tw[]={
1855             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1856             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1857             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1858             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1859             0x61, 0xe6, 0xca, 0x8a,
1860         };
1861         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1862              3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1863              6, 7, 7, 8,
1864         };
1865         /*ISO-2022-JP*/
1866         static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1867         static const uint8_t to_iso_2022_jp1[]={
1868             0x1b,   0x24,   0x42,   0x21, 0x21,
1869             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1870             0x1b,   0x24,   0x42,   0x21, 0x22,
1871             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1872             0x42,
1873         };
1874 
1875         static const int32_t from_iso_2022_jpOffs1 [] ={
1876             0,0,0,0,0,
1877             1,1,1,1,1,1,1,1,1,
1878             2,2,2,2,2,
1879             3,3,3,3,3,3,3,3,3,
1880             4,
1881         };
1882         /* surrogate pair*/
1883         static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1884         static const uint8_t to_iso_2022_jp2[]={
1885                                 0x1b,   0x24,   0x42,   0x21,   0x21,
1886                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1887                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1888                                 0x1b,   0x24,   0x42,   0x21,   0x22,
1889                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1890                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1891                                 0x42,
1892                                 };
1893         static const int32_t from_iso_2022_jpOffs2 [] ={
1894             0,0,0,0,0,
1895             1,1,1,1,1,1,1,1,1,
1896             1,1,1,1,1,1,
1897             3,3,3,3,3,
1898             4,4,4,4,4,4,4,4,4,
1899             4,4,4,4,4,4,
1900             6,
1901         };
1902 
1903         /*ISO-2022-cn*/
1904         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1905         static const uint8_t to_iso_2022_cn[]={
1906             0x41,
1907             0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1908             0x42,
1909         };
1910         static const int32_t from_iso_2022_cnOffs [] ={
1911             0,
1912             1,1,1,1,1,1,
1913             2,
1914         };
1915 
1916         static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1917 
1918         static const uint8_t to_iso_2022_cn4[]={
1919                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1920                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1921                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1922                              0x0e,   0x21,   0x22,
1923                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1924                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1925                              0x42,
1926                              };
1927         static const int32_t from_iso_2022_cnOffs4 [] ={
1928             0,0,0,0,0,0,0,
1929             1,1,1,1,1,1,1,
1930             1,1,1,1,1,1,
1931             3,3,3,
1932             4,4,4,4,4,4,4,
1933             4,4,4,4,4,4,
1934             6
1935 
1936         };
1937 
1938         /*ISO-2022-kr*/
1939         static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1940         static const uint8_t to_iso_2022_kr2[]={
1941             0x1b,   0x24,   0x29,   0x43,
1942             0x41,
1943             0x0e,   0x25,   0x50,
1944             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1945             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1946             0x0e,   0x25,   0x50,
1947             0x0f,   0x42,
1948             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1949             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1950             0x43
1951         };
1952         static const int32_t from_iso_2022_krOffs2 [] ={
1953             -1,-1,-1,-1,
1954              0,
1955             1,1,1,
1956             2,2,2,2,2,2,2,
1957             2,2,2,2,2,2,
1958             4,4,4,
1959             5,5,
1960             6,6,6,6,6,6,
1961             6,6,6,6,6,6,
1962             8,
1963         };
1964 
1965         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1966         static const uint8_t to_iso_2022_kr[]={
1967             0x1b,   0x24,   0x29,   0x43,
1968             0x41,
1969             0x0e,   0x25,   0x50,
1970             0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1971             0x0e,   0x25,   0x50,
1972             0x0f,   0x42,
1973             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1974             0x43
1975         };
1976 
1977 
1978         static const int32_t from_iso_2022_krOffs [] ={
1979             -1,-1,-1,-1,
1980              0,
1981             1,1,1,
1982             2,2,2,2,2,2,2,
1983             3,3,3,
1984             4,4,
1985             5,5,5,5,5,5,
1986             6,
1987         };
1988         /* HZ encoding */
1989         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1990 
1991         static const uint8_t to_hz[]={
1992             0x7e,   0x7d,   0x41,
1993             0x7e,   0x7b,   0x26,   0x30,
1994             0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
1995             0x7e,   0x7b,   0x26,   0x30,
1996             0x7e,   0x7d,   0x42,
1997 
1998         };
1999         static const int32_t from_hzOffs [] ={
2000             0,0,0,
2001             1,1,1,1,
2002             2,2,2,2,2,2,2,2,
2003             3,3,3,3,
2004             4,4,4
2005         };
2006 
2007         static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2008         static const uint8_t to_hz2[]={
2009             0x7e,   0x7d,   0x41,
2010             0x7e,   0x7b,   0x26,   0x30,
2011             0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2012             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2013             0x7e,   0x7b,   0x26,   0x30,
2014             0x7e,   0x7d,   0x42,
2015             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2016             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2017             0x43
2018         };
2019         static const int32_t from_hzOffs2 [] ={
2020             0,0,0,
2021             1,1,1,1,
2022             2,2,2,2,2,2,2,2,
2023             2,2,2,2,2,2,
2024             4,4,4,4,
2025             5,5,5,
2026             6,6,6,6,6,6,
2027             6,6,6,6,6,6,
2028             8,
2029         };
2030 
2031                 /*ISCII*/
2032         static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2033         static const uint8_t to_iscii[]={
2034             0x41,
2035             0xef,   0x42,   0xa1,
2036             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2037             0xa2,
2038             0x42,
2039             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2040             0x43
2041         };
2042 
2043 
2044         static const int32_t from_isciiOffs [] ={
2045             0,
2046             1,1,1,
2047             2,2,2,2,2,2,
2048             3,
2049             4,
2050             5,5,5,5,5,5,
2051             6,
2052         };
2053 
2054         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2055                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2056                 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2057             log_err("u-> ibm-943 with subst with value did not match.\n");
2058 
2059         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2060                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2061                 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2062             log_err("u-> euc-jp with subst with value did not match.\n");
2063 
2064         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2065                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2066                 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2067             log_err("u-> euc-tw with subst with value did not match.\n");
2068 
2069         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2070                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2071                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2072             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2073 
2074         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2075                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2076                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2077             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2078 
2079         if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2080                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2081                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2082             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2083         /*ESCAPE OPTIONS*/
2084         {
2085             /* surrogate pair*/
2086             static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2087             static const uint8_t to_iso_2022_jp3_v2[]={
2088                     0x1b,   0x24,   0x42,   0x21,   0x21,
2089                     0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2090 
2091                     0x1b,   0x24,   0x42,   0x21,   0x22,
2092                     0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2093 
2094                     0x42,
2095                     0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2096                     };
2097 
2098             static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2099                 0,0,0,0,0,
2100                 1,1,1,1,1,1,1,1,1,1,1,1,
2101 
2102                 3,3,3,3,3,
2103                 4,4,4,4,4,4,4,4,4,4,4,4,
2104 
2105                 6,
2106                 7,7,7,7,7,7,7,7,7
2107             };
2108 
2109             if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2110                     to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2111                     UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2112                 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2113         }
2114         {
2115             static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2116             static const uint8_t to_iso_2022_cn5_v2[]={
2117                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2118                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2119                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2120                              0x0e,   0x21,   0x22,
2121                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2122                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2123                              0x42,
2124                              0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2125                              };
2126             static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2127                 0,0,0,0,0,0,0,
2128                 1,1,1,1,1,1,1,
2129                 1,1,1,1,1,1,
2130                 3,3,3,
2131                 4,4,4,4,4,4,4,
2132                 4,4,4,4,4,4,
2133                 6,
2134                 7,7,7,7,7,7
2135             };
2136             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2137                 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2138                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2139                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2140 
2141         }
2142         {
2143             static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2144             static const uint8_t to_iso_2022_cn6_v2[]={
2145                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2146                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2147                                 0x0e,   0x21,   0x22,
2148                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2149                                 0x42,
2150                                 0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2151                              };
2152             static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2153                     0,  0,  0,  0,  0,  0,  0,
2154                     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2155                     3,  3,  3,
2156                     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2157                     6,
2158                     7,  7,  7,  7,  7,  7,  7,  7,
2159             };
2160             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2161                 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2162                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2163                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2164 
2165         }
2166         {
2167             static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2168             static const uint8_t to_iso_2022_cn7_v2[]={
2169                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2170                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2171                                 0x0e,   0x21,   0x22,
2172                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2173                                 0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2174                             };
2175             static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2176                                 0,  0,  0,  0,  0,  0,  0,
2177                                 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2178                                 3,  3,  3,
2179                                 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2180                                 6,
2181                                 7,  7,  7,  7,  7,  7,
2182             };
2183             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2184                 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2185                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2186                 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2187 
2188         }
2189         {
2190             static const UChar iso_2022_cn_inputText8[]={
2191                                 0x3000,
2192                                 0xD84D, 0xDC56,
2193                                 0x3001,
2194                                 0xD84D, 0xDC56,
2195                                 0xDBFF, 0xDFFF,
2196                                 0x0042,
2197                                 0x0902};
2198             static const uint8_t to_iso_2022_cn8_v2[]={
2199                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2200                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2201                                 0x0e,   0x21,   0x22,
2202                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2203                                 0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2204                                 0x42,
2205                                 0x5c,   0x39,   0x30,   0x32,   0x20
2206                              };
2207             static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2208                     0,  0,  0,  0,  0,  0,  0,
2209                     1,  1,  1,  1,  1,  1,  1,  1,
2210                     3,  3,  3,
2211                     4,  4,  4,  4,  4,  4,  4,  4,
2212                     6,  6,  6,  6,  6,  6,  6,  6,
2213                     8,
2214                     9,  9,  9,  9,  9
2215             };
2216             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2217                 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2218                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2219                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2220 
2221         }
2222         {
2223             static const uint8_t to_iso_2022_cn4_v3[]={
2224                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2225                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2226                             0x0e,   0x21,   0x22,
2227                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2228                             0x42
2229                              };
2230 
2231 
2232             static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2233                 0,0,0,0,0,0,0,
2234                 1,1,1,1,1,1,1,1,1,1,1,
2235 
2236                 3,3,3,
2237                 4,4,4,4,4,4,4,4,4,4,4,
2238 
2239                 6
2240 
2241             };
2242             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2243                 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2244                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2245             {
2246                 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2247             }
2248         }
2249         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2250                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2251                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2252             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2253 
2254         if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2255                 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2256                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2257             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2258         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2259                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2260                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2261             log_err("u-> iso_2022_kr with subst with value did not match.\n");
2262         if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2263                 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2264                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2265             log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2266         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2267                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2268                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2269             log_err("u-> hz with subst with value did not match.\n");
2270         if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2271                 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2272                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2273             log_err("u-> hz with subst with value did not match.\n");
2274 
2275         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2276                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2277                 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2278             log_err("u-> iscii with subst with value did not match.\n");
2279     }
2280 #endif
2281 
2282     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2283     /*to Unicode*/
2284     {
2285 #if !UCONFIG_NO_LEGACY_CONVERSION
2286         static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2287             0x81, 0xad, /*unassigned*/
2288             0x89, 0xd3 };
2289         static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2290             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2291             0x7B87};
2292         static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2293 
2294         /* EUC_JP*/
2295         static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2296             0x8f, 0xda, 0xa1,  /*unassigned*/
2297            0x8e, 0xe0,
2298         };
2299         static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2300             0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2301             0x00a2 };
2302         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2303             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2304             9,
2305         };
2306 
2307         /*EUC_TW*/
2308         static const uint8_t sampleTxt_euc_tw[]={
2309             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2310             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2311             0xe6, 0xca, 0x8a,
2312         };
2313         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2314              0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2315              0x8706, 0x8a, };
2316         static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2317              7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2318              11, 13};
2319 
2320         /*iso-2022-jp*/
2321         static const uint8_t sampleTxt_iso_2022_jp[]={
2322             0x1b,   0x28,   0x42,   0x41,
2323             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
2324             0x1b,   0x28,   0x42,   0x42,
2325 
2326         };
2327                                                    /*     A    %    X    3    A    %    X    1    A     B    */
2328         static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2329         static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2330 
2331         /*iso-2022-cn*/
2332         static const uint8_t sampleTxt_iso_2022_cn[]={
2333             0x0f,   0x41,   0x44,
2334             0x1B,   0x24,   0x29,   0x47,
2335             0x0E,   0x40,   0x6c, /*unassigned*/
2336             0x0f,   0x42,
2337 
2338         };
2339         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2340         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2341 
2342         /*iso-2022-kr*/
2343         static const uint8_t sampleTxt_iso_2022_kr[]={
2344           0x1b, 0x24, 0x29,  0x43,
2345           0x41,
2346           0x0E, 0x7f, 0x1E,
2347           0x0e, 0x25, 0x50,
2348           0x0f, 0x51,
2349           0x42, 0x43,
2350 
2351         };
2352         static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2353         static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2354 
2355         /*hz*/
2356         static const uint8_t sampleTxt_hz[]={
2357             0x41,
2358             0x7e,   0x7b,   0x26,   0x30,
2359             0x7f,   0x1E, /*unassigned*/
2360             0x26,   0x30,
2361             0x7e,   0x7d,   0x42,
2362             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2363             0x7e,   0x7d,   0x42,
2364         };
2365         static const UChar hztoUnicode[]={
2366             0x41,
2367             0x03a0,
2368             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2369             0x03A0,
2370             0x42,
2371             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2372             0x42,};
2373 
2374         static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2375 
2376 
2377         /*iscii*/
2378         static const uint8_t sampleTxt_iscii[]={
2379             0x41,
2380             0x30,
2381             0xEB, /*unassigned*/
2382             0xa3,
2383             0x42,
2384             0xEC, /*unassigned*/
2385             0x42,
2386         };
2387         static const UChar isciitoUnicode[]={
2388             0x41,
2389             0x30,
2390             0x25,  0x58,  0x45, 0x42,
2391             0x0903,
2392             0x42,
2393             0x25,  0x58,  0x45, 0x43,
2394             0x42,};
2395 
2396         static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2397 #endif
2398 
2399         /*UTF8*/
2400         static const uint8_t sampleTxtUTF8[]={
2401             0x20, 0x64, 0x50,
2402             0xC2, 0x7E, /* truncated char */
2403             0x20,
2404             0xE0, 0xB5, 0x7E, /* truncated char */
2405             0x40,
2406         };
2407         static const UChar UTF8ToUnicode[]={
2408             0x0020, 0x0064, 0x0050,
2409             0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2410             0x0020,
2411             0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2412             0x0040
2413         };
2414         static const int32_t fromUTF8[] = {
2415             0, 1, 2,
2416             3, 3, 3, 3, 4,
2417             5,
2418             6, 6, 6, 6, 6, 6, 6, 6, 8,
2419             9
2420         };
2421         static const UChar UTF8ToUnicodeXML_DEC[]={
2422             0x0020, 0x0064, 0x0050,
2423             0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2424             0x0020,
2425             0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2426             0x0040
2427         };
2428         static const int32_t fromUTF8XML_DEC[] = {
2429             0, 1, 2,
2430             3, 3, 3, 3, 3, 3, 4,
2431             5,
2432             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2433             9
2434         };
2435 
2436 
2437 #if !UCONFIG_NO_LEGACY_CONVERSION
2438         if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2439                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2440                 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2441             log_err("ibm-943->u with substitute with value did not match.\n");
2442 
2443         if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2444                  EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2445                 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2446             log_err("euc-jp->u with substitute with value did not match.\n");
2447 
2448         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2449                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2450                 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2451             log_err("euc-tw->u with substitute with value did not match.\n");
2452 
2453         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2454                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2455                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2456             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2457 
2458         if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2459                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2460                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2461             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2462 
2463         {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2464             {
2465                 static const UChar iso_2022_jptoUnicodeDec[]={
2466                                                   0x0041,
2467                                                   /*   &         #         5         8         ;   */
2468                                                   0x0026,   0x0023,   0x0035,   0x0038,   0x003b,
2469                                                   0x0026,   0x0023,   0x0032,   0x0036,   0x003b,
2470                                                   0x0042 };
2471                 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2472                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2473                      iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2474                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2475                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2476             }
2477             {
2478                 static const UChar iso_2022_jptoUnicodeHex[]={
2479                                                   0x0041,
2480                                                   /*   &       #       x       3       A       ;  */
2481                                                   0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2482                                                   0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2483                                                   0x0042 };
2484                 static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2485                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2486                      iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2487                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2488                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2489             }
2490             {
2491                 static const UChar iso_2022_jptoUnicodeC[]={
2492                                                 0x0041,
2493                                                 0x005C, 0x0078, 0x0033, 0x0041,   /*  \x3A */
2494                                                 0x005C, 0x0078, 0x0031, 0x0041,   /*  \x1A */
2495                                                 0x0042 };
2496                 int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2497                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2498                      iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2499                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2500                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2501             }
2502         }
2503         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2504                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2505                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2506             log_err("iso-2022-cn->u with substitute with value did not match.\n");
2507 
2508         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2509                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2510                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2511             log_err("iso-2022-kr->u with substitute with value did not match.\n");
2512 
2513          if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2514                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2515                 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2516             log_err("hz->u with substitute with value did not match.\n");
2517 
2518          if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2519                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2520                 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2521             log_err("ISCII ->u with substitute with value did not match.\n");
2522 #endif
2523 
2524         if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2525                 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2526                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2527             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2528         if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2529                 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2530                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2531             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2532     }
2533 }
2534 
2535 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2536 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2537 {
2538     static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2539     static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2540     static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2541 
2542 
2543     static const uint8_t text943[] = {
2544         0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2545     static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2546     static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2547     static const UChar toUnicode943stop[]= { 0x304b};
2548 
2549     static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2550     static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2551     static const int32_t  fromIBM943Offsstop[] = { 0};
2552 
2553     gInBufferSize = inputsize;
2554     gOutBufferSize = outputsize;
2555     /*checking with a legal value*/
2556     if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2557             templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2558             UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2559         log_err("u-> ibm-949 with skip did not match.\n");
2560 
2561     /*checking illegal value for ibm-943 with substitute*/
2562     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2563              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2564             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2565         log_err("ibm-943->u with subst did not match.\n");
2566     /*checking illegal value for ibm-943 with skip */
2567     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2568              toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2569             UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2570         log_err("ibm-943->u with skip did not match.\n");
2571 
2572     /*checking illegal value for ibm-943 with stop */
2573     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2574              toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2575             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2576         log_err("ibm-943->u with stop did not match.\n");
2577 
2578 }
2579 
TestSingleByte(int32_t inputsize,int32_t outputsize)2580 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2581 {
2582     static const uint8_t sampleText[] = {
2583         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2584         0xff, 0x32, 0x33};
2585     static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2586     static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2587     /*checking illegal value for ibm-943 with substitute*/
2588     gInBufferSize = inputsize;
2589     gOutBufferSize = outputsize;
2590 
2591     if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2592              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2593             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2594         log_err("ibm-943->u with subst did not match.\n");
2595 }
2596 
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2597 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2598 {
2599     /*EBCDIC_STATEFUL*/
2600     static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2601     static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2602     static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2603 /*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2604 
2605     /*EBCDIC_STATEFUL with subChar=3f*/
2606     static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2607     static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2608     static const char mySubChar[]={ 0x3f};
2609 
2610     gInBufferSize = inputsize;
2611     gOutBufferSize = outputsize;
2612 
2613     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2614         toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2615         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2616             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2617 
2618     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2619         toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2620         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2621             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2622 }
2623 #endif
2624 
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2625 UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2626                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2627                 const char *mySubChar, int8_t len)
2628 {
2629 
2630 
2631     UErrorCode status = U_ZERO_ERROR;
2632     UConverter *conv = 0;
2633     char junkout[NEW_MAX_BUFFER]; /* FIX */
2634     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2635     const UChar *src;
2636     char *end;
2637     char *targ;
2638     int32_t *offs;
2639     int i;
2640     int32_t  realBufferSize;
2641     char *realBufferEnd;
2642     const UChar *realSourceEnd;
2643     const UChar *sourceLimit;
2644     UBool checkOffsets = TRUE;
2645     UBool doFlush;
2646     char junk[9999];
2647     char offset_str[9999];
2648     char *p;
2649     UConverterFromUCallback oldAction = NULL;
2650     const void* oldContext = NULL;
2651 
2652 
2653     for(i=0;i<NEW_MAX_BUFFER;i++)
2654         junkout[i] = (char)0xF0;
2655     for(i=0;i<NEW_MAX_BUFFER;i++)
2656         junokout[i] = 0xFF;
2657     setNuConvTestName(codepage, "FROM");
2658 
2659     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2660             gOutBufferSize);
2661 
2662     conv = ucnv_open(codepage, &status);
2663     if(U_FAILURE(status))
2664     {
2665         log_data_err("Couldn't open converter %s\n",codepage);
2666         return TRUE;
2667     }
2668 
2669     log_verbose("Converter opened..\n");
2670 
2671     /*----setting the callback routine----*/
2672     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2673     if (U_FAILURE(status))
2674     {
2675         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2676     }
2677     /*------------------------*/
2678     /*setting the subChar*/
2679     if(mySubChar != NULL){
2680         ucnv_setSubstChars(conv, mySubChar, len, &status);
2681         if (U_FAILURE(status))  {
2682             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2683         }
2684     }
2685     /*------------*/
2686 
2687     src = source;
2688     targ = junkout;
2689     offs = junokout;
2690 
2691     realBufferSize = UPRV_LENGTHOF(junkout);
2692     realBufferEnd = junkout + realBufferSize;
2693     realSourceEnd = source + sourceLen;
2694 
2695     if ( gOutBufferSize != realBufferSize )
2696       checkOffsets = FALSE;
2697 
2698     if( gInBufferSize != NEW_MAX_BUFFER )
2699       checkOffsets = FALSE;
2700 
2701     do
2702     {
2703         end = nct_min(targ + gOutBufferSize, realBufferEnd);
2704         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2705 
2706         doFlush = (UBool)(sourceLimit == realSourceEnd);
2707 
2708         if(targ == realBufferEnd)
2709         {
2710             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2711             return FALSE;
2712         }
2713         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2714 
2715 
2716         status = U_ZERO_ERROR;
2717 
2718         ucnv_fromUnicode (conv,
2719                   (char **)&targ,
2720                   (const char *)end,
2721                   &src,
2722                   sourceLimit,
2723                   checkOffsets ? offs : NULL,
2724                   doFlush, /* flush if we're at the end of the input data */
2725                   &status);
2726     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2727 
2728 
2729     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2730         UChar errChars[50]; /* should be sufficient */
2731         int8_t errLen = 50;
2732         UErrorCode err = U_ZERO_ERROR;
2733         const UChar* start= NULL;
2734         ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2735         if(U_FAILURE(err)){
2736             log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2737         }
2738         /* length of in invalid chars should be equal to returned length*/
2739         start = src - errLen;
2740         if(u_strncmp(errChars,start,errLen)!=0){
2741             log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2742         }
2743     }
2744     /* allow failure codes for the stop callback */
2745     if(U_FAILURE(status) &&
2746        (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2747     {
2748         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2749         return FALSE;
2750     }
2751 
2752     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2753         sourceLen, targ-junkout);
2754     if(getTestOption(VERBOSITY_OPTION))
2755     {
2756 
2757         junk[0] = 0;
2758         offset_str[0] = 0;
2759         for(p = junkout;p<targ;p++)
2760         {
2761             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2762             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2763         }
2764 
2765         log_verbose(junk);
2766         printSeq(expect, expectLen);
2767         if ( checkOffsets )
2768         {
2769             log_verbose("\nOffsets:");
2770             log_verbose(offset_str);
2771         }
2772         log_verbose("\n");
2773     }
2774     ucnv_close(conv);
2775 
2776 
2777     if(expectLen != targ-junkout)
2778     {
2779         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2780         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2781         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2782         printSeqErr(expect, expectLen);
2783         return FALSE;
2784     }
2785 
2786     if (checkOffsets && (expectOffsets != 0) )
2787     {
2788         log_verbose("comparing %d offsets..\n", targ-junkout);
2789         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2790             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2791             log_err("Got Output : ");
2792             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2793             log_err("Got Offsets:      ");
2794             for(p=junkout;p<targ;p++)
2795                 log_err("%d,", junokout[p-junkout]);
2796             log_err("\n");
2797             log_err("Expected Offsets: ");
2798             for(i=0; i<(targ-junkout); i++)
2799                 log_err("%d,", expectOffsets[i]);
2800             log_err("\n");
2801             return FALSE;
2802         }
2803     }
2804 
2805     if(!memcmp(junkout, expect, expectLen))
2806     {
2807         log_verbose("String matches! %s\n", gNuConvTestName);
2808         return TRUE;
2809     }
2810     else
2811     {
2812         log_err("String does not match. %s\n", gNuConvTestName);
2813         log_err("source: ");
2814         printUSeqErr(source, sourceLen);
2815         log_err("Got:      ");
2816         printSeqErr((const uint8_t *)junkout, expectLen);
2817         log_err("Expected: ");
2818         printSeqErr(expect, expectLen);
2819         return FALSE;
2820     }
2821 }
2822 
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2823 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2824                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2825                const char *mySubChar, int8_t len)
2826 {
2827     UErrorCode status = U_ZERO_ERROR;
2828     UConverter *conv = 0;
2829     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2830     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2831     const char *src;
2832     const char *realSourceEnd;
2833     const char *srcLimit;
2834     UChar *targ;
2835     UChar *end;
2836     int32_t *offs;
2837     int i;
2838     UBool   checkOffsets = TRUE;
2839     char junk[9999];
2840     char offset_str[9999];
2841     UChar *p;
2842     UConverterToUCallback oldAction = NULL;
2843     const void* oldContext = NULL;
2844 
2845     int32_t   realBufferSize;
2846     UChar *realBufferEnd;
2847 
2848 
2849     for(i=0;i<NEW_MAX_BUFFER;i++)
2850         junkout[i] = 0xFFFE;
2851 
2852     for(i=0;i<NEW_MAX_BUFFER;i++)
2853         junokout[i] = -1;
2854 
2855     setNuConvTestName(codepage, "TO");
2856 
2857     log_verbose("\n=========  %s\n", gNuConvTestName);
2858 
2859     conv = ucnv_open(codepage, &status);
2860     if(U_FAILURE(status))
2861     {
2862         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2863         return TRUE;
2864     }
2865 
2866     log_verbose("Converter opened..\n");
2867 
2868     src = (const char *)source;
2869     targ = junkout;
2870     offs = junokout;
2871 
2872     realBufferSize = UPRV_LENGTHOF(junkout);
2873     realBufferEnd = junkout + realBufferSize;
2874     realSourceEnd = src + sourcelen;
2875     /*----setting the callback routine----*/
2876     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2877     if (U_FAILURE(status))
2878     {
2879         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2880     }
2881     /*-------------------------------------*/
2882     /*setting the subChar*/
2883     if(mySubChar != NULL){
2884         ucnv_setSubstChars(conv, mySubChar, len, &status);
2885         if (U_FAILURE(status))  {
2886             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2887         }
2888     }
2889     /*------------*/
2890 
2891 
2892     if ( gOutBufferSize != realBufferSize )
2893         checkOffsets = FALSE;
2894 
2895     if( gInBufferSize != NEW_MAX_BUFFER )
2896         checkOffsets = FALSE;
2897 
2898     do
2899     {
2900         end = nct_min( targ + gOutBufferSize, realBufferEnd);
2901         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2902 
2903         if(targ == realBufferEnd)
2904         {
2905             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2906             return FALSE;
2907         }
2908         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2909 
2910 
2911 
2912         status = U_ZERO_ERROR;
2913 
2914         ucnv_toUnicode (conv,
2915                 &targ,
2916                 end,
2917                 (const char **)&src,
2918                 (const char *)srcLimit,
2919                 checkOffsets ? offs : NULL,
2920                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2921                 &status);
2922     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2923 
2924     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2925         char errChars[50]; /* should be sufficient */
2926         int8_t errLen = 50;
2927         UErrorCode err = U_ZERO_ERROR;
2928         const char* start= NULL;
2929         ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2930         if(U_FAILURE(err)){
2931             log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2932         }
2933         /* length of in invalid chars should be equal to returned length*/
2934         start = src - errLen;
2935         if(uprv_strncmp(errChars,start,errLen)!=0){
2936             log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2937         }
2938     }
2939     /* allow failure codes for the stop callback */
2940     if(U_FAILURE(status) &&
2941        (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2942     {
2943         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2944         return FALSE;
2945     }
2946 
2947     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2948         sourcelen, targ-junkout);
2949     if(getTestOption(VERBOSITY_OPTION))
2950     {
2951 
2952         junk[0] = 0;
2953         offset_str[0] = 0;
2954 
2955         for(p = junkout;p<targ;p++)
2956         {
2957             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2958             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2959         }
2960 
2961         log_verbose(junk);
2962         printUSeq(expect, expectlen);
2963         if ( checkOffsets )
2964         {
2965             log_verbose("\nOffsets:");
2966             log_verbose(offset_str);
2967         }
2968         log_verbose("\n");
2969     }
2970     ucnv_close(conv);
2971 
2972     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2973 
2974     if (checkOffsets && (expectOffsets != 0))
2975     {
2976         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2977         {
2978             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2979             log_err("Got offsets:      ");
2980             for(p=junkout;p<targ;p++)
2981                 log_err("  %2d,", junokout[p-junkout]);
2982             log_err("\n");
2983             log_err("Expected offsets: ");
2984             for(i=0; i<(targ-junkout); i++)
2985                 log_err("  %2d,", expectOffsets[i]);
2986             log_err("\n");
2987             log_err("Got output:       ");
2988             for(i=0; i<(targ-junkout); i++)
2989                 log_err("0x%04x,", junkout[i]);
2990             log_err("\n");
2991             log_err("From source:      ");
2992             for(i=0; i<(src-(const char *)source); i++)
2993                 log_err("  0x%02x,", (unsigned char)source[i]);
2994             log_err("\n");
2995         }
2996     }
2997 
2998     if(!memcmp(junkout, expect, expectlen*2))
2999     {
3000         log_verbose("Matches!\n");
3001         return TRUE;
3002     }
3003     else
3004     {
3005         log_err("String does not match. %s\n", gNuConvTestName);
3006         log_verbose("String does not match. %s\n", gNuConvTestName);
3007         log_err("Got:      ");
3008         printUSeqErr(junkout, expectlen);
3009         log_err("Expected: ");
3010         printUSeqErr(expect, expectlen);
3011         log_err("\n");
3012         return FALSE;
3013     }
3014 }
3015 
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3016 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3017                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3018                 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3019 {
3020 
3021 
3022     UErrorCode status = U_ZERO_ERROR;
3023     UConverter *conv = 0;
3024     char junkout[NEW_MAX_BUFFER]; /* FIX */
3025     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3026     const UChar *src;
3027     char *end;
3028     char *targ;
3029     int32_t *offs;
3030     int i;
3031     int32_t  realBufferSize;
3032     char *realBufferEnd;
3033     const UChar *realSourceEnd;
3034     const UChar *sourceLimit;
3035     UBool checkOffsets = TRUE;
3036     UBool doFlush;
3037     char junk[9999];
3038     char offset_str[9999];
3039     char *p;
3040     UConverterFromUCallback oldAction = NULL;
3041     const void* oldContext = NULL;
3042 
3043 
3044     for(i=0;i<NEW_MAX_BUFFER;i++)
3045         junkout[i] = (char)0xF0;
3046     for(i=0;i<NEW_MAX_BUFFER;i++)
3047         junokout[i] = 0xFF;
3048     setNuConvTestName(codepage, "FROM");
3049 
3050     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3051             gOutBufferSize);
3052 
3053     conv = ucnv_open(codepage, &status);
3054     if(U_FAILURE(status))
3055     {
3056         log_data_err("Couldn't open converter %s\n",codepage);
3057         return TRUE; /* Because the err has already been logged. */
3058     }
3059 
3060     log_verbose("Converter opened..\n");
3061 
3062     /*----setting the callback routine----*/
3063     ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3064     if (U_FAILURE(status))
3065     {
3066         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3067     }
3068     /*------------------------*/
3069     /*setting the subChar*/
3070     if(mySubChar != NULL){
3071         ucnv_setSubstChars(conv, mySubChar, len, &status);
3072         if (U_FAILURE(status))  {
3073             log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3074         }
3075     }
3076     /*------------*/
3077 
3078     src = source;
3079     targ = junkout;
3080     offs = junokout;
3081 
3082     realBufferSize = UPRV_LENGTHOF(junkout);
3083     realBufferEnd = junkout + realBufferSize;
3084     realSourceEnd = source + sourceLen;
3085 
3086     if ( gOutBufferSize != realBufferSize )
3087       checkOffsets = FALSE;
3088 
3089     if( gInBufferSize != NEW_MAX_BUFFER )
3090       checkOffsets = FALSE;
3091 
3092     do
3093     {
3094         end = nct_min(targ + gOutBufferSize, realBufferEnd);
3095         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3096 
3097         doFlush = (UBool)(sourceLimit == realSourceEnd);
3098 
3099         if(targ == realBufferEnd)
3100         {
3101             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3102             return FALSE;
3103         }
3104         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3105 
3106 
3107         status = U_ZERO_ERROR;
3108 
3109         ucnv_fromUnicode (conv,
3110                   (char **)&targ,
3111                   (const char *)end,
3112                   &src,
3113                   sourceLimit,
3114                   checkOffsets ? offs : NULL,
3115                   doFlush, /* flush if we're at the end of the input data */
3116                   &status);
3117     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3118 
3119     /* allow failure codes for the stop callback */
3120     if(U_FAILURE(status) && status != expectedError)
3121     {
3122         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3123         return FALSE;
3124     }
3125 
3126     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3127         sourceLen, targ-junkout);
3128     if(getTestOption(VERBOSITY_OPTION))
3129     {
3130 
3131         junk[0] = 0;
3132         offset_str[0] = 0;
3133         for(p = junkout;p<targ;p++)
3134         {
3135             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3136             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3137         }
3138 
3139         log_verbose(junk);
3140         printSeq(expect, expectLen);
3141         if ( checkOffsets )
3142         {
3143             log_verbose("\nOffsets:");
3144             log_verbose(offset_str);
3145         }
3146         log_verbose("\n");
3147     }
3148     ucnv_close(conv);
3149 
3150 
3151     if(expectLen != targ-junkout)
3152     {
3153         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3154         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3155         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3156         printSeqErr(expect, expectLen);
3157         return FALSE;
3158     }
3159 
3160     if (checkOffsets && (expectOffsets != 0) )
3161     {
3162         log_verbose("comparing %d offsets..\n", targ-junkout);
3163         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3164             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3165             log_err("Got Output : ");
3166             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3167             log_err("Got Offsets:      ");
3168             for(p=junkout;p<targ;p++)
3169                 log_err("%d,", junokout[p-junkout]);
3170             log_err("\n");
3171             log_err("Expected Offsets: ");
3172             for(i=0; i<(targ-junkout); i++)
3173                 log_err("%d,", expectOffsets[i]);
3174             log_err("\n");
3175             return FALSE;
3176         }
3177     }
3178 
3179     if(!memcmp(junkout, expect, expectLen))
3180     {
3181         log_verbose("String matches! %s\n", gNuConvTestName);
3182         return TRUE;
3183     }
3184     else
3185     {
3186         log_err("String does not match. %s\n", gNuConvTestName);
3187         log_err("source: ");
3188         printUSeqErr(source, sourceLen);
3189         log_err("Got:      ");
3190         printSeqErr((const uint8_t *)junkout, expectLen);
3191         log_err("Expected: ");
3192         printSeqErr(expect, expectLen);
3193         return FALSE;
3194     }
3195 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3196 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3197                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3198                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3199 {
3200     UErrorCode status = U_ZERO_ERROR;
3201     UConverter *conv = 0;
3202     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3203     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3204     const char *src;
3205     const char *realSourceEnd;
3206     const char *srcLimit;
3207     UChar *targ;
3208     UChar *end;
3209     int32_t *offs;
3210     int i;
3211     UBool   checkOffsets = TRUE;
3212     char junk[9999];
3213     char offset_str[9999];
3214     UChar *p;
3215     UConverterToUCallback oldAction = NULL;
3216     const void* oldContext = NULL;
3217 
3218     int32_t   realBufferSize;
3219     UChar *realBufferEnd;
3220 
3221 
3222     for(i=0;i<NEW_MAX_BUFFER;i++)
3223         junkout[i] = 0xFFFE;
3224 
3225     for(i=0;i<NEW_MAX_BUFFER;i++)
3226         junokout[i] = -1;
3227 
3228     setNuConvTestName(codepage, "TO");
3229 
3230     log_verbose("\n=========  %s\n", gNuConvTestName);
3231 
3232     conv = ucnv_open(codepage, &status);
3233     if(U_FAILURE(status))
3234     {
3235         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3236         return TRUE;
3237     }
3238 
3239     log_verbose("Converter opened..\n");
3240 
3241     src = (const char *)source;
3242     targ = junkout;
3243     offs = junokout;
3244 
3245     realBufferSize = UPRV_LENGTHOF(junkout);
3246     realBufferEnd = junkout + realBufferSize;
3247     realSourceEnd = src + sourcelen;
3248     /*----setting the callback routine----*/
3249     ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3250     if (U_FAILURE(status))
3251     {
3252         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3253     }
3254     /*-------------------------------------*/
3255     /*setting the subChar*/
3256     if(mySubChar != NULL){
3257         ucnv_setSubstChars(conv, mySubChar, len, &status);
3258         if (U_FAILURE(status))  {
3259             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3260         }
3261     }
3262     /*------------*/
3263 
3264 
3265     if ( gOutBufferSize != realBufferSize )
3266         checkOffsets = FALSE;
3267 
3268     if( gInBufferSize != NEW_MAX_BUFFER )
3269         checkOffsets = FALSE;
3270 
3271     do
3272     {
3273         end = nct_min( targ + gOutBufferSize, realBufferEnd);
3274         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3275 
3276         if(targ == realBufferEnd)
3277         {
3278             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3279             return FALSE;
3280         }
3281         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3282 
3283 
3284 
3285         status = U_ZERO_ERROR;
3286 
3287         ucnv_toUnicode (conv,
3288                 &targ,
3289                 end,
3290                 (const char **)&src,
3291                 (const char *)srcLimit,
3292                 checkOffsets ? offs : NULL,
3293                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3294                 &status);
3295     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3296 
3297     /* allow failure codes for the stop callback */
3298     if(U_FAILURE(status) && status!=expectedError)
3299     {
3300         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3301         return FALSE;
3302     }
3303 
3304     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3305         sourcelen, targ-junkout);
3306     if(getTestOption(VERBOSITY_OPTION))
3307     {
3308 
3309         junk[0] = 0;
3310         offset_str[0] = 0;
3311 
3312         for(p = junkout;p<targ;p++)
3313         {
3314             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3315             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3316         }
3317 
3318         log_verbose(junk);
3319         printUSeq(expect, expectlen);
3320         if ( checkOffsets )
3321         {
3322             log_verbose("\nOffsets:");
3323             log_verbose(offset_str);
3324         }
3325         log_verbose("\n");
3326     }
3327     ucnv_close(conv);
3328 
3329     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3330 
3331     if (checkOffsets && (expectOffsets != 0))
3332     {
3333         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3334         {
3335             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3336             log_err("Got offsets:      ");
3337             for(p=junkout;p<targ;p++)
3338                 log_err("  %2d,", junokout[p-junkout]);
3339             log_err("\n");
3340             log_err("Expected offsets: ");
3341             for(i=0; i<(targ-junkout); i++)
3342                 log_err("  %2d,", expectOffsets[i]);
3343             log_err("\n");
3344             log_err("Got output:       ");
3345             for(i=0; i<(targ-junkout); i++)
3346                 log_err("0x%04x,", junkout[i]);
3347             log_err("\n");
3348             log_err("From source:      ");
3349             for(i=0; i<(src-(const char *)source); i++)
3350                 log_err("  0x%02x,", (unsigned char)source[i]);
3351             log_err("\n");
3352         }
3353     }
3354 
3355     if(!memcmp(junkout, expect, expectlen*2))
3356     {
3357         log_verbose("Matches!\n");
3358         return TRUE;
3359     }
3360     else
3361     {
3362         log_err("String does not match. %s\n", gNuConvTestName);
3363         log_verbose("String does not match. %s\n", gNuConvTestName);
3364         log_err("Got:      ");
3365         printUSeqErr(junkout, expectlen);
3366         log_err("Expected: ");
3367         printUSeqErr(expect, expectlen);
3368         log_err("\n");
3369         return FALSE;
3370     }
3371 }
3372 
TestCallBackFailure(void)3373 static void TestCallBackFailure(void) {
3374     UErrorCode status = U_USELESS_COLLATOR_ERROR;
3375     ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3376     if (status != U_USELESS_COLLATOR_ERROR) {
3377         log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3378     }
3379     ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3380     if (status != U_USELESS_COLLATOR_ERROR) {
3381         log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3382     }
3383     ucnv_cbFromUWriteSub(NULL, -1, &status);
3384     if (status != U_USELESS_COLLATOR_ERROR) {
3385         log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3386     }
3387     ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3388     if (status != U_USELESS_COLLATOR_ERROR) {
3389         log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3390     }
3391 }
3392