1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 2002-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  custrtst.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2002oct09
14 *   created by: Markus W. Scherer
15 *
16 *   Tests of ustring.h Unicode string API functions.
17 */
18 
19 #include "unicode/ustring.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/uiter.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 #include <string.h>
25 
/*
 * Get the sign of an integer: evaluates to -1, 0 or +1 for a negative, zero
 * or positive value.
 * Written with comparisons only, so it does not depend on how the compiler
 * right-shifts a negative signed value and does not truncate wider arguments.
 * Note that the argument is evaluated twice; do not pass expressions with
 * side effects.
 */
#define _SIGN(value) (((value)>0)-((value)<0))
28 
29 /* test setup --------------------------------------------------------------- */
30 
31 static void setUpDataTable(void);
32 static void TestStringCopy(void);
33 static void TestStringFunctions(void);
34 static void TestStringSearching(void);
35 static void TestSurrogateSearching(void);
36 static void TestUnescape(void);
37 static void TestCountChar32(void);
38 static void TestUCharIterator(void);
39 
40 void addUStringTest(TestNode** root);
41 
/*
 * Register every test of this file under the "tsutil/custrtst/" path.
 * The tests are added in the order in which they appear in the table.
 */
void addUStringTest(TestNode** root)
{
    static const struct {
        void (*function)(void);
        const char *path;
    } tests[] = {
        { TestStringCopy,         "tsutil/custrtst/TestStringCopy" },
        { TestStringFunctions,    "tsutil/custrtst/TestStringFunctions" },
        { TestStringSearching,    "tsutil/custrtst/TestStringSearching" },
        { TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching" },
        { TestUnescape,           "tsutil/custrtst/TestUnescape" },
        { TestCountChar32,        "tsutil/custrtst/TestCountChar32" },
        { TestUCharIterator,      "tsutil/custrtst/TestUCharIterator" }
    };
    const int32_t testCount = (int32_t)(sizeof(tests) / sizeof(tests[0]));
    int32_t index;

    for (index = 0; index < testCount; ++index) {
        addTest(root, tests[index].function, tests[index].path);
    }
}
52 
53 /* test data for TestStringFunctions ---------------------------------------- */
54 
55 UChar*** dataTable = NULL;
56 
57 static const char* raw[3][4] = {
58 
59     /* First String */
60     {   "English_",  "French_",   "Croatian_", "English_"},
61     /* Second String */
62     {   "United States",    "France",   "Croatia",  "Unites States"},
63 
64    /* Concatenated string */
65     {   "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
66 };
67 
setUpDataTable()68 static void setUpDataTable()
69 {
70     int32_t i,j;
71     if(dataTable == NULL) {
72         dataTable = (UChar***)calloc(sizeof(UChar**),3);
73 
74             for (i = 0; i < 3; i++) {
75               dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
76                 for (j = 0; j < 4; j++){
77                     dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
78                     u_uastrcpy(dataTable[i][j],raw[i][j]);
79                 }
80             }
81     }
82 }
83 
cleanUpDataTable()84 static void cleanUpDataTable()
85 {
86     int32_t i,j;
87     if(dataTable != NULL) {
88         for (i=0; i<3; i++) {
89             for(j = 0; j<4; j++) {
90                 free(dataTable[i][j]);
91             }
92             free(dataTable[i]);
93         }
94         free(dataTable);
95     }
96     dataTable = NULL;
97 }
98 
99 /*Tests  for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions()100 static void TestStringFunctions()
101 {
102     int32_t i,j,k;
103     UChar temp[512];
104     UChar nullTemp[512];
105     char test[512];
106     char tempOut[512];
107 
108     setUpDataTable();
109 
110     log_verbose("Testing u_strlen()\n");
111     if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
112         log_err("There is an error in u_strlen()");
113 
114     log_verbose("Testing u_memcpy() and u_memcmp()\n");
115 
116     for(i=0;i<3;++i)
117     {
118         for(j=0;j<4;++j)
119         {
120             log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
121             temp[0] = 0;
122             temp[7] = 0xA4; /* Mark the end */
123             u_memcpy(temp,dataTable[i][j], 7);
124 
125             if(temp[7] != 0xA4)
126                 log_err("an error occured in u_memcpy()\n");
127             if(u_memcmp(temp, dataTable[i][j], 7)!=0)
128                 log_err("an error occured in u_memcpy() or u_memcmp()\n");
129         }
130     }
131     if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
132         log_err("an error occured in u_memcmp()\n");
133 
134     log_verbose("Testing u_memset()\n");
135     nullTemp[0] = 0;
136     nullTemp[7] = 0;
137     u_memset(nullTemp, 0xa4, 7);
138     for (i = 0; i < 7; i++) {
139         if(nullTemp[i] != 0xa4) {
140             log_err("an error occured in u_memset()\n");
141         }
142     }
143     if(nullTemp[7] != 0) {
144         log_err("u_memset() went too far\n");
145     }
146 
147     u_memset(nullTemp, 0, 7);
148     nullTemp[7] = 0xa4;
149     temp[7] = 0;
150     u_memcpy(temp,nullTemp, 7);
151     if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
152         log_err("an error occured in u_memcpy() or u_memcmp()\n");
153 
154 
155     log_verbose("Testing u_memmove()\n");
156     for (i = 0; i < 7; i++) {
157         temp[i] = (UChar)i;
158     }
159     u_memmove(temp + 1, temp, 7);
160     if(temp[0] != 0) {
161         log_err("an error occured in u_memmove()\n");
162     }
163     for (i = 1; i <= 7; i++) {
164         if(temp[i] != (i - 1)) {
165             log_err("an error occured in u_memmove()\n");
166         }
167     }
168 
169     log_verbose("Testing u_strcpy() and u_strcmp()\n");
170 
171     for(i=0;i<3;++i)
172     {
173         for(j=0;j<4;++j)
174         {
175             log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
176             temp[0] = 0;
177             u_strcpy(temp,dataTable[i][j]);
178 
179             if(u_strcmp(temp,dataTable[i][j])!=0)
180                 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
181         }
182     }
183     if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
184         log_err("an error occured in u_memcmp()\n");
185 
186     log_verbose("testing u_strcat()\n");
187     i=0;
188     for(j=0; j<2;++j)
189     {
190         u_uastrcpy(temp, "");
191         u_strcpy(temp,dataTable[i][j]);
192         u_strcat(temp,dataTable[i+1][j]);
193         if(u_strcmp(temp,dataTable[i+2][j])!=0)
194             log_err("something threw an error in u_strcat()\n");
195 
196     }
197     log_verbose("Testing u_strncmp()\n");
198     for(i=0,j=0;j<4; ++j)
199     {
200         k=u_strlen(dataTable[i][j]);
201         if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
202             log_err("Something threw an error in u_strncmp\n");
203     }
204     if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
205         log_err("an error occured in u_memcmp()\n");
206 
207 
208     log_verbose("Testing u_strncat\n");
209     for(i=0,j=0;j<4; ++j)
210     {
211         k=u_strlen(dataTable[i][j]);
212 
213         u_uastrcpy(temp,"");
214 
215         if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
216             log_err("something threw an error in u_strncat or u_uastrcpy()\n");
217 
218     }
219 
220     log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
221     for(i=2,j=0;j<4; ++j)
222     {
223         k=u_strlen(dataTable[i][j]);
224         u_strncpy(temp, dataTable[i][j],k);
225         temp[k] = 0xa4;
226 
227         if(u_strncmp(temp, dataTable[i][j],k)!=0)
228             log_err("something threw an error in u_strncpy()\n");
229 
230         if(temp[k] != 0xa4)
231             log_err("something threw an error in u_strncpy()\n");
232 
233         u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
234         u_uastrncpy(temp, raw[i][j], k-1);
235         if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
236             log_err("something threw an error in u_uastrncpy(k-1)\n");
237 
238         if(temp[k-1] != 0x3F)
239             log_err("something threw an error in u_uastrncpy(k-1)\n");
240 
241         u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
242         u_uastrncpy(temp, raw[i][j], k+1);
243         if(u_strcmp(temp, dataTable[i][j])!=0)
244             log_err("something threw an error in u_uastrncpy(k+1)\n");
245 
246         if(temp[k] != 0)
247             log_err("something threw an error in u_uastrncpy(k+1)\n");
248 
249         u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
250         u_uastrncpy(temp, raw[i][j], k);
251         if(u_strncmp(temp, dataTable[i][j], k)!=0)
252             log_err("something threw an error in u_uastrncpy(k)\n");
253 
254         if(temp[k] != 0x3F)
255             log_err("something threw an error in u_uastrncpy(k)\n");
256     }
257 
258     log_verbose("Testing u_strchr() and u_memchr()\n");
259 
260     for(i=2,j=0;j<4;j++)
261     {
262         UChar saveVal = dataTable[i][j][0];
263         UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
264         int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
265 
266         log_verbose("%s ", u_austrcpy(tempOut, findPtr));
267 
268         if (findPtr == NULL || *findPtr != 0x005F) {
269             log_err("u_strchr can't find '_' in the string\n");
270         }
271 
272         findPtr = u_strchr32(dataTable[i][j], 0x005F);
273         if (findPtr == NULL || *findPtr != 0x005F) {
274             log_err("u_strchr32 can't find '_' in the string\n");
275         }
276 
277         findPtr = u_strchr(dataTable[i][j], 0);
278         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
279             log_err("u_strchr can't find NULL in the string\n");
280         }
281 
282         findPtr = u_strchr32(dataTable[i][j], 0);
283         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
284             log_err("u_strchr32 can't find NULL in the string\n");
285         }
286 
287         findPtr = u_memchr(dataTable[i][j], 0, dataSize);
288         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
289             log_err("u_memchr can't find NULL in the string\n");
290         }
291 
292         findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
293         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
294             log_err("u_memchr32 can't find NULL in the string\n");
295         }
296 
297         dataTable[i][j][0] = 0;
298         /* Make sure we skip over the NULL termination */
299         findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
300         if (findPtr == NULL || *findPtr != 0x005F) {
301             log_err("u_memchr can't find '_' in the string\n");
302         }
303 
304         findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
305         if (findPtr == NULL || *findPtr != 0x005F) {
306             log_err("u_memchr32 can't find '_' in the string\n");
307         }
308         findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
309         if (findPtr != NULL) {
310             log_err("Should have found NULL when the character is not there.\n");
311         }
312         dataTable[i][j][0] = saveVal;   /* Put it back for the other tests */
313     }
314 
315     /*
316      * test that u_strchr32()
317      * does not find surrogate code points when they are part of matched pairs
318      * (= part of supplementary code points)
319      * Jitterbug 1542
320      */
321     {
322         static const UChar s[]={
323             /*   0       1       2       3       4       5       6       7       8  9 */
324             0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
325         };
326 
327         if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
328             log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
329         }
330         if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
331             log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
332         }
333     }
334 
335     log_verbose("Testing u_austrcpy()");
336     u_austrcpy(test,dataTable[0][0]);
337     if(strcmp(test,raw[0][0])!=0)
338         log_err("There is an error in u_austrcpy()");
339 
340 
341     log_verbose("Testing u_strtok_r()");
342     {
343         const char tokString[] = "  ,  1 2 3  AHHHHH! 5.5 6 7    ,        8\n";
344         const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
345         UChar delimBuf[sizeof(test)];
346         UChar currTokenBuf[sizeof(tokString)];
347         UChar *state;
348         uint32_t currToken = 0;
349         UChar *ptr;
350 
351         u_uastrcpy(temp, tokString);
352         u_uastrcpy(delimBuf, " ");
353 
354         ptr = u_strtok_r(temp, delimBuf, &state);
355         u_uastrcpy(delimBuf, " ,");
356         while (ptr != NULL) {
357             u_uastrcpy(currTokenBuf, tokens[currToken]);
358             if (u_strcmp(ptr, currTokenBuf) != 0) {
359                 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
360             }
361             ptr = u_strtok_r(NULL, delimBuf, &state);
362             currToken++;
363         }
364 
365         if (currToken != sizeof(tokens)/sizeof(tokens[0])) {
366             log_err("Didn't get correct number of tokens\n");
367         }
368         state = delimBuf;       /* Give it an "invalid" saveState */
369         u_uastrcpy(currTokenBuf, "");
370         if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
371             log_err("Didn't get NULL for empty string\n");
372         }
373         if (state != NULL) {
374             log_err("State should be NULL for empty string\n");
375         }
376         state = delimBuf;       /* Give it an "invalid" saveState */
377         u_uastrcpy(currTokenBuf, ", ,");
378         if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
379             log_err("Didn't get NULL for a string of delimiters\n");
380         }
381         if (state != NULL) {
382             log_err("State should be NULL for a string of delimiters\n");
383         }
384 
385         state = delimBuf;       /* Give it an "invalid" saveState */
386         u_uastrcpy(currTokenBuf, "q, ,");
387         if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
388             log_err("Got NULL for a string that does not begin with delimiters\n");
389         }
390         if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
391             log_err("Didn't get NULL for a string that ends in delimiters\n");
392         }
393         if (state != NULL) {
394             log_err("State should be NULL for empty string\n");
395         }
396 
397         state = delimBuf;       /* Give it an "invalid" saveState */
398         u_uastrcpy(currTokenBuf, tokString);
399         u_uastrcpy(temp, tokString);
400         u_uastrcpy(delimBuf, "q");  /* Give it a delimiter that it can't find. */
401         ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
402         if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
403             log_err("Should have recieved the same string when there are no delimiters\n");
404         }
405         if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
406             log_err("Should not have found another token in a one token string\n");
407         }
408     }
409 
410     /* test u_strcmpCodePointOrder() */
411     {
412         /* these strings are in ascending order */
413         static const UChar strings[][4]={
414             { 0x61, 0 },                    /* U+0061 */
415             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
416             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
417             { 0xd800, 0 },                  /* U+d800 */
418             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
419             { 0xdfff, 0 },                  /* U+dfff */
420             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
421             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
422             { 0xd800, 0xdc02, 0 },          /* U+10002 */
423             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
424         };
425 
426         UCharIterator iter1, iter2;
427         int32_t len1, len2, r1, r2;
428 
429         for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
430             if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
431                 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
432             }
433             if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
434                 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
435             }
436 
437             /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
438             if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
439                 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
440             }
441 
442             /* test u_strCompare(TRUE) */
443             len1=u_strlen(strings[i]);
444             len2=u_strlen(strings[i+1]);
445             if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
446                 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
447                 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
448                 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
449             ) {
450                 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
451             }
452 
453             /* test u_strCompare(FALSE) */
454             r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
455             r2=u_strcmp(strings[i], strings[i+1]);
456             if(_SIGN(r1)!=_SIGN(r2)) {
457                 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
458             }
459 
460             /* test u_strCompareIter() */
461             uiter_setString(&iter1, strings[i], len1);
462             uiter_setString(&iter2, strings[i+1], len2);
463             if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
464                 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
465             }
466             r1=u_strCompareIter(&iter1, &iter2, FALSE);
467             if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
468                 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
469             }
470         }
471     }
472 
473     cleanUpDataTable();
474 }
475 
TestStringSearching()476 static void TestStringSearching()
477 {
478     const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
479     const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
480     const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
481     const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
482     const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
483     const UChar surrMatchSet4[] = {0x0000};
484     const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
485     const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
486     const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0};   /* has partial surrogate */
487     const UChar
488         empty[] = { 0 },
489         a[] = { 0x61, 0 },
490         ab[] = { 0x61, 0x62, 0 },
491         ba[] = { 0x62, 0x61, 0 },
492         abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
493         cd[] = { 0x63, 0x64, 0 },
494         dc[] = { 0x64, 0x63, 0 },
495         cdh[] = { 0x63, 0x64, 0x68, 0 },
496         f[] = { 0x66, 0 },
497         fg[] = { 0x66, 0x67, 0 },
498         gf[] = { 0x67, 0x66, 0 };
499 
500     log_verbose("Testing u_strpbrk()");
501 
502     if (u_strpbrk(testString, a) != &testString[0]) {
503         log_err("u_strpbrk couldn't find first letter a.\n");
504     }
505     if (u_strpbrk(testString, dc) != &testString[2]) {
506         log_err("u_strpbrk couldn't find d or c.\n");
507     }
508     if (u_strpbrk(testString, cd) != &testString[2]) {
509         log_err("u_strpbrk couldn't find c or d.\n");
510     }
511     if (u_strpbrk(testString, cdh) != &testString[2]) {
512         log_err("u_strpbrk couldn't find c, d or h.\n");
513     }
514     if (u_strpbrk(testString, f) != NULL) {
515         log_err("u_strpbrk didn't return NULL for \"f\".\n");
516     }
517     if (u_strpbrk(testString, fg) != NULL) {
518         log_err("u_strpbrk didn't return NULL for \"fg\".\n");
519     }
520     if (u_strpbrk(testString, gf) != NULL) {
521         log_err("u_strpbrk didn't return NULL for \"gf\".\n");
522     }
523     if (u_strpbrk(testString, empty) != NULL) {
524         log_err("u_strpbrk didn't return NULL for \"\".\n");
525     }
526 
527     log_verbose("Testing u_strpbrk() with surrogates");
528 
529     if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
530         log_err("u_strpbrk couldn't find first letter a.\n");
531     }
532     if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
533         log_err("u_strpbrk couldn't find d or c.\n");
534     }
535     if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
536         log_err("u_strpbrk couldn't find c or d.\n");
537     }
538     if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
539         log_err("u_strpbrk couldn't find c, d or h.\n");
540     }
541     if (u_strpbrk(testSurrogateString, f) != NULL) {
542         log_err("u_strpbrk didn't return NULL for \"f\".\n");
543     }
544     if (u_strpbrk(testSurrogateString, fg) != NULL) {
545         log_err("u_strpbrk didn't return NULL for \"fg\".\n");
546     }
547     if (u_strpbrk(testSurrogateString, gf) != NULL) {
548         log_err("u_strpbrk didn't return NULL for \"gf\".\n");
549     }
550     if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
551         log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
552     }
553     if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
554         log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
555     }
556     if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
557         log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
558     }
559     if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
560         log_err("u_strpbrk should have returned NULL for empty string.\n");
561     }
562     if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
563         log_err("u_strpbrk should have found bad surrogate.\n");
564     }
565 
566     log_verbose("Testing u_strcspn()");
567 
568     if (u_strcspn(testString, a) != 0) {
569         log_err("u_strcspn couldn't find first letter a.\n");
570     }
571     if (u_strcspn(testString, dc) != 2) {
572         log_err("u_strcspn couldn't find d or c.\n");
573     }
574     if (u_strcspn(testString, cd) != 2) {
575         log_err("u_strcspn couldn't find c or d.\n");
576     }
577     if (u_strcspn(testString, cdh) != 2) {
578         log_err("u_strcspn couldn't find c, d or h.\n");
579     }
580     if (u_strcspn(testString, f) != u_strlen(testString)) {
581         log_err("u_strcspn didn't return NULL for \"f\".\n");
582     }
583     if (u_strcspn(testString, fg) != u_strlen(testString)) {
584         log_err("u_strcspn didn't return NULL for \"fg\".\n");
585     }
586     if (u_strcspn(testString, gf) != u_strlen(testString)) {
587         log_err("u_strcspn didn't return NULL for \"gf\".\n");
588     }
589 
590     log_verbose("Testing u_strcspn() with surrogates");
591 
592     if (u_strcspn(testSurrogateString, a) != 1) {
593         log_err("u_strcspn couldn't find first letter a.\n");
594     }
595     if (u_strcspn(testSurrogateString, dc) != 5) {
596         log_err("u_strcspn couldn't find d or c.\n");
597     }
598     if (u_strcspn(testSurrogateString, cd) != 5) {
599         log_err("u_strcspn couldn't find c or d.\n");
600     }
601     if (u_strcspn(testSurrogateString, cdh) != 5) {
602         log_err("u_strcspn couldn't find c, d or h.\n");
603     }
604     if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
605         log_err("u_strcspn didn't return NULL for \"f\".\n");
606     }
607     if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
608         log_err("u_strcspn didn't return NULL for \"fg\".\n");
609     }
610     if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
611         log_err("u_strcspn didn't return NULL for \"gf\".\n");
612     }
613     if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
614         log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
615     }
616     if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
617         log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
618     }
619     if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
620         log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
621     }
622     if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
623         log_err("u_strcspn should have returned strlen for empty string.\n");
624     }
625 
626 
627     log_verbose("Testing u_strspn()");
628 
629     if (u_strspn(testString, a) != 1) {
630         log_err("u_strspn couldn't skip first letter a.\n");
631     }
632     if (u_strspn(testString, ab) != 2) {
633         log_err("u_strspn couldn't skip a or b.\n");
634     }
635     if (u_strspn(testString, ba) != 2) {
636         log_err("u_strspn couldn't skip a or b.\n");
637     }
638     if (u_strspn(testString, f) != 0) {
639         log_err("u_strspn didn't return 0 for \"f\".\n");
640     }
641     if (u_strspn(testString, dc) != 0) {
642         log_err("u_strspn couldn't find first letter a (skip d or c).\n");
643     }
644     if (u_strspn(testString, abcd) != u_strlen(testString)) {
645         log_err("u_strspn couldn't skip over the whole string.\n");
646     }
647     if (u_strspn(testString, empty) != 0) {
648         log_err("u_strspn should have returned 0 for empty string.\n");
649     }
650 
651     log_verbose("Testing u_strspn() with surrogates");
652     if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
653         log_err("u_strspn couldn't skip 0xdbff or a.\n");
654     }
655     if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
656         log_err("u_strspn couldn't skip 0xdbff or a.\n");
657     }
658     if (u_strspn(testSurrogateString, f) != 0) {
659         log_err("u_strspn couldn't skip d or c (skip first letter).\n");
660     }
661     if (u_strspn(testSurrogateString, dc) != 0) {
662         log_err("u_strspn couldn't skip d or c (skip first letter).\n");
663     }
664     if (u_strspn(testSurrogateString, cd) != 0) {
665         log_err("u_strspn couldn't skip d or c (skip first letter).\n");
666     }
667     if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
668         log_err("u_strspn couldn't skip whole string.\n");
669     }
670     if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
671         log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
672     }
673     if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
674         log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
675     }
676     if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
677         log_err("u_strspn should have returned 0 for empty string.\n");
678     }
679 }
680 
681 /*
682  * All binary Unicode string searches should behave the same for equivalent input.
683  * See Jitterbug 2145.
684  * There are some new functions, too - just test them all.
685  */
686 static void
TestSurrogateSearching()687 TestSurrogateSearching() {
688     static const UChar s[]={
689         /* 0       1       2     3       4     5       6     7       8       9    10 11 */
690         0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
691     }, sub_a[]={
692         0x61, 0
693     }, sub_b[]={
694         0x62, 0
695     }, sub_lead[]={
696         0xd801, 0
697     }, sub_trail[]={
698         0xdc02, 0
699     }, sub_supp[]={
700         0xd801, 0xdc02, 0
701     }, sub_supp2[]={
702         0xd801, 0xdc03, 0
703     }, sub_a_lead[]={
704         0x61, 0xd801, 0
705     }, sub_trail_a[]={
706         0xdc02, 0x61, 0
707     }, sub_aba[]={
708         0x61, 0x62, 0x61, 0
709     };
710     static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
711     static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
712 
713     const UChar *first, *last;
714 
715     /* search for NUL code point: find end of string */
716     first=s+u_strlen(s);
717 
718     if(
719         first!=u_strchr(s, nul) ||
720         first!=u_strchr32(s, nul) ||
721         first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
722         first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
723         first!=u_strrchr(s, nul) ||
724         first!=u_strrchr32(s, nul) ||
725         first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
726         first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
727     ) {
728         log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
729     }
730 
731     /* search for empty substring: find beginning of string */
732     if(
733         s!=u_strstr(s, &nul) ||
734         s!=u_strFindFirst(s, -1, &nul, -1) ||
735         s!=u_strFindFirst(s, -1, &nul, 0) ||
736         s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
737         s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
738         s!=u_strrstr(s, &nul) ||
739         s!=u_strFindLast(s, -1, &nul, -1) ||
740         s!=u_strFindLast(s, -1, &nul, 0) ||
741         s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
742         s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
743     ) {
744         log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
745     }
746 
747     /* find 'a' in s[1..10[ */
748     first=s+3;
749     last=s+7;
750     if(
751         first!=u_strchr(s+1, a) ||
752         first!=u_strchr32(s+1, a) ||
753         first!=u_memchr(s+1, a, 9) ||
754         first!=u_memchr32(s+1, a, 9) ||
755         first!=u_strstr(s+1, sub_a) ||
756         first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
757         first!=u_strFindFirst(s+1, -1, &a, 1) ||
758         first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
759         first!=u_strFindFirst(s+1, 9, &a, 1) ||
760         (s+10)!=u_strrchr(s+1, a) ||
761         (s+10)!=u_strrchr32(s+1, a) ||
762         last!=u_memrchr(s+1, a, 9) ||
763         last!=u_memrchr32(s+1, a, 9) ||
764         (s+10)!=u_strrstr(s+1, sub_a) ||
765         (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
766         (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
767         last!=u_strFindLast(s+1, 9, sub_a, -1) ||
768         last!=u_strFindLast(s+1, 9, &a, 1)
769     ) {
770         log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
771     }
772 
773     /* do not find 'b' in s[1..10[ */
774     if(
775         NULL!=u_strchr(s+1, b) ||
776         NULL!=u_strchr32(s+1, b) ||
777         NULL!=u_memchr(s+1, b, 9) ||
778         NULL!=u_memchr32(s+1, b, 9) ||
779         NULL!=u_strstr(s+1, sub_b) ||
780         NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
781         NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
782         NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
783         NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
784         NULL!=u_strrchr(s+1, b) ||
785         NULL!=u_strrchr32(s+1, b) ||
786         NULL!=u_memrchr(s+1, b, 9) ||
787         NULL!=u_memrchr32(s+1, b, 9) ||
788         NULL!=u_strrstr(s+1, sub_b) ||
789         NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
790         NULL!=u_strFindLast(s+1, -1, &b, 1) ||
791         NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
792         NULL!=u_strFindLast(s+1, 9, &b, 1)
793     ) {
794         log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
795     }
796 
797     /* do not find a non-code point in s[1..10[ */
798     if(
799         NULL!=u_strchr32(s+1, ill) ||
800         NULL!=u_memchr32(s+1, ill, 9) ||
801         NULL!=u_strrchr32(s+1, ill) ||
802         NULL!=u_memrchr32(s+1, ill, 9)
803     ) {
804         log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
805     }
806 
807     /* find U+d801 in s[1..10[ */
808     first=s+6;
809     if(
810         first!=u_strchr(s+1, lead) ||
811         first!=u_strchr32(s+1, lead) ||
812         first!=u_memchr(s+1, lead, 9) ||
813         first!=u_memchr32(s+1, lead, 9) ||
814         first!=u_strstr(s+1, sub_lead) ||
815         first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
816         first!=u_strFindFirst(s+1, -1, &lead, 1) ||
817         first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
818         first!=u_strFindFirst(s+1, 9, &lead, 1) ||
819         first!=u_strrchr(s+1, lead) ||
820         first!=u_strrchr32(s+1, lead) ||
821         first!=u_memrchr(s+1, lead, 9) ||
822         first!=u_memrchr32(s+1, lead, 9) ||
823         first!=u_strrstr(s+1, sub_lead) ||
824         first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
825         first!=u_strFindLast(s+1, -1, &lead, 1) ||
826         first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
827         first!=u_strFindLast(s+1, 9, &lead, 1)
828     ) {
829         log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
830     }
831 
832     /* find U+dc02 in s[1..10[ */
833     first=s+4;
834     if(
835         first!=u_strchr(s+1, trail) ||
836         first!=u_strchr32(s+1, trail) ||
837         first!=u_memchr(s+1, trail, 9) ||
838         first!=u_memchr32(s+1, trail, 9) ||
839         first!=u_strstr(s+1, sub_trail) ||
840         first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
841         first!=u_strFindFirst(s+1, -1, &trail, 1) ||
842         first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
843         first!=u_strFindFirst(s+1, 9, &trail, 1) ||
844         first!=u_strrchr(s+1, trail) ||
845         first!=u_strrchr32(s+1, trail) ||
846         first!=u_memrchr(s+1, trail, 9) ||
847         first!=u_memrchr32(s+1, trail, 9) ||
848         first!=u_strrstr(s+1, sub_trail) ||
849         first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
850         first!=u_strFindLast(s+1, -1, &trail, 1) ||
851         first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
852         first!=u_strFindLast(s+1, 9, &trail, 1)
853     ) {
854         log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
855     }
856 
857     /* find U+10402 in s[1..10[ */
858     first=s+1;
859     last=s+8;
860     if(
861         first!=u_strchr32(s+1, supp) ||
862         first!=u_memchr32(s+1, supp, 9) ||
863         first!=u_strstr(s+1, sub_supp) ||
864         first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
865         first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
866         first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
867         first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
868         last!=u_strrchr32(s+1, supp) ||
869         last!=u_memrchr32(s+1, supp, 9) ||
870         last!=u_strrstr(s+1, sub_supp) ||
871         last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
872         last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
873         last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
874         last!=u_strFindLast(s+1, 9, sub_supp, 2)
875     ) {
876         log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
877     }
878 
879     /* do not find U+10402 in a single UChar */
880     if(
881         NULL!=u_memchr32(s+1, supp, 1) ||
882         NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
883         NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
884         NULL!=u_memrchr32(s+1, supp, 1) ||
885         NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
886         NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
887         NULL!=u_memrchr32(s+2, supp, 1) ||
888         NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
889         NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
890     ) {
891         log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
892     }
893 
894     /* do not find U+10403 in s[1..10[ */
895     if(
896         NULL!=u_strchr32(s+1, supp2) ||
897         NULL!=u_memchr32(s+1, supp2, 9) ||
898         NULL!=u_strstr(s+1, sub_supp2) ||
899         NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
900         NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
901         NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
902         NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
903         NULL!=u_strrchr32(s+1, supp2) ||
904         NULL!=u_memrchr32(s+1, supp2, 9) ||
905         NULL!=u_strrstr(s+1, sub_supp2) ||
906         NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
907         NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
908         NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
909         NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
910     ) {
911         log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
912     }
913 
914     /* find <0061 d801> in s[1..10[ */
915     first=s+5;
916     if(
917         first!=u_strstr(s+1, sub_a_lead) ||
918         first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
919         first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
920         first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
921         first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
922         first!=u_strrstr(s+1, sub_a_lead) ||
923         first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
924         first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
925         first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
926         first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
927     ) {
928         log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
929     }
930 
931     /* find <dc02 0061> in s[1..10[ */
932     first=s+4;
933     if(
934         first!=u_strstr(s+1, sub_trail_a) ||
935         first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
936         first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
937         first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
938         first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
939         first!=u_strrstr(s+1, sub_trail_a) ||
940         first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
941         first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
942         first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
943         first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
944     ) {
945         log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
946     }
947 
948     /* do not find "aba" in s[1..10[ */
949     if(
950         NULL!=u_strstr(s+1, sub_aba) ||
951         NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
952         NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
953         NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
954         NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
955         NULL!=u_strrstr(s+1, sub_aba) ||
956         NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
957         NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
958         NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
959         NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
960     ) {
961         log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
962     }
963 }
964 
TestStringCopy()965 static void TestStringCopy()
966 {
967     UChar temp[40];
968     UChar *result=0;
969     UChar subString[5];
970     UChar uchars[]={0x61, 0x62, 0x63, 0x00};
971     char  charOut[40];
972     char  chars[]="abc";    /* needs default codepage */
973 
974     log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
975 
976     u_uastrcpy(temp, "abc");
977     if(u_strcmp(temp, uchars) != 0) {
978         log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
979     }
980 
981     temp[0] = 0xFB; /* load garbage into it */
982     temp[1] = 0xFB;
983     temp[2] = 0xFB;
984     temp[3] = 0xFB;
985 
986     u_uastrncpy(temp, "abcabcabc", 3);
987     if(u_strncmp(uchars, temp, 3) != 0){
988         log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
989     }
990     if(temp[3] != 0xFB) {
991         log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
992     }
993 
994     charOut[0] = (char)0x7B; /* load garbage into it */
995     charOut[1] = (char)0x7B;
996     charOut[2] = (char)0x7B;
997     charOut[3] = (char)0x7B;
998 
999     temp[0] = 0x0061;
1000     temp[1] = 0x0062;
1001     temp[2] = 0x0063;
1002     temp[3] = 0x0061;
1003     temp[4] = 0x0062;
1004     temp[5] = 0x0063;
1005     temp[6] = 0x0000;
1006 
1007     u_austrncpy(charOut, temp, 3);
1008     if(strncmp(chars, charOut, 3) != 0){
1009         log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1010     }
1011     if(charOut[3] != (char)0x7B) {
1012         log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1013     }
1014 
1015     /*Testing u_strchr()*/
1016     log_verbose("Testing u_strchr\n");
1017     temp[0]=0x42;
1018     temp[1]=0x62;
1019     temp[2]=0x62;
1020     temp[3]=0x63;
1021     temp[4]=0xd841;
1022     temp[5]=0xd841;
1023     temp[6]=0xdc02;
1024     temp[7]=0;
1025     result=u_strchr(temp, (UChar)0x62);
1026     if(result != temp+1){
1027         log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1028     }
1029     /*Testing u_strstr()*/
1030     log_verbose("Testing u_strstr\n");
1031     subString[0]=0x62;
1032     subString[1]=0x63;
1033     subString[2]=0;
1034     result=u_strstr(temp, subString);
1035     if(result != temp+2){
1036         log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1037     }
1038     result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1039     if(result != temp){
1040         log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1041     }
1042     result=u_strstr(subString, temp);
1043     if(result != NULL){
1044         log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1045     }
1046 
1047     /*Testing u_strchr32*/
1048     log_verbose("Testing u_strchr32\n");
1049     result=u_strchr32(temp, (UChar32)0x62);
1050     if(result != temp+1){
1051         log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1052     }
1053     result=u_strchr32(temp, (UChar32)0xfb);
1054     if(result != NULL){
1055         log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1056     }
1057     result=u_strchr32(temp, (UChar32)0x20402);
1058     if(result != temp+5){
1059         log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1060     }
1061 
1062     temp[7]=0xfc00;
1063     result=u_memchr32(temp, (UChar32)0x20402, 7);
1064     if(result != temp+5){
1065         log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1066     }
1067     result=u_memchr32(temp, (UChar32)0x20402, 6);
1068     if(result != NULL){
1069         log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1070     }
1071     result=u_memchr32(temp, (UChar32)0x20402, 1);
1072     if(result != NULL){
1073         log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1074     }
1075     result=u_memchr32(temp, (UChar32)0xfc00, 8);
1076     if(result != temp+7){
1077         log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1078     }
1079 }
1080 
1081 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1082 
1083 static void
TestUnescape()1084 TestUnescape() {
1085     static UChar buffer[200];
1086 
1087     static const char* input =
1088         "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1089 
1090     static const UChar expect[]={
1091         0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1092         0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1093         0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1094         0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1095     };
1096     static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
1097     int32_t length;
1098 
1099     /* test u_unescape() */
1100     length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
1101     if(length!=explength || u_strcmp(buffer, expect)!=0) {
1102         log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1103                 explength);
1104     }
1105 
1106     /* try preflighting */
1107     length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
1108     if(length!=explength || u_strcmp(buffer, expect)!=0) {
1109         log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1110     }
1111 
1112     /* ### TODO: test u_unescapeAt() */
1113 }
1114 
1115 /* test code point counting functions --------------------------------------- */
1116 
1117 /* reference implementation of u_strHasMoreChar32Than() */
1118 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1119 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1120     int32_t count=u_countChar32(s, length);
1121     return count>number;
1122 }
1123 
1124 /* compare the real function against the reference */
1125 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1126 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1127     if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1128         log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1129                 i, length, number, u_strHasMoreChar32Than(s, length, number));
1130     }
1131 }
1132 
1133 static void
TestCountChar32()1134 TestCountChar32() {
1135     static const UChar string[]={
1136         0x61, 0x62, 0xd800, 0xdc00,
1137         0xd801, 0xdc01, 0x63, 0xd802,
1138         0x64, 0xdc03, 0x65, 0x66,
1139         0xd804, 0xdc04, 0xd805, 0xdc05,
1140         0x67
1141     };
1142     UChar buffer[100];
1143     int32_t i, length, number;
1144 
1145     /* test u_strHasMoreChar32Than() with length>=0 */
1146     length=UPRV_LENGTHOF(string);
1147     while(length>=0) {
1148         for(i=0; i<=length; ++i) {
1149             for(number=-1; number<=((length-i)+2); ++number) {
1150                 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1151             }
1152         }
1153         --length;
1154     }
1155 
1156     /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1157     length=UPRV_LENGTHOF(string);
1158     u_memcpy(buffer, string, length);
1159     while(length>=0) {
1160         buffer[length]=0;
1161         for(i=0; i<=length; ++i) {
1162             for(number=-1; number<=((length-i)+2); ++number) {
1163                 _testStrHasMoreChar32Than(string+i, i, -1, number);
1164             }
1165         }
1166         --length;
1167     }
1168 
1169     /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1170     for(length=-1; length<=1; ++length) {
1171         for(i=0; i<=length; ++i) {
1172             for(number=-2; number<=2; ++number) {
1173                 _testStrHasMoreChar32Than(NULL, 0, length, number);
1174             }
1175         }
1176     }
1177 }
1178 
1179 /* UCharIterator ------------------------------------------------------------ */
1180 
1181 /*
1182  * Compare results from two iterators, should be same.
1183  * Assume that the text is not empty and that
1184  * iteration start==0 and iteration limit==length.
1185  */
1186 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1187 compareIterators(UCharIterator *iter1, const char *n1,
1188                  UCharIterator *iter2, const char *n2) {
1189     int32_t i, pos1, pos2, middle, length;
1190     UChar32 c1, c2;
1191 
1192     /* compare lengths */
1193     length=iter1->getIndex(iter1, UITER_LENGTH);
1194     pos2=iter2->getIndex(iter2, UITER_LENGTH);
1195     if(length!=pos2) {
1196         log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1197         return;
1198     }
1199 
1200     /* set into the middle */
1201     middle=length/2;
1202 
1203     pos1=iter1->move(iter1, middle, UITER_ZERO);
1204     if(pos1!=middle) {
1205         log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1206         return;
1207     }
1208 
1209     pos2=iter2->move(iter2, middle, UITER_ZERO);
1210     if(pos2!=middle) {
1211         log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1212         return;
1213     }
1214 
1215     /* test current() */
1216     c1=iter1->current(iter1);
1217     c2=iter2->current(iter2);
1218     if(c1!=c2) {
1219         log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1220         return;
1221     }
1222 
1223     /* move forward 3 UChars */
1224     for(i=0; i<3; ++i) {
1225         c1=iter1->next(iter1);
1226         c2=iter2->next(iter2);
1227         if(c1!=c2) {
1228             log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1229             return;
1230         }
1231     }
1232 
1233     /* move backward 5 UChars */
1234     for(i=0; i<5; ++i) {
1235         c1=iter1->previous(iter1);
1236         c2=iter2->previous(iter2);
1237         if(c1!=c2) {
1238             log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1239             return;
1240         }
1241     }
1242 
1243     /* iterate forward from the beginning */
1244     pos1=iter1->move(iter1, 0, UITER_START);
1245     if(pos1<0) {
1246         log_err("%s->move(start) failed\n", n1);
1247         return;
1248     }
1249     if(!iter1->hasNext(iter1)) {
1250         log_err("%s->hasNext() at the start returns FALSE\n", n1);
1251         return;
1252     }
1253 
1254     pos2=iter2->move(iter2, 0, UITER_START);
1255     if(pos2<0) {
1256         log_err("%s->move(start) failed\n", n2);
1257         return;
1258     }
1259     if(!iter2->hasNext(iter2)) {
1260         log_err("%s->hasNext() at the start returns FALSE\n", n2);
1261         return;
1262     }
1263 
1264     do {
1265         c1=iter1->next(iter1);
1266         c2=iter2->next(iter2);
1267         if(c1!=c2) {
1268             log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1269             return;
1270         }
1271     } while(c1>=0);
1272 
1273     if(iter1->hasNext(iter1)) {
1274         log_err("%s->hasNext() at the end returns TRUE\n", n1);
1275         return;
1276     }
1277     if(iter2->hasNext(iter2)) {
1278         log_err("%s->hasNext() at the end returns TRUE\n", n2);
1279         return;
1280     }
1281 
1282     /* back to the middle */
1283     pos1=iter1->move(iter1, middle, UITER_ZERO);
1284     if(pos1!=middle) {
1285         log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1286         return;
1287     }
1288 
1289     pos2=iter2->move(iter2, middle, UITER_ZERO);
1290     if(pos2!=middle) {
1291         log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1292         return;
1293     }
1294 
1295     /* move to index 1 */
1296     pos1=iter1->move(iter1, 1, UITER_ZERO);
1297     if(pos1!=1) {
1298         log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1299         return;
1300     }
1301 
1302     pos2=iter2->move(iter2, 1, UITER_ZERO);
1303     if(pos2!=1) {
1304         log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1305         return;
1306     }
1307 
1308     /* iterate backward from the end */
1309     pos1=iter1->move(iter1, 0, UITER_LIMIT);
1310     if(pos1<0) {
1311         log_err("%s->move(limit) failed\n", n1);
1312         return;
1313     }
1314     if(!iter1->hasPrevious(iter1)) {
1315         log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1316         return;
1317     }
1318 
1319     pos2=iter2->move(iter2, 0, UITER_LIMIT);
1320     if(pos2<0) {
1321         log_err("%s->move(limit) failed\n", n2);
1322         return;
1323     }
1324     if(!iter2->hasPrevious(iter2)) {
1325         log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1326         return;
1327     }
1328 
1329     do {
1330         c1=iter1->previous(iter1);
1331         c2=iter2->previous(iter2);
1332         if(c1!=c2) {
1333             log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1334             return;
1335         }
1336     } while(c1>=0);
1337 
1338     if(iter1->hasPrevious(iter1)) {
1339         log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1340         return;
1341     }
1342     if(iter2->hasPrevious(iter2)) {
1343         log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1344         return;
1345     }
1346 }
1347 
1348 /*
1349  * Test the iterator's getState() and setState() functions.
1350  * iter1 and iter2 must be set up for the same iterator type and the same string
1351  * but may be physically different structs (different addresses).
1352  *
1353  * Assume that the text is not empty and that
1354  * iteration start==0 and iteration limit==length.
1355  * It must be 2<=middle<=length-2.
1356  */
1357 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1358 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1359     UChar32 u[4];
1360 
1361     UErrorCode errorCode;
1362     UChar32 c;
1363     uint32_t state;
1364     int32_t i, j;
1365 
1366     /* get four UChars from the middle of the string */
1367     iter1->move(iter1, middle-2, UITER_ZERO);
1368     for(i=0; i<4; ++i) {
1369         c=iter1->next(iter1);
1370         if(c<0) {
1371             /* the test violates the assumptions, see comment above */
1372             log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1373             return;
1374         }
1375         u[i]=c;
1376     }
1377 
1378     /* move to the middle and get the state */
1379     iter1->move(iter1, -2, UITER_CURRENT);
1380     state=uiter_getState(iter1);
1381 
1382     /* set the state into the second iterator and compare the results */
1383     errorCode=U_ZERO_ERROR;
1384     uiter_setState(iter2, state, &errorCode);
1385     if(U_FAILURE(errorCode)) {
1386         log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1387         return;
1388     }
1389 
1390     c=iter2->current(iter2);
1391     if(c!=u[2]) {
1392         log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1393     }
1394 
1395     c=iter2->previous(iter2);
1396     if(c!=u[1]) {
1397         log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1398     }
1399 
1400     iter2->move(iter2, 2, UITER_CURRENT);
1401     c=iter2->next(iter2);
1402     if(c!=u[3]) {
1403         log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1404     }
1405 
1406     iter2->move(iter2, -3, UITER_CURRENT);
1407     c=iter2->previous(iter2);
1408     if(c!=u[0]) {
1409         log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1410     }
1411 
1412     /* move the second iterator back to the middle */
1413     iter2->move(iter2, 1, UITER_CURRENT);
1414     iter2->next(iter2);
1415 
1416     /* check that both are in the middle */
1417     i=iter1->getIndex(iter1, UITER_CURRENT);
1418     j=iter2->getIndex(iter2, UITER_CURRENT);
1419     if(i!=middle) {
1420         log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1421     }
1422     if(i!=j) {
1423         log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1424     }
1425 
1426     /* compare lengths */
1427     i=iter1->getIndex(iter1, UITER_LENGTH);
1428     j=iter2->getIndex(iter2, UITER_LENGTH);
1429     if(i!=j) {
1430         log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1431     }
1432 }
1433 
1434 static void
TestUCharIterator()1435 TestUCharIterator() {
1436     static const UChar text[]={
1437         0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1438     };
1439     char bytes[40];
1440 
1441     UCharIterator iter, iter1, iter2;
1442     UConverter *cnv;
1443     UErrorCode errorCode;
1444     int32_t length;
1445 
1446     /* simple API/code coverage - test NOOP UCharIterator */
1447     uiter_setString(&iter, NULL, 0);
1448     if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1449         iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1450         iter.hasNext(&iter) || iter.hasPrevious(&iter)
1451     ) {
1452         log_err("NOOP UCharIterator behaves unexpectedly\n");
1453     }
1454 
1455     /* test get/set state */
1456     length=UPRV_LENGTHOF(text)-1;
1457     uiter_setString(&iter1, text, -1);
1458     uiter_setString(&iter2, text, length);
1459     testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1460     testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1461 
1462     /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1463     errorCode=U_ZERO_ERROR;
1464     u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1465     if(U_FAILURE(errorCode)) {
1466         log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1467         return;
1468     }
1469 
1470     uiter_setString(&iter1, text, -1);
1471     uiter_setUTF8(&iter2, bytes, length);
1472     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1473 
1474     /* try again with length=-1 */
1475     uiter_setUTF8(&iter2, bytes, -1);
1476     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1477 
1478     /* test get/set state */
1479     length=UPRV_LENGTHOF(text)-1;
1480     uiter_setUTF8(&iter1, bytes, -1);
1481     testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1482     testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1483 
1484     /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1485     errorCode=U_ZERO_ERROR;
1486     cnv=ucnv_open("UTF-16BE", &errorCode);
1487     length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1488     ucnv_close(cnv);
1489     if(U_FAILURE(errorCode)) {
1490         log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1491         return;
1492     }
1493 
1494     /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1495     bytes[length]=bytes[length+1]=0;
1496 
1497     uiter_setString(&iter1, text, -1);
1498     uiter_setUTF16BE(&iter2, bytes, length);
1499     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1500 
1501     /* try again with length=-1 */
1502     uiter_setUTF16BE(&iter2, bytes, -1);
1503     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1504 
1505     /* try again after moving the bytes up one, and with length=-1 */
1506     memmove(bytes+1, bytes, length+2);
1507     uiter_setUTF16BE(&iter2, bytes+1, -1);
1508     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1509 
1510     /* ### TODO test other iterators: CharacterIterator, Replaceable */
1511 }
1512