1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 2002-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  custrtst.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2002oct09
16 *   created by: Markus W. Scherer
17 *
18 *   Tests of ustring.h Unicode string API functions.
19 */
20 
21 #include "unicode/ustring.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/uiter.h"
24 #include "cintltst.h"
25 #include "cmemory.h"
26 #include <string.h>
27 
/*
 * Get the sign of an integer: evaluates to -1, 0 or +1.
 * The argument is evaluated twice, so it must not have side effects.
 * Nonzero values are truncated to int32_t before their sign is inspected.
 * An explicit comparison is used instead of an arithmetic right shift because
 * right-shifting a negative signed value is implementation-defined in C.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
30 
31 /* test setup --------------------------------------------------------------- */
32 
33 static void setUpDataTable(void);
34 static void TestStringCopy(void);
35 static void TestStringFunctions(void);
36 static void TestStringSearching(void);
37 static void TestSurrogateSearching(void);
38 static void TestUnescape(void);
39 static void TestCountChar32(void);
40 static void TestUCharIterator(void);
41 
42 void addUStringTest(TestNode** root);
43 
/*
 * Registers every test function of this file under tsutil/custrtst/ in the
 * test tree rooted at *root. The registration order matches the table order.
 */
void addUStringTest(TestNode** root)
{
    static const struct {
        void (*function)(void);
        const char *path;
    } registrations[] = {
        { &TestStringCopy,         "tsutil/custrtst/TestStringCopy" },
        { &TestStringFunctions,    "tsutil/custrtst/TestStringFunctions" },
        { &TestStringSearching,    "tsutil/custrtst/TestStringSearching" },
        { &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching" },
        { &TestUnescape,           "tsutil/custrtst/TestUnescape" },
        { &TestCountChar32,        "tsutil/custrtst/TestCountChar32" },
        { &TestUCharIterator,      "tsutil/custrtst/TestUCharIterator" }
    };
    int32_t idx;

    for(idx = 0; idx < UPRV_LENGTHOF(registrations); ++idx) {
        addTest(root, registrations[idx].function, registrations[idx].path);
    }
}
54 
55 /* test data for TestStringFunctions ---------------------------------------- */
56 
57 UChar*** dataTable = NULL;
58 
59 static const char* raw[3][4] = {
60 
61     /* First String */
62     {   "English_",  "French_",   "Croatian_", "English_"},
63     /* Second String */
64     {   "United States",    "France",   "Croatia",  "Unites States"},
65 
66    /* Concatenated string */
67     {   "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
68 };
69 
setUpDataTable()70 static void setUpDataTable()
71 {
72     int32_t i,j;
73     if(dataTable == NULL) {
74         dataTable = (UChar***)calloc(sizeof(UChar**),3);
75 
76             for (i = 0; i < 3; i++) {
77               dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
78                 for (j = 0; j < 4; j++){
79                     dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
80                     u_uastrcpy(dataTable[i][j],raw[i][j]);
81                 }
82             }
83     }
84 }
85 
cleanUpDataTable()86 static void cleanUpDataTable()
87 {
88     int32_t i,j;
89     if(dataTable != NULL) {
90         for (i=0; i<3; i++) {
91             for(j = 0; j<4; j++) {
92                 free(dataTable[i][j]);
93             }
94             free(dataTable[i]);
95         }
96         free(dataTable);
97     }
98     dataTable = NULL;
99 }
100 
101 /*Tests  for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions()102 static void TestStringFunctions()
103 {
104     int32_t i,j,k;
105     UChar temp[512];
106     UChar nullTemp[512];
107     char test[512];
108     char tempOut[512];
109 
110     setUpDataTable();
111 
112     log_verbose("Testing u_strlen()\n");
113     if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
114         log_err("There is an error in u_strlen()");
115 
116     log_verbose("Testing u_memcpy() and u_memcmp()\n");
117 
118     for(i=0;i<3;++i)
119     {
120         for(j=0;j<4;++j)
121         {
122             log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
123             temp[0] = 0;
124             temp[7] = 0xA4; /* Mark the end */
125             u_memcpy(temp,dataTable[i][j], 7);
126 
127             if(temp[7] != 0xA4)
128                 log_err("an error occured in u_memcpy()\n");
129             if(u_memcmp(temp, dataTable[i][j], 7)!=0)
130                 log_err("an error occured in u_memcpy() or u_memcmp()\n");
131         }
132     }
133     if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
134         log_err("an error occured in u_memcmp()\n");
135 
136     log_verbose("Testing u_memset()\n");
137     nullTemp[0] = 0;
138     nullTemp[7] = 0;
139     u_memset(nullTemp, 0xa4, 7);
140     for (i = 0; i < 7; i++) {
141         if(nullTemp[i] != 0xa4) {
142             log_err("an error occured in u_memset()\n");
143         }
144     }
145     if(nullTemp[7] != 0) {
146         log_err("u_memset() went too far\n");
147     }
148 
149     u_memset(nullTemp, 0, 7);
150     nullTemp[7] = 0xa4;
151     temp[7] = 0;
152     u_memcpy(temp,nullTemp, 7);
153     if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
154         log_err("an error occured in u_memcpy() or u_memcmp()\n");
155 
156 
157     log_verbose("Testing u_memmove()\n");
158     for (i = 0; i < 7; i++) {
159         temp[i] = (UChar)i;
160     }
161     u_memmove(temp + 1, temp, 7);
162     if(temp[0] != 0) {
163         log_err("an error occured in u_memmove()\n");
164     }
165     for (i = 1; i <= 7; i++) {
166         if(temp[i] != (i - 1)) {
167             log_err("an error occured in u_memmove()\n");
168         }
169     }
170 
171     log_verbose("Testing u_strcpy() and u_strcmp()\n");
172 
173     for(i=0;i<3;++i)
174     {
175         for(j=0;j<4;++j)
176         {
177             log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
178             temp[0] = 0;
179             u_strcpy(temp,dataTable[i][j]);
180 
181             if(u_strcmp(temp,dataTable[i][j])!=0)
182                 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
183         }
184     }
185     if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
186         log_err("an error occured in u_memcmp()\n");
187 
188     log_verbose("testing u_strcat()\n");
189     i=0;
190     for(j=0; j<2;++j)
191     {
192         u_uastrcpy(temp, "");
193         u_strcpy(temp,dataTable[i][j]);
194         u_strcat(temp,dataTable[i+1][j]);
195         if(u_strcmp(temp,dataTable[i+2][j])!=0)
196             log_err("something threw an error in u_strcat()\n");
197 
198     }
199     log_verbose("Testing u_strncmp()\n");
200     for(i=0,j=0;j<4; ++j)
201     {
202         k=u_strlen(dataTable[i][j]);
203         if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
204             log_err("Something threw an error in u_strncmp\n");
205     }
206     if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
207         log_err("an error occured in u_memcmp()\n");
208 
209 
210     log_verbose("Testing u_strncat\n");
211     for(i=0,j=0;j<4; ++j)
212     {
213         k=u_strlen(dataTable[i][j]);
214 
215         u_uastrcpy(temp,"");
216 
217         if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
218             log_err("something threw an error in u_strncat or u_uastrcpy()\n");
219 
220     }
221 
222     log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
223     for(i=2,j=0;j<4; ++j)
224     {
225         k=u_strlen(dataTable[i][j]);
226         u_strncpy(temp, dataTable[i][j],k);
227         temp[k] = 0xa4;
228 
229         if(u_strncmp(temp, dataTable[i][j],k)!=0)
230             log_err("something threw an error in u_strncpy()\n");
231 
232         if(temp[k] != 0xa4)
233             log_err("something threw an error in u_strncpy()\n");
234 
235         u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
236         u_uastrncpy(temp, raw[i][j], k-1);
237         if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
238             log_err("something threw an error in u_uastrncpy(k-1)\n");
239 
240         if(temp[k-1] != 0x3F)
241             log_err("something threw an error in u_uastrncpy(k-1)\n");
242 
243         u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
244         u_uastrncpy(temp, raw[i][j], k+1);
245         if(u_strcmp(temp, dataTable[i][j])!=0)
246             log_err("something threw an error in u_uastrncpy(k+1)\n");
247 
248         if(temp[k] != 0)
249             log_err("something threw an error in u_uastrncpy(k+1)\n");
250 
251         u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
252         u_uastrncpy(temp, raw[i][j], k);
253         if(u_strncmp(temp, dataTable[i][j], k)!=0)
254             log_err("something threw an error in u_uastrncpy(k)\n");
255 
256         if(temp[k] != 0x3F)
257             log_err("something threw an error in u_uastrncpy(k)\n");
258     }
259 
260     log_verbose("Testing u_strchr() and u_memchr()\n");
261 
262     for(i=2,j=0;j<4;j++)
263     {
264         UChar saveVal = dataTable[i][j][0];
265         UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
266         int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
267 
268         log_verbose("%s ", u_austrcpy(tempOut, findPtr));
269 
270         if (findPtr == NULL || *findPtr != 0x005F) {
271             log_err("u_strchr can't find '_' in the string\n");
272         }
273 
274         findPtr = u_strchr32(dataTable[i][j], 0x005F);
275         if (findPtr == NULL || *findPtr != 0x005F) {
276             log_err("u_strchr32 can't find '_' in the string\n");
277         }
278 
279         findPtr = u_strchr(dataTable[i][j], 0);
280         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
281             log_err("u_strchr can't find NULL in the string\n");
282         }
283 
284         findPtr = u_strchr32(dataTable[i][j], 0);
285         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
286             log_err("u_strchr32 can't find NULL in the string\n");
287         }
288 
289         findPtr = u_memchr(dataTable[i][j], 0, dataSize);
290         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
291             log_err("u_memchr can't find NULL in the string\n");
292         }
293 
294         findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
295         if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
296             log_err("u_memchr32 can't find NULL in the string\n");
297         }
298 
299         dataTable[i][j][0] = 0;
300         /* Make sure we skip over the NULL termination */
301         findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
302         if (findPtr == NULL || *findPtr != 0x005F) {
303             log_err("u_memchr can't find '_' in the string\n");
304         }
305 
306         findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
307         if (findPtr == NULL || *findPtr != 0x005F) {
308             log_err("u_memchr32 can't find '_' in the string\n");
309         }
310         findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
311         if (findPtr != NULL) {
312             log_err("Should have found NULL when the character is not there.\n");
313         }
314         dataTable[i][j][0] = saveVal;   /* Put it back for the other tests */
315     }
316 
317     /*
318      * test that u_strchr32()
319      * does not find surrogate code points when they are part of matched pairs
320      * (= part of supplementary code points)
321      * Jitterbug 1542
322      */
323     {
324         static const UChar s[]={
325             /*   0       1       2       3       4       5       6       7       8  9 */
326             0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
327         };
328 
329         if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
330             log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
331         }
332         if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
333             log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
334         }
335     }
336 
337     log_verbose("Testing u_austrcpy()");
338     u_austrcpy(test,dataTable[0][0]);
339     if(strcmp(test,raw[0][0])!=0)
340         log_err("There is an error in u_austrcpy()");
341 
342 
343     log_verbose("Testing u_strtok_r()");
344     {
345         const char tokString[] = "  ,  1 2 3  AHHHHH! 5.5 6 7    ,        8\n";
346         const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
347         UChar delimBuf[sizeof(test)];
348         UChar currTokenBuf[sizeof(tokString)];
349         UChar *state;
350         uint32_t currToken = 0;
351         UChar *ptr;
352 
353         u_uastrcpy(temp, tokString);
354         u_uastrcpy(delimBuf, " ");
355 
356         ptr = u_strtok_r(temp, delimBuf, &state);
357         u_uastrcpy(delimBuf, " ,");
358         while (ptr != NULL) {
359             u_uastrcpy(currTokenBuf, tokens[currToken]);
360             if (u_strcmp(ptr, currTokenBuf) != 0) {
361                 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
362             }
363             ptr = u_strtok_r(NULL, delimBuf, &state);
364             currToken++;
365         }
366 
367         if (currToken != UPRV_LENGTHOF(tokens)) {
368             log_err("Didn't get correct number of tokens\n");
369         }
370         state = delimBuf;       /* Give it an "invalid" saveState */
371         u_uastrcpy(currTokenBuf, "");
372         if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
373             log_err("Didn't get NULL for empty string\n");
374         }
375         if (state != NULL) {
376             log_err("State should be NULL for empty string\n");
377         }
378         state = delimBuf;       /* Give it an "invalid" saveState */
379         u_uastrcpy(currTokenBuf, ", ,");
380         if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
381             log_err("Didn't get NULL for a string of delimiters\n");
382         }
383         if (state != NULL) {
384             log_err("State should be NULL for a string of delimiters\n");
385         }
386 
387         state = delimBuf;       /* Give it an "invalid" saveState */
388         u_uastrcpy(currTokenBuf, "q, ,");
389         if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
390             log_err("Got NULL for a string that does not begin with delimiters\n");
391         }
392         if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
393             log_err("Didn't get NULL for a string that ends in delimiters\n");
394         }
395         if (state != NULL) {
396             log_err("State should be NULL for empty string\n");
397         }
398 
399         state = delimBuf;       /* Give it an "invalid" saveState */
400         u_uastrcpy(currTokenBuf, tokString);
401         u_uastrcpy(temp, tokString);
402         u_uastrcpy(delimBuf, "q");  /* Give it a delimiter that it can't find. */
403         ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
404         if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
405             log_err("Should have recieved the same string when there are no delimiters\n");
406         }
407         if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
408             log_err("Should not have found another token in a one token string\n");
409         }
410     }
411 
412     /* test u_strcmpCodePointOrder() */
413     {
414         /* these strings are in ascending order */
415         static const UChar strings[][4]={
416             { 0x61, 0 },                    /* U+0061 */
417             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
418             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
419             { 0xd800, 0 },                  /* U+d800 */
420             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
421             { 0xdfff, 0 },                  /* U+dfff */
422             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
423             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
424             { 0xd800, 0xdc02, 0 },          /* U+10002 */
425             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
426         };
427 
428         UCharIterator iter1, iter2;
429         int32_t len1, len2, r1, r2;
430 
431         for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
432             if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
433                 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
434             }
435             if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
436                 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
437             }
438 
439             /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
440             if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
441                 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
442             }
443 
444             /* test u_strCompare(TRUE) */
445             len1=u_strlen(strings[i]);
446             len2=u_strlen(strings[i+1]);
447             if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
448                 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
449                 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
450                 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
451             ) {
452                 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
453             }
454 
455             /* test u_strCompare(FALSE) */
456             r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
457             r2=u_strcmp(strings[i], strings[i+1]);
458             if(_SIGN(r1)!=_SIGN(r2)) {
459                 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
460             }
461 
462             /* test u_strCompareIter() */
463             uiter_setString(&iter1, strings[i], len1);
464             uiter_setString(&iter2, strings[i+1], len2);
465             if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
466                 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
467             }
468             r1=u_strCompareIter(&iter1, &iter2, FALSE);
469             if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
470                 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
471             }
472         }
473     }
474 
475     cleanUpDataTable();
476 }
477 
TestStringSearching()478 static void TestStringSearching()
479 {
480     const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
481     const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
482     const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
483     const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
484     const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
485     const UChar surrMatchSet4[] = {0x0000};
486     const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
487     const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
488     const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0};   /* has partial surrogate */
489     const UChar
490         empty[] = { 0 },
491         a[] = { 0x61, 0 },
492         ab[] = { 0x61, 0x62, 0 },
493         ba[] = { 0x62, 0x61, 0 },
494         abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
495         cd[] = { 0x63, 0x64, 0 },
496         dc[] = { 0x64, 0x63, 0 },
497         cdh[] = { 0x63, 0x64, 0x68, 0 },
498         f[] = { 0x66, 0 },
499         fg[] = { 0x66, 0x67, 0 },
500         gf[] = { 0x67, 0x66, 0 };
501 
502     log_verbose("Testing u_strpbrk()");
503 
504     if (u_strpbrk(testString, a) != &testString[0]) {
505         log_err("u_strpbrk couldn't find first letter a.\n");
506     }
507     if (u_strpbrk(testString, dc) != &testString[2]) {
508         log_err("u_strpbrk couldn't find d or c.\n");
509     }
510     if (u_strpbrk(testString, cd) != &testString[2]) {
511         log_err("u_strpbrk couldn't find c or d.\n");
512     }
513     if (u_strpbrk(testString, cdh) != &testString[2]) {
514         log_err("u_strpbrk couldn't find c, d or h.\n");
515     }
516     if (u_strpbrk(testString, f) != NULL) {
517         log_err("u_strpbrk didn't return NULL for \"f\".\n");
518     }
519     if (u_strpbrk(testString, fg) != NULL) {
520         log_err("u_strpbrk didn't return NULL for \"fg\".\n");
521     }
522     if (u_strpbrk(testString, gf) != NULL) {
523         log_err("u_strpbrk didn't return NULL for \"gf\".\n");
524     }
525     if (u_strpbrk(testString, empty) != NULL) {
526         log_err("u_strpbrk didn't return NULL for \"\".\n");
527     }
528 
529     log_verbose("Testing u_strpbrk() with surrogates");
530 
531     if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
532         log_err("u_strpbrk couldn't find first letter a.\n");
533     }
534     if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
535         log_err("u_strpbrk couldn't find d or c.\n");
536     }
537     if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
538         log_err("u_strpbrk couldn't find c or d.\n");
539     }
540     if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
541         log_err("u_strpbrk couldn't find c, d or h.\n");
542     }
543     if (u_strpbrk(testSurrogateString, f) != NULL) {
544         log_err("u_strpbrk didn't return NULL for \"f\".\n");
545     }
546     if (u_strpbrk(testSurrogateString, fg) != NULL) {
547         log_err("u_strpbrk didn't return NULL for \"fg\".\n");
548     }
549     if (u_strpbrk(testSurrogateString, gf) != NULL) {
550         log_err("u_strpbrk didn't return NULL for \"gf\".\n");
551     }
552     if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
553         log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
554     }
555     if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
556         log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
557     }
558     if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
559         log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
560     }
561     if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
562         log_err("u_strpbrk should have returned NULL for empty string.\n");
563     }
564     if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
565         log_err("u_strpbrk should have found bad surrogate.\n");
566     }
567 
568     log_verbose("Testing u_strcspn()");
569 
570     if (u_strcspn(testString, a) != 0) {
571         log_err("u_strcspn couldn't find first letter a.\n");
572     }
573     if (u_strcspn(testString, dc) != 2) {
574         log_err("u_strcspn couldn't find d or c.\n");
575     }
576     if (u_strcspn(testString, cd) != 2) {
577         log_err("u_strcspn couldn't find c or d.\n");
578     }
579     if (u_strcspn(testString, cdh) != 2) {
580         log_err("u_strcspn couldn't find c, d or h.\n");
581     }
582     if (u_strcspn(testString, f) != u_strlen(testString)) {
583         log_err("u_strcspn didn't return NULL for \"f\".\n");
584     }
585     if (u_strcspn(testString, fg) != u_strlen(testString)) {
586         log_err("u_strcspn didn't return NULL for \"fg\".\n");
587     }
588     if (u_strcspn(testString, gf) != u_strlen(testString)) {
589         log_err("u_strcspn didn't return NULL for \"gf\".\n");
590     }
591 
592     log_verbose("Testing u_strcspn() with surrogates");
593 
594     if (u_strcspn(testSurrogateString, a) != 1) {
595         log_err("u_strcspn couldn't find first letter a.\n");
596     }
597     if (u_strcspn(testSurrogateString, dc) != 5) {
598         log_err("u_strcspn couldn't find d or c.\n");
599     }
600     if (u_strcspn(testSurrogateString, cd) != 5) {
601         log_err("u_strcspn couldn't find c or d.\n");
602     }
603     if (u_strcspn(testSurrogateString, cdh) != 5) {
604         log_err("u_strcspn couldn't find c, d or h.\n");
605     }
606     if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
607         log_err("u_strcspn didn't return NULL for \"f\".\n");
608     }
609     if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
610         log_err("u_strcspn didn't return NULL for \"fg\".\n");
611     }
612     if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
613         log_err("u_strcspn didn't return NULL for \"gf\".\n");
614     }
615     if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
616         log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
617     }
618     if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
619         log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
620     }
621     if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
622         log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
623     }
624     if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
625         log_err("u_strcspn should have returned strlen for empty string.\n");
626     }
627 
628 
629     log_verbose("Testing u_strspn()");
630 
631     if (u_strspn(testString, a) != 1) {
632         log_err("u_strspn couldn't skip first letter a.\n");
633     }
634     if (u_strspn(testString, ab) != 2) {
635         log_err("u_strspn couldn't skip a or b.\n");
636     }
637     if (u_strspn(testString, ba) != 2) {
638         log_err("u_strspn couldn't skip a or b.\n");
639     }
640     if (u_strspn(testString, f) != 0) {
641         log_err("u_strspn didn't return 0 for \"f\".\n");
642     }
643     if (u_strspn(testString, dc) != 0) {
644         log_err("u_strspn couldn't find first letter a (skip d or c).\n");
645     }
646     if (u_strspn(testString, abcd) != u_strlen(testString)) {
647         log_err("u_strspn couldn't skip over the whole string.\n");
648     }
649     if (u_strspn(testString, empty) != 0) {
650         log_err("u_strspn should have returned 0 for empty string.\n");
651     }
652 
653     log_verbose("Testing u_strspn() with surrogates");
654     if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
655         log_err("u_strspn couldn't skip 0xdbff or a.\n");
656     }
657     if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
658         log_err("u_strspn couldn't skip 0xdbff or a.\n");
659     }
660     if (u_strspn(testSurrogateString, f) != 0) {
661         log_err("u_strspn couldn't skip d or c (skip first letter).\n");
662     }
663     if (u_strspn(testSurrogateString, dc) != 0) {
664         log_err("u_strspn couldn't skip d or c (skip first letter).\n");
665     }
666     if (u_strspn(testSurrogateString, cd) != 0) {
667         log_err("u_strspn couldn't skip d or c (skip first letter).\n");
668     }
669     if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
670         log_err("u_strspn couldn't skip whole string.\n");
671     }
672     if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
673         log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
674     }
675     if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
676         log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
677     }
678     if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
679         log_err("u_strspn should have returned 0 for empty string.\n");
680     }
681 }
682 
683 /*
684  * All binary Unicode string searches should behave the same for equivalent input.
685  * See Jitterbug 2145.
686  * There are some new functions, too - just test them all.
687  */
688 static void
TestSurrogateSearching()689 TestSurrogateSearching() {
690     static const UChar s[]={
691         /* 0       1       2     3       4     5       6     7       8       9    10 11 */
692         0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
693     }, sub_a[]={
694         0x61, 0
695     }, sub_b[]={
696         0x62, 0
697     }, sub_lead[]={
698         0xd801, 0
699     }, sub_trail[]={
700         0xdc02, 0
701     }, sub_supp[]={
702         0xd801, 0xdc02, 0
703     }, sub_supp2[]={
704         0xd801, 0xdc03, 0
705     }, sub_a_lead[]={
706         0x61, 0xd801, 0
707     }, sub_trail_a[]={
708         0xdc02, 0x61, 0
709     }, sub_aba[]={
710         0x61, 0x62, 0x61, 0
711     };
712     static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
713     static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
714 
715     const UChar *first, *last;
716 
717     /* search for NUL code point: find end of string */
718     first=s+u_strlen(s);
719 
720     if(
721         first!=u_strchr(s, nul) ||
722         first!=u_strchr32(s, nul) ||
723         first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
724         first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
725         first!=u_strrchr(s, nul) ||
726         first!=u_strrchr32(s, nul) ||
727         first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
728         first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
729     ) {
730         log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
731     }
732 
733     /* search for empty substring: find beginning of string */
734     if(
735         s!=u_strstr(s, &nul) ||
736         s!=u_strFindFirst(s, -1, &nul, -1) ||
737         s!=u_strFindFirst(s, -1, &nul, 0) ||
738         s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
739         s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
740         s!=u_strrstr(s, &nul) ||
741         s!=u_strFindLast(s, -1, &nul, -1) ||
742         s!=u_strFindLast(s, -1, &nul, 0) ||
743         s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
744         s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
745     ) {
746         log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
747     }
748 
749     /* find 'a' in s[1..10[ */
750     first=s+3;
751     last=s+7;
752     if(
753         first!=u_strchr(s+1, a) ||
754         first!=u_strchr32(s+1, a) ||
755         first!=u_memchr(s+1, a, 9) ||
756         first!=u_memchr32(s+1, a, 9) ||
757         first!=u_strstr(s+1, sub_a) ||
758         first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
759         first!=u_strFindFirst(s+1, -1, &a, 1) ||
760         first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
761         first!=u_strFindFirst(s+1, 9, &a, 1) ||
762         (s+10)!=u_strrchr(s+1, a) ||
763         (s+10)!=u_strrchr32(s+1, a) ||
764         last!=u_memrchr(s+1, a, 9) ||
765         last!=u_memrchr32(s+1, a, 9) ||
766         (s+10)!=u_strrstr(s+1, sub_a) ||
767         (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
768         (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
769         last!=u_strFindLast(s+1, 9, sub_a, -1) ||
770         last!=u_strFindLast(s+1, 9, &a, 1)
771     ) {
772         log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
773     }
774 
775     /* do not find 'b' in s[1..10[ */
776     if(
777         NULL!=u_strchr(s+1, b) ||
778         NULL!=u_strchr32(s+1, b) ||
779         NULL!=u_memchr(s+1, b, 9) ||
780         NULL!=u_memchr32(s+1, b, 9) ||
781         NULL!=u_strstr(s+1, sub_b) ||
782         NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
783         NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
784         NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
785         NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
786         NULL!=u_strrchr(s+1, b) ||
787         NULL!=u_strrchr32(s+1, b) ||
788         NULL!=u_memrchr(s+1, b, 9) ||
789         NULL!=u_memrchr32(s+1, b, 9) ||
790         NULL!=u_strrstr(s+1, sub_b) ||
791         NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
792         NULL!=u_strFindLast(s+1, -1, &b, 1) ||
793         NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
794         NULL!=u_strFindLast(s+1, 9, &b, 1)
795     ) {
796         log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
797     }
798 
799     /* do not find a non-code point in s[1..10[ */
800     if(
801         NULL!=u_strchr32(s+1, ill) ||
802         NULL!=u_memchr32(s+1, ill, 9) ||
803         NULL!=u_strrchr32(s+1, ill) ||
804         NULL!=u_memrchr32(s+1, ill, 9)
805     ) {
806         log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
807     }
808 
809     /* find U+d801 in s[1..10[ */
810     first=s+6;
811     if(
812         first!=u_strchr(s+1, lead) ||
813         first!=u_strchr32(s+1, lead) ||
814         first!=u_memchr(s+1, lead, 9) ||
815         first!=u_memchr32(s+1, lead, 9) ||
816         first!=u_strstr(s+1, sub_lead) ||
817         first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
818         first!=u_strFindFirst(s+1, -1, &lead, 1) ||
819         first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
820         first!=u_strFindFirst(s+1, 9, &lead, 1) ||
821         first!=u_strrchr(s+1, lead) ||
822         first!=u_strrchr32(s+1, lead) ||
823         first!=u_memrchr(s+1, lead, 9) ||
824         first!=u_memrchr32(s+1, lead, 9) ||
825         first!=u_strrstr(s+1, sub_lead) ||
826         first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
827         first!=u_strFindLast(s+1, -1, &lead, 1) ||
828         first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
829         first!=u_strFindLast(s+1, 9, &lead, 1)
830     ) {
831         log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
832     }
833 
834     /* find U+dc02 in s[1..10[ */
835     first=s+4;
836     if(
837         first!=u_strchr(s+1, trail) ||
838         first!=u_strchr32(s+1, trail) ||
839         first!=u_memchr(s+1, trail, 9) ||
840         first!=u_memchr32(s+1, trail, 9) ||
841         first!=u_strstr(s+1, sub_trail) ||
842         first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
843         first!=u_strFindFirst(s+1, -1, &trail, 1) ||
844         first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
845         first!=u_strFindFirst(s+1, 9, &trail, 1) ||
846         first!=u_strrchr(s+1, trail) ||
847         first!=u_strrchr32(s+1, trail) ||
848         first!=u_memrchr(s+1, trail, 9) ||
849         first!=u_memrchr32(s+1, trail, 9) ||
850         first!=u_strrstr(s+1, sub_trail) ||
851         first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
852         first!=u_strFindLast(s+1, -1, &trail, 1) ||
853         first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
854         first!=u_strFindLast(s+1, 9, &trail, 1)
855     ) {
856         log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
857     }
858 
859     /* find U+10402 in s[1..10[ */
860     first=s+1;
861     last=s+8;
862     if(
863         first!=u_strchr32(s+1, supp) ||
864         first!=u_memchr32(s+1, supp, 9) ||
865         first!=u_strstr(s+1, sub_supp) ||
866         first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
867         first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
868         first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
869         first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
870         last!=u_strrchr32(s+1, supp) ||
871         last!=u_memrchr32(s+1, supp, 9) ||
872         last!=u_strrstr(s+1, sub_supp) ||
873         last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
874         last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
875         last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
876         last!=u_strFindLast(s+1, 9, sub_supp, 2)
877     ) {
878         log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
879     }
880 
881     /* do not find U+10402 in a single UChar */
882     if(
883         NULL!=u_memchr32(s+1, supp, 1) ||
884         NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
885         NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
886         NULL!=u_memrchr32(s+1, supp, 1) ||
887         NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
888         NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
889         NULL!=u_memrchr32(s+2, supp, 1) ||
890         NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
891         NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
892     ) {
893         log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
894     }
895 
896     /* do not find U+10403 in s[1..10[ */
897     if(
898         NULL!=u_strchr32(s+1, supp2) ||
899         NULL!=u_memchr32(s+1, supp2, 9) ||
900         NULL!=u_strstr(s+1, sub_supp2) ||
901         NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
902         NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
903         NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
904         NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
905         NULL!=u_strrchr32(s+1, supp2) ||
906         NULL!=u_memrchr32(s+1, supp2, 9) ||
907         NULL!=u_strrstr(s+1, sub_supp2) ||
908         NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
909         NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
910         NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
911         NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
912     ) {
913         log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
914     }
915 
916     /* find <0061 d801> in s[1..10[ */
917     first=s+5;
918     if(
919         first!=u_strstr(s+1, sub_a_lead) ||
920         first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
921         first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
922         first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
923         first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
924         first!=u_strrstr(s+1, sub_a_lead) ||
925         first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
926         first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
927         first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
928         first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
929     ) {
930         log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
931     }
932 
933     /* find <dc02 0061> in s[1..10[ */
934     first=s+4;
935     if(
936         first!=u_strstr(s+1, sub_trail_a) ||
937         first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
938         first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
939         first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
940         first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
941         first!=u_strrstr(s+1, sub_trail_a) ||
942         first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
943         first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
944         first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
945         first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
946     ) {
947         log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
948     }
949 
950     /* do not find "aba" in s[1..10[ */
951     if(
952         NULL!=u_strstr(s+1, sub_aba) ||
953         NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
954         NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
955         NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
956         NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
957         NULL!=u_strrstr(s+1, sub_aba) ||
958         NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
959         NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
960         NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
961         NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
962     ) {
963         log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
964     }
965 }
966 
TestStringCopy()967 static void TestStringCopy()
968 {
969     UChar temp[40];
970     UChar *result=0;
971     UChar subString[5];
972     UChar uchars[]={0x61, 0x62, 0x63, 0x00};
973     char  charOut[40];
974     char  chars[]="abc";    /* needs default codepage */
975 
976     log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
977 
978     u_uastrcpy(temp, "abc");
979     if(u_strcmp(temp, uchars) != 0) {
980         log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
981     }
982 
983     temp[0] = 0xFB; /* load garbage into it */
984     temp[1] = 0xFB;
985     temp[2] = 0xFB;
986     temp[3] = 0xFB;
987 
988     u_uastrncpy(temp, "abcabcabc", 3);
989     if(u_strncmp(uchars, temp, 3) != 0){
990         log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
991     }
992     if(temp[3] != 0xFB) {
993         log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
994     }
995 
996     charOut[0] = (char)0x7B; /* load garbage into it */
997     charOut[1] = (char)0x7B;
998     charOut[2] = (char)0x7B;
999     charOut[3] = (char)0x7B;
1000 
1001     temp[0] = 0x0061;
1002     temp[1] = 0x0062;
1003     temp[2] = 0x0063;
1004     temp[3] = 0x0061;
1005     temp[4] = 0x0062;
1006     temp[5] = 0x0063;
1007     temp[6] = 0x0000;
1008 
1009     u_austrncpy(charOut, temp, 3);
1010     if(strncmp(chars, charOut, 3) != 0){
1011         log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1012     }
1013     if(charOut[3] != (char)0x7B) {
1014         log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1015     }
1016 
1017     /*Testing u_strchr()*/
1018     log_verbose("Testing u_strchr\n");
1019     temp[0]=0x42;
1020     temp[1]=0x62;
1021     temp[2]=0x62;
1022     temp[3]=0x63;
1023     temp[4]=0xd841;
1024     temp[5]=0xd841;
1025     temp[6]=0xdc02;
1026     temp[7]=0;
1027     result=u_strchr(temp, (UChar)0x62);
1028     if(result != temp+1){
1029         log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1030     }
1031     /*Testing u_strstr()*/
1032     log_verbose("Testing u_strstr\n");
1033     subString[0]=0x62;
1034     subString[1]=0x63;
1035     subString[2]=0;
1036     result=u_strstr(temp, subString);
1037     if(result != temp+2){
1038         log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1039     }
1040     result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1041     if(result != temp){
1042         log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1043     }
1044     result=u_strstr(subString, temp);
1045     if(result != NULL){
1046         log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1047     }
1048 
1049     /*Testing u_strchr32*/
1050     log_verbose("Testing u_strchr32\n");
1051     result=u_strchr32(temp, (UChar32)0x62);
1052     if(result != temp+1){
1053         log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1054     }
1055     result=u_strchr32(temp, (UChar32)0xfb);
1056     if(result != NULL){
1057         log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1058     }
1059     result=u_strchr32(temp, (UChar32)0x20402);
1060     if(result != temp+5){
1061         log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1062     }
1063 
1064     temp[7]=0xfc00;
1065     result=u_memchr32(temp, (UChar32)0x20402, 7);
1066     if(result != temp+5){
1067         log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1068     }
1069     result=u_memchr32(temp, (UChar32)0x20402, 6);
1070     if(result != NULL){
1071         log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1072     }
1073     result=u_memchr32(temp, (UChar32)0x20402, 1);
1074     if(result != NULL){
1075         log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1076     }
1077     result=u_memchr32(temp, (UChar32)0xfc00, 8);
1078     if(result != temp+7){
1079         log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1080     }
1081 }
1082 
1083 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1084 
1085 static void
TestUnescape()1086 TestUnescape() {
1087     static UChar buffer[200];
1088 
1089     static const char* input =
1090         "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1091 
1092     static const UChar expect[]={
1093         0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1094         0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1095         0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1096         0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1097     };
1098     static const int32_t explength = UPRV_LENGTHOF(expect)-1;
1099     int32_t length;
1100 
1101     /* test u_unescape() */
1102     length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
1103     if(length!=explength || u_strcmp(buffer, expect)!=0) {
1104         log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1105                 explength);
1106     }
1107 
1108     /* try preflighting */
1109     length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
1110     if(length!=explength || u_strcmp(buffer, expect)!=0) {
1111         log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1112     }
1113 
1114     /* ### TODO: test u_unescapeAt() */
1115 }
1116 
1117 /* test code point counting functions --------------------------------------- */
1118 
1119 /* reference implementation of u_strHasMoreChar32Than() */
1120 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1121 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1122     int32_t count=u_countChar32(s, length);
1123     return count>number;
1124 }
1125 
1126 /* compare the real function against the reference */
1127 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1128 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1129     if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1130         log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1131                 i, length, number, u_strHasMoreChar32Than(s, length, number));
1132     }
1133 }
1134 
1135 static void
TestCountChar32()1136 TestCountChar32() {
1137     static const UChar string[]={
1138         0x61, 0x62, 0xd800, 0xdc00,
1139         0xd801, 0xdc01, 0x63, 0xd802,
1140         0x64, 0xdc03, 0x65, 0x66,
1141         0xd804, 0xdc04, 0xd805, 0xdc05,
1142         0x67
1143     };
1144     UChar buffer[100];
1145     int32_t i, length, number;
1146 
1147     /* test u_strHasMoreChar32Than() with length>=0 */
1148     length=UPRV_LENGTHOF(string);
1149     while(length>=0) {
1150         for(i=0; i<=length; ++i) {
1151             for(number=-1; number<=((length-i)+2); ++number) {
1152                 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1153             }
1154         }
1155         --length;
1156     }
1157 
1158     /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1159     length=UPRV_LENGTHOF(string);
1160     u_memcpy(buffer, string, length);
1161     while(length>=0) {
1162         buffer[length]=0;
1163         for(i=0; i<=length; ++i) {
1164             for(number=-1; number<=((length-i)+2); ++number) {
1165                 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
1166             }
1167         }
1168         --length;
1169     }
1170 
1171     /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1172     for(length=-1; length<=1; ++length) {
1173         for(i=0; i<=length; ++i) {
1174             for(number=-2; number<=2; ++number) {
1175                 _testStrHasMoreChar32Than(NULL, 0, length, number);
1176             }
1177         }
1178     }
1179 }
1180 
1181 /* UCharIterator ------------------------------------------------------------ */
1182 
1183 /*
1184  * Compare results from two iterators, should be same.
1185  * Assume that the text is not empty and that
1186  * iteration start==0 and iteration limit==length.
1187  */
1188 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1189 compareIterators(UCharIterator *iter1, const char *n1,
1190                  UCharIterator *iter2, const char *n2) {
1191     int32_t i, pos1, pos2, middle, length;
1192     UChar32 c1, c2;
1193 
1194     /* compare lengths */
1195     length=iter1->getIndex(iter1, UITER_LENGTH);
1196     pos2=iter2->getIndex(iter2, UITER_LENGTH);
1197     if(length!=pos2) {
1198         log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1199         return;
1200     }
1201 
1202     /* set into the middle */
1203     middle=length/2;
1204 
1205     pos1=iter1->move(iter1, middle, UITER_ZERO);
1206     if(pos1!=middle) {
1207         log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1208         return;
1209     }
1210 
1211     pos2=iter2->move(iter2, middle, UITER_ZERO);
1212     if(pos2!=middle) {
1213         log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1214         return;
1215     }
1216 
1217     /* test current() */
1218     c1=iter1->current(iter1);
1219     c2=iter2->current(iter2);
1220     if(c1!=c2) {
1221         log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1222         return;
1223     }
1224 
1225     /* move forward 3 UChars */
1226     for(i=0; i<3; ++i) {
1227         c1=iter1->next(iter1);
1228         c2=iter2->next(iter2);
1229         if(c1!=c2) {
1230             log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1231             return;
1232         }
1233     }
1234 
1235     /* move backward 5 UChars */
1236     for(i=0; i<5; ++i) {
1237         c1=iter1->previous(iter1);
1238         c2=iter2->previous(iter2);
1239         if(c1!=c2) {
1240             log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1241             return;
1242         }
1243     }
1244 
1245     /* iterate forward from the beginning */
1246     pos1=iter1->move(iter1, 0, UITER_START);
1247     if(pos1<0) {
1248         log_err("%s->move(start) failed\n", n1);
1249         return;
1250     }
1251     if(!iter1->hasNext(iter1)) {
1252         log_err("%s->hasNext() at the start returns FALSE\n", n1);
1253         return;
1254     }
1255 
1256     pos2=iter2->move(iter2, 0, UITER_START);
1257     if(pos2<0) {
1258         log_err("%s->move(start) failed\n", n2);
1259         return;
1260     }
1261     if(!iter2->hasNext(iter2)) {
1262         log_err("%s->hasNext() at the start returns FALSE\n", n2);
1263         return;
1264     }
1265 
1266     do {
1267         c1=iter1->next(iter1);
1268         c2=iter2->next(iter2);
1269         if(c1!=c2) {
1270             log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1271             return;
1272         }
1273     } while(c1>=0);
1274 
1275     if(iter1->hasNext(iter1)) {
1276         log_err("%s->hasNext() at the end returns TRUE\n", n1);
1277         return;
1278     }
1279     if(iter2->hasNext(iter2)) {
1280         log_err("%s->hasNext() at the end returns TRUE\n", n2);
1281         return;
1282     }
1283 
1284     /* back to the middle */
1285     pos1=iter1->move(iter1, middle, UITER_ZERO);
1286     if(pos1!=middle) {
1287         log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1288         return;
1289     }
1290 
1291     pos2=iter2->move(iter2, middle, UITER_ZERO);
1292     if(pos2!=middle) {
1293         log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1294         return;
1295     }
1296 
1297     /* move to index 1 */
1298     pos1=iter1->move(iter1, 1, UITER_ZERO);
1299     if(pos1!=1) {
1300         log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1301         return;
1302     }
1303 
1304     pos2=iter2->move(iter2, 1, UITER_ZERO);
1305     if(pos2!=1) {
1306         log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1307         return;
1308     }
1309 
1310     /* iterate backward from the end */
1311     pos1=iter1->move(iter1, 0, UITER_LIMIT);
1312     if(pos1<0) {
1313         log_err("%s->move(limit) failed\n", n1);
1314         return;
1315     }
1316     if(!iter1->hasPrevious(iter1)) {
1317         log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1318         return;
1319     }
1320 
1321     pos2=iter2->move(iter2, 0, UITER_LIMIT);
1322     if(pos2<0) {
1323         log_err("%s->move(limit) failed\n", n2);
1324         return;
1325     }
1326     if(!iter2->hasPrevious(iter2)) {
1327         log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1328         return;
1329     }
1330 
1331     do {
1332         c1=iter1->previous(iter1);
1333         c2=iter2->previous(iter2);
1334         if(c1!=c2) {
1335             log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1336             return;
1337         }
1338     } while(c1>=0);
1339 
1340     if(iter1->hasPrevious(iter1)) {
1341         log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1342         return;
1343     }
1344     if(iter2->hasPrevious(iter2)) {
1345         log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1346         return;
1347     }
1348 }
1349 
1350 /*
1351  * Test the iterator's getState() and setState() functions.
1352  * iter1 and iter2 must be set up for the same iterator type and the same string
1353  * but may be physically different structs (different addresses).
1354  *
1355  * Assume that the text is not empty and that
1356  * iteration start==0 and iteration limit==length.
1357  * It must be 2<=middle<=length-2.
1358  */
1359 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1360 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1361     UChar32 u[4];
1362 
1363     UErrorCode errorCode;
1364     UChar32 c;
1365     uint32_t state;
1366     int32_t i, j;
1367 
1368     /* get four UChars from the middle of the string */
1369     iter1->move(iter1, middle-2, UITER_ZERO);
1370     for(i=0; i<4; ++i) {
1371         c=iter1->next(iter1);
1372         if(c<0) {
1373             /* the test violates the assumptions, see comment above */
1374             log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1375             return;
1376         }
1377         u[i]=c;
1378     }
1379 
1380     /* move to the middle and get the state */
1381     iter1->move(iter1, -2, UITER_CURRENT);
1382     state=uiter_getState(iter1);
1383 
1384     /* set the state into the second iterator and compare the results */
1385     errorCode=U_ZERO_ERROR;
1386     uiter_setState(iter2, state, &errorCode);
1387     if(U_FAILURE(errorCode)) {
1388         log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1389         return;
1390     }
1391 
1392     c=iter2->current(iter2);
1393     if(c!=u[2]) {
1394         log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1395     }
1396 
1397     c=iter2->previous(iter2);
1398     if(c!=u[1]) {
1399         log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1400     }
1401 
1402     iter2->move(iter2, 2, UITER_CURRENT);
1403     c=iter2->next(iter2);
1404     if(c!=u[3]) {
1405         log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1406     }
1407 
1408     iter2->move(iter2, -3, UITER_CURRENT);
1409     c=iter2->previous(iter2);
1410     if(c!=u[0]) {
1411         log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1412     }
1413 
1414     /* move the second iterator back to the middle */
1415     iter2->move(iter2, 1, UITER_CURRENT);
1416     iter2->next(iter2);
1417 
1418     /* check that both are in the middle */
1419     i=iter1->getIndex(iter1, UITER_CURRENT);
1420     j=iter2->getIndex(iter2, UITER_CURRENT);
1421     if(i!=middle) {
1422         log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1423     }
1424     if(i!=j) {
1425         log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1426     }
1427 
1428     /* compare lengths */
1429     i=iter1->getIndex(iter1, UITER_LENGTH);
1430     j=iter2->getIndex(iter2, UITER_LENGTH);
1431     if(i!=j) {
1432         log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1433     }
1434 }
1435 
1436 static void
TestUCharIterator()1437 TestUCharIterator() {
1438     static const UChar text[]={
1439         0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1440     };
1441     char bytes[40];
1442 
1443     UCharIterator iter, iter1, iter2;
1444     UConverter *cnv;
1445     UErrorCode errorCode;
1446     int32_t length;
1447 
1448     /* simple API/code coverage - test NOOP UCharIterator */
1449     uiter_setString(&iter, NULL, 0);
1450     if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1451         iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1452         iter.hasNext(&iter) || iter.hasPrevious(&iter)
1453     ) {
1454         log_err("NOOP UCharIterator behaves unexpectedly\n");
1455     }
1456 
1457     /* test get/set state */
1458     length=UPRV_LENGTHOF(text)-1;
1459     uiter_setString(&iter1, text, -1);
1460     uiter_setString(&iter2, text, length);
1461     testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1462     testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1463 
1464     /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1465     errorCode=U_ZERO_ERROR;
1466     u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1467     if(U_FAILURE(errorCode)) {
1468         log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1469         return;
1470     }
1471 
1472     uiter_setString(&iter1, text, -1);
1473     uiter_setUTF8(&iter2, bytes, length);
1474     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1475 
1476     /* try again with length=-1 */
1477     uiter_setUTF8(&iter2, bytes, -1);
1478     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1479 
1480     /* test get/set state */
1481     length=UPRV_LENGTHOF(text)-1;
1482     uiter_setUTF8(&iter1, bytes, -1);
1483     testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1484     testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1485 
1486     /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1487     errorCode=U_ZERO_ERROR;
1488     cnv=ucnv_open("UTF-16BE", &errorCode);
1489     length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1490     ucnv_close(cnv);
1491     if(U_FAILURE(errorCode)) {
1492         log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1493         return;
1494     }
1495 
1496     /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1497     bytes[length]=bytes[length+1]=0;
1498 
1499     uiter_setString(&iter1, text, -1);
1500     uiter_setUTF16BE(&iter2, bytes, length);
1501     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1502 
1503     /* try again with length=-1 */
1504     uiter_setUTF16BE(&iter2, bytes, -1);
1505     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1506 
1507     /* try again after moving the bytes up one, and with length=-1 */
1508     memmove(bytes+1, bytes, length+2);
1509     uiter_setUTF16BE(&iter2, bytes+1, -1);
1510     compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1511 
1512     /* ### TODO test other iterators: CharacterIterator, Replaceable */
1513 }
1514