1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 2002-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: custrtst.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002oct09
14 * created by: Markus W. Scherer
15 *
16 * Tests of ustring.h Unicode string API functions.
17 */
18
19 #include "unicode/ustring.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/uiter.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 #include <string.h>
25
/*
 * Sign of an integer: yields -1, 0 or +1.
 * The argument is converted to int32_t before it is inspected, and it is
 * evaluated twice, so it must not have side effects.
 * Plain comparisons are used instead of right-shifting a possibly negative
 * value, because the result of such a shift is implementation-defined in C.
 */
#define _SIGN(value) (((int32_t)(value)>0)-((int32_t)(value)<0))
28
29 /* test setup --------------------------------------------------------------- */
30
/* File-local helper that builds the UChar test data used by TestStringFunctions(). */
static void setUpDataTable(void);

/* File-local test functions; each one is registered by addUStringTest(). */
static void TestStringCopy(void);
static void TestStringFunctions(void);
static void TestStringSearching(void);
static void TestSurrogateSearching(void);
static void TestUnescape(void);
static void TestCountChar32(void);
static void TestUCharIterator(void);

/* Registration entry point; the only function declared here without `static`. */
void addUStringTest(TestNode** root);
41
/*
 * Registers the test functions listed in the table below with the test tree
 * rooted at `root`, one addTest() call per entry, in table order.
 */
void addUStringTest(TestNode** root)
{
    static const struct {
        void (*function)(void);   /* test to run */
        const char *path;         /* name of the test in the test tree */
    } registrations[] = {
        { &TestStringCopy,         "tsutil/custrtst/TestStringCopy" },
        { &TestStringFunctions,    "tsutil/custrtst/TestStringFunctions" },
        { &TestStringSearching,    "tsutil/custrtst/TestStringSearching" },
        { &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching" },
        { &TestUnescape,           "tsutil/custrtst/TestUnescape" },
        { &TestCountChar32,        "tsutil/custrtst/TestCountChar32" },
        { &TestUCharIterator,      "tsutil/custrtst/TestUCharIterator" }
    };
    const int32_t count = (int32_t)(sizeof(registrations) / sizeof(registrations[0]));
    int32_t index;

    for (index = 0; index < count; ++index) {
        addTest(root, registrations[index].function, registrations[index].path);
    }
}
52
53 /* test data for TestStringFunctions ---------------------------------------- */
54
/*
 * UChar copies of the raw[][] strings, indexed the same way: dataTable[row][column].
 * Allocated and filled by setUpDataTable() when it is NULL, and freed and reset
 * to NULL by cleanUpDataTable().
 */
UChar*** dataTable = NULL;
56
/*
 * Invariant-character source strings for dataTable, as raw[row][column]:
 *   row 0 - first string,
 *   row 1 - second string,
 *   row 2 - row 0 and row 1 of the same column concatenated.
 * Column 3 repeats column 0.
 */
static const char* raw[3][4] = {

    /* First String */
    {   "English_",  "French_",   "Croatian_", "English_"},
    /* Second String */
    {   "United States",    "France",   "Croatia",  "United States"},

    /* Concatenated string */
    {   "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
};
67
setUpDataTable()68 static void setUpDataTable()
69 {
70 int32_t i,j;
71 if(dataTable == NULL) {
72 dataTable = (UChar***)calloc(sizeof(UChar**),3);
73
74 for (i = 0; i < 3; i++) {
75 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
76 for (j = 0; j < 4; j++){
77 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
78 u_uastrcpy(dataTable[i][j],raw[i][j]);
79 }
80 }
81 }
82 }
83
cleanUpDataTable()84 static void cleanUpDataTable()
85 {
86 int32_t i,j;
87 if(dataTable != NULL) {
88 for (i=0; i<3; i++) {
89 for(j = 0; j<4; j++) {
90 free(dataTable[i][j]);
91 }
92 free(dataTable[i]);
93 }
94 free(dataTable);
95 }
96 dataTable = NULL;
97 }
98
99 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions()100 static void TestStringFunctions()
101 {
102 int32_t i,j,k;
103 UChar temp[512];
104 UChar nullTemp[512];
105 char test[512];
106 char tempOut[512];
107
108 setUpDataTable();
109
110 log_verbose("Testing u_strlen()\n");
111 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
112 log_err("There is an error in u_strlen()");
113
114 log_verbose("Testing u_memcpy() and u_memcmp()\n");
115
116 for(i=0;i<3;++i)
117 {
118 for(j=0;j<4;++j)
119 {
120 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
121 temp[0] = 0;
122 temp[7] = 0xA4; /* Mark the end */
123 u_memcpy(temp,dataTable[i][j], 7);
124
125 if(temp[7] != 0xA4)
126 log_err("an error occured in u_memcpy()\n");
127 if(u_memcmp(temp, dataTable[i][j], 7)!=0)
128 log_err("an error occured in u_memcpy() or u_memcmp()\n");
129 }
130 }
131 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
132 log_err("an error occured in u_memcmp()\n");
133
134 log_verbose("Testing u_memset()\n");
135 nullTemp[0] = 0;
136 nullTemp[7] = 0;
137 u_memset(nullTemp, 0xa4, 7);
138 for (i = 0; i < 7; i++) {
139 if(nullTemp[i] != 0xa4) {
140 log_err("an error occured in u_memset()\n");
141 }
142 }
143 if(nullTemp[7] != 0) {
144 log_err("u_memset() went too far\n");
145 }
146
147 u_memset(nullTemp, 0, 7);
148 nullTemp[7] = 0xa4;
149 temp[7] = 0;
150 u_memcpy(temp,nullTemp, 7);
151 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
152 log_err("an error occured in u_memcpy() or u_memcmp()\n");
153
154
155 log_verbose("Testing u_memmove()\n");
156 for (i = 0; i < 7; i++) {
157 temp[i] = (UChar)i;
158 }
159 u_memmove(temp + 1, temp, 7);
160 if(temp[0] != 0) {
161 log_err("an error occured in u_memmove()\n");
162 }
163 for (i = 1; i <= 7; i++) {
164 if(temp[i] != (i - 1)) {
165 log_err("an error occured in u_memmove()\n");
166 }
167 }
168
169 log_verbose("Testing u_strcpy() and u_strcmp()\n");
170
171 for(i=0;i<3;++i)
172 {
173 for(j=0;j<4;++j)
174 {
175 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
176 temp[0] = 0;
177 u_strcpy(temp,dataTable[i][j]);
178
179 if(u_strcmp(temp,dataTable[i][j])!=0)
180 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
181 }
182 }
183 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
184 log_err("an error occured in u_memcmp()\n");
185
186 log_verbose("testing u_strcat()\n");
187 i=0;
188 for(j=0; j<2;++j)
189 {
190 u_uastrcpy(temp, "");
191 u_strcpy(temp,dataTable[i][j]);
192 u_strcat(temp,dataTable[i+1][j]);
193 if(u_strcmp(temp,dataTable[i+2][j])!=0)
194 log_err("something threw an error in u_strcat()\n");
195
196 }
197 log_verbose("Testing u_strncmp()\n");
198 for(i=0,j=0;j<4; ++j)
199 {
200 k=u_strlen(dataTable[i][j]);
201 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
202 log_err("Something threw an error in u_strncmp\n");
203 }
204 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
205 log_err("an error occured in u_memcmp()\n");
206
207
208 log_verbose("Testing u_strncat\n");
209 for(i=0,j=0;j<4; ++j)
210 {
211 k=u_strlen(dataTable[i][j]);
212
213 u_uastrcpy(temp,"");
214
215 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
216 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
217
218 }
219
220 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
221 for(i=2,j=0;j<4; ++j)
222 {
223 k=u_strlen(dataTable[i][j]);
224 u_strncpy(temp, dataTable[i][j],k);
225 temp[k] = 0xa4;
226
227 if(u_strncmp(temp, dataTable[i][j],k)!=0)
228 log_err("something threw an error in u_strncpy()\n");
229
230 if(temp[k] != 0xa4)
231 log_err("something threw an error in u_strncpy()\n");
232
233 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
234 u_uastrncpy(temp, raw[i][j], k-1);
235 if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
236 log_err("something threw an error in u_uastrncpy(k-1)\n");
237
238 if(temp[k-1] != 0x3F)
239 log_err("something threw an error in u_uastrncpy(k-1)\n");
240
241 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
242 u_uastrncpy(temp, raw[i][j], k+1);
243 if(u_strcmp(temp, dataTable[i][j])!=0)
244 log_err("something threw an error in u_uastrncpy(k+1)\n");
245
246 if(temp[k] != 0)
247 log_err("something threw an error in u_uastrncpy(k+1)\n");
248
249 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
250 u_uastrncpy(temp, raw[i][j], k);
251 if(u_strncmp(temp, dataTable[i][j], k)!=0)
252 log_err("something threw an error in u_uastrncpy(k)\n");
253
254 if(temp[k] != 0x3F)
255 log_err("something threw an error in u_uastrncpy(k)\n");
256 }
257
258 log_verbose("Testing u_strchr() and u_memchr()\n");
259
260 for(i=2,j=0;j<4;j++)
261 {
262 UChar saveVal = dataTable[i][j][0];
263 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
264 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
265
266 log_verbose("%s ", u_austrcpy(tempOut, findPtr));
267
268 if (findPtr == NULL || *findPtr != 0x005F) {
269 log_err("u_strchr can't find '_' in the string\n");
270 }
271
272 findPtr = u_strchr32(dataTable[i][j], 0x005F);
273 if (findPtr == NULL || *findPtr != 0x005F) {
274 log_err("u_strchr32 can't find '_' in the string\n");
275 }
276
277 findPtr = u_strchr(dataTable[i][j], 0);
278 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
279 log_err("u_strchr can't find NULL in the string\n");
280 }
281
282 findPtr = u_strchr32(dataTable[i][j], 0);
283 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
284 log_err("u_strchr32 can't find NULL in the string\n");
285 }
286
287 findPtr = u_memchr(dataTable[i][j], 0, dataSize);
288 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
289 log_err("u_memchr can't find NULL in the string\n");
290 }
291
292 findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
293 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
294 log_err("u_memchr32 can't find NULL in the string\n");
295 }
296
297 dataTable[i][j][0] = 0;
298 /* Make sure we skip over the NULL termination */
299 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
300 if (findPtr == NULL || *findPtr != 0x005F) {
301 log_err("u_memchr can't find '_' in the string\n");
302 }
303
304 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
305 if (findPtr == NULL || *findPtr != 0x005F) {
306 log_err("u_memchr32 can't find '_' in the string\n");
307 }
308 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
309 if (findPtr != NULL) {
310 log_err("Should have found NULL when the character is not there.\n");
311 }
312 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */
313 }
314
315 /*
316 * test that u_strchr32()
317 * does not find surrogate code points when they are part of matched pairs
318 * (= part of supplementary code points)
319 * Jitterbug 1542
320 */
321 {
322 static const UChar s[]={
323 /* 0 1 2 3 4 5 6 7 8 9 */
324 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
325 };
326
327 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
328 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
329 }
330 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
331 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
332 }
333 }
334
335 log_verbose("Testing u_austrcpy()");
336 u_austrcpy(test,dataTable[0][0]);
337 if(strcmp(test,raw[0][0])!=0)
338 log_err("There is an error in u_austrcpy()");
339
340
341 log_verbose("Testing u_strtok_r()");
342 {
343 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
344 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
345 UChar delimBuf[sizeof(test)];
346 UChar currTokenBuf[sizeof(tokString)];
347 UChar *state;
348 uint32_t currToken = 0;
349 UChar *ptr;
350
351 u_uastrcpy(temp, tokString);
352 u_uastrcpy(delimBuf, " ");
353
354 ptr = u_strtok_r(temp, delimBuf, &state);
355 u_uastrcpy(delimBuf, " ,");
356 while (ptr != NULL) {
357 u_uastrcpy(currTokenBuf, tokens[currToken]);
358 if (u_strcmp(ptr, currTokenBuf) != 0) {
359 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
360 }
361 ptr = u_strtok_r(NULL, delimBuf, &state);
362 currToken++;
363 }
364
365 if (currToken != sizeof(tokens)/sizeof(tokens[0])) {
366 log_err("Didn't get correct number of tokens\n");
367 }
368 state = delimBuf; /* Give it an "invalid" saveState */
369 u_uastrcpy(currTokenBuf, "");
370 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
371 log_err("Didn't get NULL for empty string\n");
372 }
373 if (state != NULL) {
374 log_err("State should be NULL for empty string\n");
375 }
376 state = delimBuf; /* Give it an "invalid" saveState */
377 u_uastrcpy(currTokenBuf, ", ,");
378 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
379 log_err("Didn't get NULL for a string of delimiters\n");
380 }
381 if (state != NULL) {
382 log_err("State should be NULL for a string of delimiters\n");
383 }
384
385 state = delimBuf; /* Give it an "invalid" saveState */
386 u_uastrcpy(currTokenBuf, "q, ,");
387 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
388 log_err("Got NULL for a string that does not begin with delimiters\n");
389 }
390 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
391 log_err("Didn't get NULL for a string that ends in delimiters\n");
392 }
393 if (state != NULL) {
394 log_err("State should be NULL for empty string\n");
395 }
396
397 state = delimBuf; /* Give it an "invalid" saveState */
398 u_uastrcpy(currTokenBuf, tokString);
399 u_uastrcpy(temp, tokString);
400 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */
401 ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
402 if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
403 log_err("Should have recieved the same string when there are no delimiters\n");
404 }
405 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
406 log_err("Should not have found another token in a one token string\n");
407 }
408 }
409
410 /* test u_strcmpCodePointOrder() */
411 {
412 /* these strings are in ascending order */
413 static const UChar strings[][4]={
414 { 0x61, 0 }, /* U+0061 */
415 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
416 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
417 { 0xd800, 0 }, /* U+d800 */
418 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
419 { 0xdfff, 0 }, /* U+dfff */
420 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
421 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
422 { 0xd800, 0xdc02, 0 }, /* U+10002 */
423 { 0xd84d, 0xdc56, 0 } /* U+23456 */
424 };
425
426 UCharIterator iter1, iter2;
427 int32_t len1, len2, r1, r2;
428
429 for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
430 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
431 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
432 }
433 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
434 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
435 }
436
437 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
438 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
439 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
440 }
441
442 /* test u_strCompare(TRUE) */
443 len1=u_strlen(strings[i]);
444 len2=u_strlen(strings[i+1]);
445 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
446 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
447 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
448 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
449 ) {
450 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
451 }
452
453 /* test u_strCompare(FALSE) */
454 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
455 r2=u_strcmp(strings[i], strings[i+1]);
456 if(_SIGN(r1)!=_SIGN(r2)) {
457 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
458 }
459
460 /* test u_strCompareIter() */
461 uiter_setString(&iter1, strings[i], len1);
462 uiter_setString(&iter2, strings[i+1], len2);
463 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
464 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
465 }
466 r1=u_strCompareIter(&iter1, &iter2, FALSE);
467 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
468 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
469 }
470 }
471 }
472
473 cleanUpDataTable();
474 }
475
TestStringSearching()476 static void TestStringSearching()
477 {
478 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
479 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
480 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
481 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
482 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
483 const UChar surrMatchSet4[] = {0x0000};
484 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
485 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
486 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
487 const UChar
488 empty[] = { 0 },
489 a[] = { 0x61, 0 },
490 ab[] = { 0x61, 0x62, 0 },
491 ba[] = { 0x62, 0x61, 0 },
492 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
493 cd[] = { 0x63, 0x64, 0 },
494 dc[] = { 0x64, 0x63, 0 },
495 cdh[] = { 0x63, 0x64, 0x68, 0 },
496 f[] = { 0x66, 0 },
497 fg[] = { 0x66, 0x67, 0 },
498 gf[] = { 0x67, 0x66, 0 };
499
500 log_verbose("Testing u_strpbrk()");
501
502 if (u_strpbrk(testString, a) != &testString[0]) {
503 log_err("u_strpbrk couldn't find first letter a.\n");
504 }
505 if (u_strpbrk(testString, dc) != &testString[2]) {
506 log_err("u_strpbrk couldn't find d or c.\n");
507 }
508 if (u_strpbrk(testString, cd) != &testString[2]) {
509 log_err("u_strpbrk couldn't find c or d.\n");
510 }
511 if (u_strpbrk(testString, cdh) != &testString[2]) {
512 log_err("u_strpbrk couldn't find c, d or h.\n");
513 }
514 if (u_strpbrk(testString, f) != NULL) {
515 log_err("u_strpbrk didn't return NULL for \"f\".\n");
516 }
517 if (u_strpbrk(testString, fg) != NULL) {
518 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
519 }
520 if (u_strpbrk(testString, gf) != NULL) {
521 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
522 }
523 if (u_strpbrk(testString, empty) != NULL) {
524 log_err("u_strpbrk didn't return NULL for \"\".\n");
525 }
526
527 log_verbose("Testing u_strpbrk() with surrogates");
528
529 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
530 log_err("u_strpbrk couldn't find first letter a.\n");
531 }
532 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
533 log_err("u_strpbrk couldn't find d or c.\n");
534 }
535 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
536 log_err("u_strpbrk couldn't find c or d.\n");
537 }
538 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
539 log_err("u_strpbrk couldn't find c, d or h.\n");
540 }
541 if (u_strpbrk(testSurrogateString, f) != NULL) {
542 log_err("u_strpbrk didn't return NULL for \"f\".\n");
543 }
544 if (u_strpbrk(testSurrogateString, fg) != NULL) {
545 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
546 }
547 if (u_strpbrk(testSurrogateString, gf) != NULL) {
548 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
549 }
550 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
551 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
552 }
553 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
554 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
555 }
556 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
557 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
558 }
559 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
560 log_err("u_strpbrk should have returned NULL for empty string.\n");
561 }
562 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
563 log_err("u_strpbrk should have found bad surrogate.\n");
564 }
565
566 log_verbose("Testing u_strcspn()");
567
568 if (u_strcspn(testString, a) != 0) {
569 log_err("u_strcspn couldn't find first letter a.\n");
570 }
571 if (u_strcspn(testString, dc) != 2) {
572 log_err("u_strcspn couldn't find d or c.\n");
573 }
574 if (u_strcspn(testString, cd) != 2) {
575 log_err("u_strcspn couldn't find c or d.\n");
576 }
577 if (u_strcspn(testString, cdh) != 2) {
578 log_err("u_strcspn couldn't find c, d or h.\n");
579 }
580 if (u_strcspn(testString, f) != u_strlen(testString)) {
581 log_err("u_strcspn didn't return NULL for \"f\".\n");
582 }
583 if (u_strcspn(testString, fg) != u_strlen(testString)) {
584 log_err("u_strcspn didn't return NULL for \"fg\".\n");
585 }
586 if (u_strcspn(testString, gf) != u_strlen(testString)) {
587 log_err("u_strcspn didn't return NULL for \"gf\".\n");
588 }
589
590 log_verbose("Testing u_strcspn() with surrogates");
591
592 if (u_strcspn(testSurrogateString, a) != 1) {
593 log_err("u_strcspn couldn't find first letter a.\n");
594 }
595 if (u_strcspn(testSurrogateString, dc) != 5) {
596 log_err("u_strcspn couldn't find d or c.\n");
597 }
598 if (u_strcspn(testSurrogateString, cd) != 5) {
599 log_err("u_strcspn couldn't find c or d.\n");
600 }
601 if (u_strcspn(testSurrogateString, cdh) != 5) {
602 log_err("u_strcspn couldn't find c, d or h.\n");
603 }
604 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
605 log_err("u_strcspn didn't return NULL for \"f\".\n");
606 }
607 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
608 log_err("u_strcspn didn't return NULL for \"fg\".\n");
609 }
610 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
611 log_err("u_strcspn didn't return NULL for \"gf\".\n");
612 }
613 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
614 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
615 }
616 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
617 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
618 }
619 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
620 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
621 }
622 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
623 log_err("u_strcspn should have returned strlen for empty string.\n");
624 }
625
626
627 log_verbose("Testing u_strspn()");
628
629 if (u_strspn(testString, a) != 1) {
630 log_err("u_strspn couldn't skip first letter a.\n");
631 }
632 if (u_strspn(testString, ab) != 2) {
633 log_err("u_strspn couldn't skip a or b.\n");
634 }
635 if (u_strspn(testString, ba) != 2) {
636 log_err("u_strspn couldn't skip a or b.\n");
637 }
638 if (u_strspn(testString, f) != 0) {
639 log_err("u_strspn didn't return 0 for \"f\".\n");
640 }
641 if (u_strspn(testString, dc) != 0) {
642 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
643 }
644 if (u_strspn(testString, abcd) != u_strlen(testString)) {
645 log_err("u_strspn couldn't skip over the whole string.\n");
646 }
647 if (u_strspn(testString, empty) != 0) {
648 log_err("u_strspn should have returned 0 for empty string.\n");
649 }
650
651 log_verbose("Testing u_strspn() with surrogates");
652 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
653 log_err("u_strspn couldn't skip 0xdbff or a.\n");
654 }
655 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
656 log_err("u_strspn couldn't skip 0xdbff or a.\n");
657 }
658 if (u_strspn(testSurrogateString, f) != 0) {
659 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
660 }
661 if (u_strspn(testSurrogateString, dc) != 0) {
662 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
663 }
664 if (u_strspn(testSurrogateString, cd) != 0) {
665 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
666 }
667 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
668 log_err("u_strspn couldn't skip whole string.\n");
669 }
670 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
671 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
672 }
673 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
674 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
675 }
676 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
677 log_err("u_strspn should have returned 0 for empty string.\n");
678 }
679 }
680
681 /*
682 * All binary Unicode string searches should behave the same for equivalent input.
683 * See Jitterbug 2145.
684 * There are some new functions, too - just test them all.
685 */
686 static void
TestSurrogateSearching()687 TestSurrogateSearching() {
688 static const UChar s[]={
689 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
690 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
691 }, sub_a[]={
692 0x61, 0
693 }, sub_b[]={
694 0x62, 0
695 }, sub_lead[]={
696 0xd801, 0
697 }, sub_trail[]={
698 0xdc02, 0
699 }, sub_supp[]={
700 0xd801, 0xdc02, 0
701 }, sub_supp2[]={
702 0xd801, 0xdc03, 0
703 }, sub_a_lead[]={
704 0x61, 0xd801, 0
705 }, sub_trail_a[]={
706 0xdc02, 0x61, 0
707 }, sub_aba[]={
708 0x61, 0x62, 0x61, 0
709 };
710 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
711 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
712
713 const UChar *first, *last;
714
715 /* search for NUL code point: find end of string */
716 first=s+u_strlen(s);
717
718 if(
719 first!=u_strchr(s, nul) ||
720 first!=u_strchr32(s, nul) ||
721 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
722 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
723 first!=u_strrchr(s, nul) ||
724 first!=u_strrchr32(s, nul) ||
725 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
726 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
727 ) {
728 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
729 }
730
731 /* search for empty substring: find beginning of string */
732 if(
733 s!=u_strstr(s, &nul) ||
734 s!=u_strFindFirst(s, -1, &nul, -1) ||
735 s!=u_strFindFirst(s, -1, &nul, 0) ||
736 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
737 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
738 s!=u_strrstr(s, &nul) ||
739 s!=u_strFindLast(s, -1, &nul, -1) ||
740 s!=u_strFindLast(s, -1, &nul, 0) ||
741 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
742 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
743 ) {
744 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
745 }
746
747 /* find 'a' in s[1..10[ */
748 first=s+3;
749 last=s+7;
750 if(
751 first!=u_strchr(s+1, a) ||
752 first!=u_strchr32(s+1, a) ||
753 first!=u_memchr(s+1, a, 9) ||
754 first!=u_memchr32(s+1, a, 9) ||
755 first!=u_strstr(s+1, sub_a) ||
756 first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
757 first!=u_strFindFirst(s+1, -1, &a, 1) ||
758 first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
759 first!=u_strFindFirst(s+1, 9, &a, 1) ||
760 (s+10)!=u_strrchr(s+1, a) ||
761 (s+10)!=u_strrchr32(s+1, a) ||
762 last!=u_memrchr(s+1, a, 9) ||
763 last!=u_memrchr32(s+1, a, 9) ||
764 (s+10)!=u_strrstr(s+1, sub_a) ||
765 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
766 (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
767 last!=u_strFindLast(s+1, 9, sub_a, -1) ||
768 last!=u_strFindLast(s+1, 9, &a, 1)
769 ) {
770 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
771 }
772
773 /* do not find 'b' in s[1..10[ */
774 if(
775 NULL!=u_strchr(s+1, b) ||
776 NULL!=u_strchr32(s+1, b) ||
777 NULL!=u_memchr(s+1, b, 9) ||
778 NULL!=u_memchr32(s+1, b, 9) ||
779 NULL!=u_strstr(s+1, sub_b) ||
780 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
781 NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
782 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
783 NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
784 NULL!=u_strrchr(s+1, b) ||
785 NULL!=u_strrchr32(s+1, b) ||
786 NULL!=u_memrchr(s+1, b, 9) ||
787 NULL!=u_memrchr32(s+1, b, 9) ||
788 NULL!=u_strrstr(s+1, sub_b) ||
789 NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
790 NULL!=u_strFindLast(s+1, -1, &b, 1) ||
791 NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
792 NULL!=u_strFindLast(s+1, 9, &b, 1)
793 ) {
794 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
795 }
796
797 /* do not find a non-code point in s[1..10[ */
798 if(
799 NULL!=u_strchr32(s+1, ill) ||
800 NULL!=u_memchr32(s+1, ill, 9) ||
801 NULL!=u_strrchr32(s+1, ill) ||
802 NULL!=u_memrchr32(s+1, ill, 9)
803 ) {
804 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
805 }
806
807 /* find U+d801 in s[1..10[ */
808 first=s+6;
809 if(
810 first!=u_strchr(s+1, lead) ||
811 first!=u_strchr32(s+1, lead) ||
812 first!=u_memchr(s+1, lead, 9) ||
813 first!=u_memchr32(s+1, lead, 9) ||
814 first!=u_strstr(s+1, sub_lead) ||
815 first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
816 first!=u_strFindFirst(s+1, -1, &lead, 1) ||
817 first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
818 first!=u_strFindFirst(s+1, 9, &lead, 1) ||
819 first!=u_strrchr(s+1, lead) ||
820 first!=u_strrchr32(s+1, lead) ||
821 first!=u_memrchr(s+1, lead, 9) ||
822 first!=u_memrchr32(s+1, lead, 9) ||
823 first!=u_strrstr(s+1, sub_lead) ||
824 first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
825 first!=u_strFindLast(s+1, -1, &lead, 1) ||
826 first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
827 first!=u_strFindLast(s+1, 9, &lead, 1)
828 ) {
829 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
830 }
831
832 /* find U+dc02 in s[1..10[ */
833 first=s+4;
834 if(
835 first!=u_strchr(s+1, trail) ||
836 first!=u_strchr32(s+1, trail) ||
837 first!=u_memchr(s+1, trail, 9) ||
838 first!=u_memchr32(s+1, trail, 9) ||
839 first!=u_strstr(s+1, sub_trail) ||
840 first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
841 first!=u_strFindFirst(s+1, -1, &trail, 1) ||
842 first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
843 first!=u_strFindFirst(s+1, 9, &trail, 1) ||
844 first!=u_strrchr(s+1, trail) ||
845 first!=u_strrchr32(s+1, trail) ||
846 first!=u_memrchr(s+1, trail, 9) ||
847 first!=u_memrchr32(s+1, trail, 9) ||
848 first!=u_strrstr(s+1, sub_trail) ||
849 first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
850 first!=u_strFindLast(s+1, -1, &trail, 1) ||
851 first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
852 first!=u_strFindLast(s+1, 9, &trail, 1)
853 ) {
854 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
855 }
856
857 /* find U+10402 in s[1..10[ */
858 first=s+1;
859 last=s+8;
860 if(
861 first!=u_strchr32(s+1, supp) ||
862 first!=u_memchr32(s+1, supp, 9) ||
863 first!=u_strstr(s+1, sub_supp) ||
864 first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
865 first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
866 first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
867 first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
868 last!=u_strrchr32(s+1, supp) ||
869 last!=u_memrchr32(s+1, supp, 9) ||
870 last!=u_strrstr(s+1, sub_supp) ||
871 last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
872 last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
873 last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
874 last!=u_strFindLast(s+1, 9, sub_supp, 2)
875 ) {
876 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
877 }
878
879 /* do not find U+10402 in a single UChar */
880 if(
881 NULL!=u_memchr32(s+1, supp, 1) ||
882 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
883 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
884 NULL!=u_memrchr32(s+1, supp, 1) ||
885 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
886 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
887 NULL!=u_memrchr32(s+2, supp, 1) ||
888 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
889 NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
890 ) {
891 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
892 }
893
894 /* do not find U+10403 in s[1..10[ */
895 if(
896 NULL!=u_strchr32(s+1, supp2) ||
897 NULL!=u_memchr32(s+1, supp2, 9) ||
898 NULL!=u_strstr(s+1, sub_supp2) ||
899 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
900 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
901 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
902 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
903 NULL!=u_strrchr32(s+1, supp2) ||
904 NULL!=u_memrchr32(s+1, supp2, 9) ||
905 NULL!=u_strrstr(s+1, sub_supp2) ||
906 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
907 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
908 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
909 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
910 ) {
911 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
912 }
913
914 /* find <0061 d801> in s[1..10[ */
915 first=s+5;
916 if(
917 first!=u_strstr(s+1, sub_a_lead) ||
918 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
919 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
920 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
921 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
922 first!=u_strrstr(s+1, sub_a_lead) ||
923 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
924 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
925 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
926 first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
927 ) {
928 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
929 }
930
931 /* find <dc02 0061> in s[1..10[ */
932 first=s+4;
933 if(
934 first!=u_strstr(s+1, sub_trail_a) ||
935 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
936 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
937 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
938 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
939 first!=u_strrstr(s+1, sub_trail_a) ||
940 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
941 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
942 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
943 first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
944 ) {
945 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
946 }
947
948 /* do not find "aba" in s[1..10[ */
949 if(
950 NULL!=u_strstr(s+1, sub_aba) ||
951 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
952 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
953 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
954 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
955 NULL!=u_strrstr(s+1, sub_aba) ||
956 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
957 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
958 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
959 NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
960 ) {
961 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
962 }
963 }
964
TestStringCopy()965 static void TestStringCopy()
966 {
967 UChar temp[40];
968 UChar *result=0;
969 UChar subString[5];
970 UChar uchars[]={0x61, 0x62, 0x63, 0x00};
971 char charOut[40];
972 char chars[]="abc"; /* needs default codepage */
973
974 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
975
976 u_uastrcpy(temp, "abc");
977 if(u_strcmp(temp, uchars) != 0) {
978 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
979 }
980
981 temp[0] = 0xFB; /* load garbage into it */
982 temp[1] = 0xFB;
983 temp[2] = 0xFB;
984 temp[3] = 0xFB;
985
986 u_uastrncpy(temp, "abcabcabc", 3);
987 if(u_strncmp(uchars, temp, 3) != 0){
988 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
989 }
990 if(temp[3] != 0xFB) {
991 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
992 }
993
994 charOut[0] = (char)0x7B; /* load garbage into it */
995 charOut[1] = (char)0x7B;
996 charOut[2] = (char)0x7B;
997 charOut[3] = (char)0x7B;
998
999 temp[0] = 0x0061;
1000 temp[1] = 0x0062;
1001 temp[2] = 0x0063;
1002 temp[3] = 0x0061;
1003 temp[4] = 0x0062;
1004 temp[5] = 0x0063;
1005 temp[6] = 0x0000;
1006
1007 u_austrncpy(charOut, temp, 3);
1008 if(strncmp(chars, charOut, 3) != 0){
1009 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1010 }
1011 if(charOut[3] != (char)0x7B) {
1012 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1013 }
1014
1015 /*Testing u_strchr()*/
1016 log_verbose("Testing u_strchr\n");
1017 temp[0]=0x42;
1018 temp[1]=0x62;
1019 temp[2]=0x62;
1020 temp[3]=0x63;
1021 temp[4]=0xd841;
1022 temp[5]=0xd841;
1023 temp[6]=0xdc02;
1024 temp[7]=0;
1025 result=u_strchr(temp, (UChar)0x62);
1026 if(result != temp+1){
1027 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1028 }
1029 /*Testing u_strstr()*/
1030 log_verbose("Testing u_strstr\n");
1031 subString[0]=0x62;
1032 subString[1]=0x63;
1033 subString[2]=0;
1034 result=u_strstr(temp, subString);
1035 if(result != temp+2){
1036 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1037 }
1038 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1039 if(result != temp){
1040 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1041 }
1042 result=u_strstr(subString, temp);
1043 if(result != NULL){
1044 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1045 }
1046
1047 /*Testing u_strchr32*/
1048 log_verbose("Testing u_strchr32\n");
1049 result=u_strchr32(temp, (UChar32)0x62);
1050 if(result != temp+1){
1051 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1052 }
1053 result=u_strchr32(temp, (UChar32)0xfb);
1054 if(result != NULL){
1055 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1056 }
1057 result=u_strchr32(temp, (UChar32)0x20402);
1058 if(result != temp+5){
1059 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1060 }
1061
1062 temp[7]=0xfc00;
1063 result=u_memchr32(temp, (UChar32)0x20402, 7);
1064 if(result != temp+5){
1065 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1066 }
1067 result=u_memchr32(temp, (UChar32)0x20402, 6);
1068 if(result != NULL){
1069 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1070 }
1071 result=u_memchr32(temp, (UChar32)0x20402, 1);
1072 if(result != NULL){
1073 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1074 }
1075 result=u_memchr32(temp, (UChar32)0xfc00, 8);
1076 if(result != temp+7){
1077 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1078 }
1079 }
1080
1081 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1082
1083 static void
TestUnescape()1084 TestUnescape() {
1085 static UChar buffer[200];
1086
1087 static const char* input =
1088 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1089
1090 static const UChar expect[]={
1091 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1092 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1093 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1094 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1095 };
1096 static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
1097 int32_t length;
1098
1099 /* test u_unescape() */
1100 length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
1101 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1102 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1103 explength);
1104 }
1105
1106 /* try preflighting */
1107 length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
1108 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1109 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1110 }
1111
1112 /* ### TODO: test u_unescapeAt() */
1113 }
1114
1115 /* test code point counting functions --------------------------------------- */
1116
1117 /* reference implementation of u_strHasMoreChar32Than() */
1118 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1119 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1120 int32_t count=u_countChar32(s, length);
1121 return count>number;
1122 }
1123
1124 /* compare the real function against the reference */
1125 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1126 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1127 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1128 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1129 i, length, number, u_strHasMoreChar32Than(s, length, number));
1130 }
1131 }
1132
1133 static void
TestCountChar32()1134 TestCountChar32() {
1135 static const UChar string[]={
1136 0x61, 0x62, 0xd800, 0xdc00,
1137 0xd801, 0xdc01, 0x63, 0xd802,
1138 0x64, 0xdc03, 0x65, 0x66,
1139 0xd804, 0xdc04, 0xd805, 0xdc05,
1140 0x67
1141 };
1142 UChar buffer[100];
1143 int32_t i, length, number;
1144
1145 /* test u_strHasMoreChar32Than() with length>=0 */
1146 length=UPRV_LENGTHOF(string);
1147 while(length>=0) {
1148 for(i=0; i<=length; ++i) {
1149 for(number=-1; number<=((length-i)+2); ++number) {
1150 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1151 }
1152 }
1153 --length;
1154 }
1155
1156 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1157 length=UPRV_LENGTHOF(string);
1158 u_memcpy(buffer, string, length);
1159 while(length>=0) {
1160 buffer[length]=0;
1161 for(i=0; i<=length; ++i) {
1162 for(number=-1; number<=((length-i)+2); ++number) {
1163 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
1164 }
1165 }
1166 --length;
1167 }
1168
1169 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1170 for(length=-1; length<=1; ++length) {
1171 for(i=0; i<=length; ++i) {
1172 for(number=-2; number<=2; ++number) {
1173 _testStrHasMoreChar32Than(NULL, 0, length, number);
1174 }
1175 }
1176 }
1177 }
1178
1179 /* UCharIterator ------------------------------------------------------------ */
1180
1181 /*
1182 * Compare results from two iterators, should be same.
1183 * Assume that the text is not empty and that
1184 * iteration start==0 and iteration limit==length.
1185 */
1186 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1187 compareIterators(UCharIterator *iter1, const char *n1,
1188 UCharIterator *iter2, const char *n2) {
1189 int32_t i, pos1, pos2, middle, length;
1190 UChar32 c1, c2;
1191
1192 /* compare lengths */
1193 length=iter1->getIndex(iter1, UITER_LENGTH);
1194 pos2=iter2->getIndex(iter2, UITER_LENGTH);
1195 if(length!=pos2) {
1196 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1197 return;
1198 }
1199
1200 /* set into the middle */
1201 middle=length/2;
1202
1203 pos1=iter1->move(iter1, middle, UITER_ZERO);
1204 if(pos1!=middle) {
1205 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1206 return;
1207 }
1208
1209 pos2=iter2->move(iter2, middle, UITER_ZERO);
1210 if(pos2!=middle) {
1211 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1212 return;
1213 }
1214
1215 /* test current() */
1216 c1=iter1->current(iter1);
1217 c2=iter2->current(iter2);
1218 if(c1!=c2) {
1219 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1220 return;
1221 }
1222
1223 /* move forward 3 UChars */
1224 for(i=0; i<3; ++i) {
1225 c1=iter1->next(iter1);
1226 c2=iter2->next(iter2);
1227 if(c1!=c2) {
1228 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1229 return;
1230 }
1231 }
1232
1233 /* move backward 5 UChars */
1234 for(i=0; i<5; ++i) {
1235 c1=iter1->previous(iter1);
1236 c2=iter2->previous(iter2);
1237 if(c1!=c2) {
1238 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1239 return;
1240 }
1241 }
1242
1243 /* iterate forward from the beginning */
1244 pos1=iter1->move(iter1, 0, UITER_START);
1245 if(pos1<0) {
1246 log_err("%s->move(start) failed\n", n1);
1247 return;
1248 }
1249 if(!iter1->hasNext(iter1)) {
1250 log_err("%s->hasNext() at the start returns FALSE\n", n1);
1251 return;
1252 }
1253
1254 pos2=iter2->move(iter2, 0, UITER_START);
1255 if(pos2<0) {
1256 log_err("%s->move(start) failed\n", n2);
1257 return;
1258 }
1259 if(!iter2->hasNext(iter2)) {
1260 log_err("%s->hasNext() at the start returns FALSE\n", n2);
1261 return;
1262 }
1263
1264 do {
1265 c1=iter1->next(iter1);
1266 c2=iter2->next(iter2);
1267 if(c1!=c2) {
1268 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1269 return;
1270 }
1271 } while(c1>=0);
1272
1273 if(iter1->hasNext(iter1)) {
1274 log_err("%s->hasNext() at the end returns TRUE\n", n1);
1275 return;
1276 }
1277 if(iter2->hasNext(iter2)) {
1278 log_err("%s->hasNext() at the end returns TRUE\n", n2);
1279 return;
1280 }
1281
1282 /* back to the middle */
1283 pos1=iter1->move(iter1, middle, UITER_ZERO);
1284 if(pos1!=middle) {
1285 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1286 return;
1287 }
1288
1289 pos2=iter2->move(iter2, middle, UITER_ZERO);
1290 if(pos2!=middle) {
1291 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1292 return;
1293 }
1294
1295 /* move to index 1 */
1296 pos1=iter1->move(iter1, 1, UITER_ZERO);
1297 if(pos1!=1) {
1298 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1299 return;
1300 }
1301
1302 pos2=iter2->move(iter2, 1, UITER_ZERO);
1303 if(pos2!=1) {
1304 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1305 return;
1306 }
1307
1308 /* iterate backward from the end */
1309 pos1=iter1->move(iter1, 0, UITER_LIMIT);
1310 if(pos1<0) {
1311 log_err("%s->move(limit) failed\n", n1);
1312 return;
1313 }
1314 if(!iter1->hasPrevious(iter1)) {
1315 log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1316 return;
1317 }
1318
1319 pos2=iter2->move(iter2, 0, UITER_LIMIT);
1320 if(pos2<0) {
1321 log_err("%s->move(limit) failed\n", n2);
1322 return;
1323 }
1324 if(!iter2->hasPrevious(iter2)) {
1325 log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1326 return;
1327 }
1328
1329 do {
1330 c1=iter1->previous(iter1);
1331 c2=iter2->previous(iter2);
1332 if(c1!=c2) {
1333 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1334 return;
1335 }
1336 } while(c1>=0);
1337
1338 if(iter1->hasPrevious(iter1)) {
1339 log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1340 return;
1341 }
1342 if(iter2->hasPrevious(iter2)) {
1343 log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1344 return;
1345 }
1346 }
1347
1348 /*
1349 * Test the iterator's getState() and setState() functions.
1350 * iter1 and iter2 must be set up for the same iterator type and the same string
1351 * but may be physically different structs (different addresses).
1352 *
1353 * Assume that the text is not empty and that
1354 * iteration start==0 and iteration limit==length.
1355 * It must be 2<=middle<=length-2.
1356 */
1357 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1358 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1359 UChar32 u[4];
1360
1361 UErrorCode errorCode;
1362 UChar32 c;
1363 uint32_t state;
1364 int32_t i, j;
1365
1366 /* get four UChars from the middle of the string */
1367 iter1->move(iter1, middle-2, UITER_ZERO);
1368 for(i=0; i<4; ++i) {
1369 c=iter1->next(iter1);
1370 if(c<0) {
1371 /* the test violates the assumptions, see comment above */
1372 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1373 return;
1374 }
1375 u[i]=c;
1376 }
1377
1378 /* move to the middle and get the state */
1379 iter1->move(iter1, -2, UITER_CURRENT);
1380 state=uiter_getState(iter1);
1381
1382 /* set the state into the second iterator and compare the results */
1383 errorCode=U_ZERO_ERROR;
1384 uiter_setState(iter2, state, &errorCode);
1385 if(U_FAILURE(errorCode)) {
1386 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1387 return;
1388 }
1389
1390 c=iter2->current(iter2);
1391 if(c!=u[2]) {
1392 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1393 }
1394
1395 c=iter2->previous(iter2);
1396 if(c!=u[1]) {
1397 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1398 }
1399
1400 iter2->move(iter2, 2, UITER_CURRENT);
1401 c=iter2->next(iter2);
1402 if(c!=u[3]) {
1403 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1404 }
1405
1406 iter2->move(iter2, -3, UITER_CURRENT);
1407 c=iter2->previous(iter2);
1408 if(c!=u[0]) {
1409 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1410 }
1411
1412 /* move the second iterator back to the middle */
1413 iter2->move(iter2, 1, UITER_CURRENT);
1414 iter2->next(iter2);
1415
1416 /* check that both are in the middle */
1417 i=iter1->getIndex(iter1, UITER_CURRENT);
1418 j=iter2->getIndex(iter2, UITER_CURRENT);
1419 if(i!=middle) {
1420 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1421 }
1422 if(i!=j) {
1423 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1424 }
1425
1426 /* compare lengths */
1427 i=iter1->getIndex(iter1, UITER_LENGTH);
1428 j=iter2->getIndex(iter2, UITER_LENGTH);
1429 if(i!=j) {
1430 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1431 }
1432 }
1433
1434 static void
TestUCharIterator()1435 TestUCharIterator() {
1436 static const UChar text[]={
1437 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1438 };
1439 char bytes[40];
1440
1441 UCharIterator iter, iter1, iter2;
1442 UConverter *cnv;
1443 UErrorCode errorCode;
1444 int32_t length;
1445
1446 /* simple API/code coverage - test NOOP UCharIterator */
1447 uiter_setString(&iter, NULL, 0);
1448 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1449 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1450 iter.hasNext(&iter) || iter.hasPrevious(&iter)
1451 ) {
1452 log_err("NOOP UCharIterator behaves unexpectedly\n");
1453 }
1454
1455 /* test get/set state */
1456 length=UPRV_LENGTHOF(text)-1;
1457 uiter_setString(&iter1, text, -1);
1458 uiter_setString(&iter2, text, length);
1459 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1460 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1461
1462 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1463 errorCode=U_ZERO_ERROR;
1464 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1465 if(U_FAILURE(errorCode)) {
1466 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1467 return;
1468 }
1469
1470 uiter_setString(&iter1, text, -1);
1471 uiter_setUTF8(&iter2, bytes, length);
1472 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1473
1474 /* try again with length=-1 */
1475 uiter_setUTF8(&iter2, bytes, -1);
1476 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1477
1478 /* test get/set state */
1479 length=UPRV_LENGTHOF(text)-1;
1480 uiter_setUTF8(&iter1, bytes, -1);
1481 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1482 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1483
1484 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1485 errorCode=U_ZERO_ERROR;
1486 cnv=ucnv_open("UTF-16BE", &errorCode);
1487 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1488 ucnv_close(cnv);
1489 if(U_FAILURE(errorCode)) {
1490 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1491 return;
1492 }
1493
1494 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1495 bytes[length]=bytes[length+1]=0;
1496
1497 uiter_setString(&iter1, text, -1);
1498 uiter_setUTF16BE(&iter2, bytes, length);
1499 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1500
1501 /* try again with length=-1 */
1502 uiter_setUTF16BE(&iter2, bytes, -1);
1503 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1504
1505 /* try again after moving the bytes up one, and with length=-1 */
1506 memmove(bytes+1, bytes, length+2);
1507 uiter_setUTF16BE(&iter2, bytes+1, -1);
1508 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1509
1510 /* ### TODO test other iterators: CharacterIterator, Replaceable */
1511 }
1512