1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2002-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: cstrcase.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002feb21
14 * created by: Markus W. Scherer
15 *
16 * Test file for string casing C API functions.
17 */
18
19 #include <string.h>
20 #include "unicode/utypes.h"
21 #include "unicode/uchar.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uloc.h"
24 #include "unicode/ubrk.h"
25 #include "unicode/ucasemap.h"
26 #include "cmemory.h"
27 #include "cintltst.h"
28 #include "ustr_imp.h"
29
30 /* test string case mapping functions --------------------------------------- */
31
32 static void
TestCaseLower(void)33 TestCaseLower(void) {
34 static const UChar
35
36 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
37 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
38 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff };
39
40 UChar buffer[32];
41 int32_t length;
42 UErrorCode errorCode;
43
44 /* lowercase with root locale and separate buffers */
45 buffer[0]=0xabcd;
46 errorCode=U_ZERO_ERROR;
47 length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
48 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
49 "",
50 &errorCode);
51 if( U_FAILURE(errorCode) ||
52 length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
53 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
54 buffer[length]!=0
55 ) {
56 log_err("error in u_strToLower(root locale)=%ld error=%s string matches: %s\t\nlowerRoot=%s\t\nbuffer=%s\n",
57 length,
58 u_errorName(errorCode),
59 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 &&
60 buffer[length]==0 ? "yes" : "no",
61 aescstrdup(lowerRoot,-1),
62 aescstrdup(buffer,-1));
63 }
64
65 /* lowercase with turkish locale and in the same buffer */
66 uprv_memcpy(buffer, beforeLower, sizeof(beforeLower));
67 buffer[sizeof(beforeLower)/U_SIZEOF_UCHAR]=0;
68 errorCode=U_ZERO_ERROR;
69 length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
70 buffer, -1, /* implicit srcLength */
71 "tr",
72 &errorCode);
73 if( U_FAILURE(errorCode) ||
74 length!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) ||
75 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
76 buffer[length]!=0
77 ) {
78 log_err("error in u_strToLower(turkish locale)=%ld error=%s string matches: %s\n",
79 length,
80 u_errorName(errorCode),
81 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
82 }
83
84 /* test preflighting */
85 buffer[0]=buffer[2]=0xabcd;
86 errorCode=U_ZERO_ERROR;
87 length=u_strToLower(buffer, 2, /* set destCapacity=2 */
88 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
89 "",
90 &errorCode);
91 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
92 length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
93 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
94 buffer[2]!=0xabcd
95 ) {
96 log_err("error in u_strToLower(root locale preflighting)=%ld error=%s string matches: %s\n",
97 length,
98 u_errorName(errorCode),
99 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
100 }
101
102 /* test error handling */
103 errorCode=U_ZERO_ERROR;
104 length=u_strToLower(NULL, sizeof(buffer)/U_SIZEOF_UCHAR,
105 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
106 "",
107 &errorCode);
108 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
109 log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n",
110 length,
111 u_errorName(errorCode));
112 }
113
114 buffer[0]=0xabcd;
115 errorCode=U_ZERO_ERROR;
116 length=u_strToLower(buffer, -1,
117 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
118 "",
119 &errorCode);
120 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
121 buffer[0]!=0xabcd
122 ) {
123 log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
124 length,
125 u_errorName(errorCode),
126 buffer[0]);
127 }
128 }
129
130 static void
TestCaseUpper(void)131 TestCaseUpper(void) {
132 static const UChar
133
134 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
135 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
136 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff };
137
138 UChar buffer[32];
139 int32_t length;
140 UErrorCode errorCode;
141
142 /* uppercase with root locale and in the same buffer */
143 uprv_memcpy(buffer, beforeUpper, sizeof(beforeUpper));
144 errorCode=U_ZERO_ERROR;
145 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
146 buffer, sizeof(beforeUpper)/U_SIZEOF_UCHAR,
147 "",
148 &errorCode);
149 if( U_FAILURE(errorCode) ||
150 length!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) ||
151 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
152 buffer[length]!=0
153 ) {
154 log_err("error in u_strToUpper(root locale)=%ld error=%s string matches: %s\n",
155 length,
156 u_errorName(errorCode),
157 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
158 }
159
160 /* uppercase with turkish locale and separate buffers */
161 buffer[0]=0xabcd;
162 errorCode=U_ZERO_ERROR;
163 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
164 beforeUpper, sizeof(beforeUpper)/U_SIZEOF_UCHAR,
165 "tr",
166 &errorCode);
167 if( U_FAILURE(errorCode) ||
168 length!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) ||
169 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
170 buffer[length]!=0
171 ) {
172 log_err("error in u_strToUpper(turkish locale)=%ld error=%s string matches: %s\n",
173 length,
174 u_errorName(errorCode),
175 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
176 }
177
178 /* test preflighting */
179 errorCode=U_ZERO_ERROR;
180 length=u_strToUpper(NULL, 0,
181 beforeUpper, sizeof(beforeUpper)/U_SIZEOF_UCHAR,
182 "tr",
183 &errorCode);
184 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
185 length!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR)
186 ) {
187 log_err("error in u_strToUpper(turkish locale pure preflighting)=%ld error=%s\n",
188 length,
189 u_errorName(errorCode));
190 }
191
192 /* test error handling */
193 buffer[0]=0xabcd;
194 errorCode=U_ZERO_ERROR;
195 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
196 NULL, sizeof(beforeUpper)/U_SIZEOF_UCHAR,
197 "tr",
198 &errorCode);
199 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
200 buffer[0]!=0xabcd
201 ) {
202 log_err("error in u_strToUpper(turkish locale src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
203 length,
204 u_errorName(errorCode),
205 buffer[0]);
206 }
207
208 buffer[0]=0xabcd;
209 errorCode=U_ZERO_ERROR;
210 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
211 beforeUpper, -2,
212 "tr",
213 &errorCode);
214 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
215 buffer[0]!=0xabcd
216 ) {
217 log_err("error in u_strToUpper(turkish locale srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
218 length,
219 u_errorName(errorCode),
220 buffer[0]);
221 }
222 }
223
224 #if !UCONFIG_NO_BREAK_ITERATION
225
226 static void
TestCaseTitle(void)227 TestCaseTitle(void) {
228 static const UChar
229
230 beforeTitle[]= { 0x61, 0x42, 0x20, 0x69, 0x3c2, 0x20, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
231 titleWord[]= { 0x41, 0x62, 0x20, 0x49, 0x3c2, 0x20, 0x53, 0x73, 0x3c3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff },
232 titleChar[]= { 0x41, 0x42, 0x20, 0x49, 0x3a3, 0x20, 0x53, 0x73, 0x3a3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff };
233
234 UChar buffer[32];
235 UBreakIterator *titleIterChars;
236 int32_t length;
237 UErrorCode errorCode;
238
239 errorCode=U_ZERO_ERROR;
240 titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, &errorCode);
241 if(U_FAILURE(errorCode)) {
242 log_err_status(errorCode, "error: ubrk_open(UBRK_CHARACTER)->%s\n", u_errorName(errorCode));
243 return;
244 }
245
246 /* titlecase with standard break iterator and in the same buffer */
247 uprv_memcpy(buffer, beforeTitle, sizeof(beforeTitle));
248 errorCode=U_ZERO_ERROR;
249 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
250 buffer, sizeof(beforeTitle)/U_SIZEOF_UCHAR,
251 NULL, "",
252 &errorCode);
253 if( U_FAILURE(errorCode) ||
254 length!=(sizeof(titleWord)/U_SIZEOF_UCHAR) ||
255 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)!=0 ||
256 buffer[length]!=0
257 ) {
258 log_err("error in u_strToTitle(standard iterator)=%ld error=%s string matches: %s\n",
259 length,
260 u_errorName(errorCode),
261 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
262 }
263
264 /* titlecase with UBRK_CHARACTERS and separate buffers */
265 buffer[0]=0xabcd;
266 errorCode=U_ZERO_ERROR;
267 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
268 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR,
269 titleIterChars, "",
270 &errorCode);
271 if( U_FAILURE(errorCode) ||
272 length!=(sizeof(titleChar)/U_SIZEOF_UCHAR) ||
273 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)!=0 ||
274 buffer[length]!=0
275 ) {
276 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s string matches: %s\n",
277 length,
278 u_errorName(errorCode),
279 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
280 }
281
282 /* test preflighting */
283 errorCode=U_ZERO_ERROR;
284 length=u_strToTitle(NULL, 0,
285 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR,
286 titleIterChars, "",
287 &errorCode);
288 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
289 length!=(sizeof(titleChar)/U_SIZEOF_UCHAR)
290 ) {
291 log_err("error in u_strToTitle(UBRK_CHARACTERS pure preflighting)=%ld error=%s\n",
292 length,
293 u_errorName(errorCode));
294 }
295
296 /* test error handling */
297 buffer[0]=0xabcd;
298 errorCode=U_ZERO_ERROR;
299 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
300 NULL, sizeof(beforeTitle)/U_SIZEOF_UCHAR,
301 titleIterChars, "",
302 &errorCode);
303 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
304 buffer[0]!=0xabcd
305 ) {
306 log_err("error in u_strToTitle(UBRK_CHARACTERS src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
307 length,
308 u_errorName(errorCode),
309 buffer[0]);
310 }
311
312 buffer[0]=0xabcd;
313 errorCode=U_ZERO_ERROR;
314 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
315 beforeTitle, -2,
316 titleIterChars, "",
317 &errorCode);
318 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
319 buffer[0]!=0xabcd
320 ) {
321 log_err("error in u_strToTitle(UBRK_CHARACTERS srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
322 length,
323 u_errorName(errorCode),
324 buffer[0]);
325 }
326
327 ubrk_close(titleIterChars);
328 }
329
330 static void
TestCaseDutchTitle(void)331 TestCaseDutchTitle(void) {
332 static const UChar
333
334 beforeTitle[]= { 0x69, 0x6A, 0x73, 0x73, 0x45, 0x6c, 0x20, 0x69, 0x67, 0x6c, 0x4f, 0x6f , 0x20 , 0x49, 0x4A, 0x53, 0x53, 0x45, 0x4C },
335 titleRoot[]= { 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6C },
336 titleDutch[]= { 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6C };
337
338 UChar buffer[32];
339 UBreakIterator *titleIterWord;
340 int32_t length;
341 UErrorCode errorCode;
342
343 errorCode=U_ZERO_ERROR;
344 titleIterWord=ubrk_open(UBRK_WORD, "", beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, &errorCode);
345 if(U_FAILURE(errorCode)) {
346 log_err_status(errorCode, "error: ubrk_open(UBRK_WORD)->%s\n", u_errorName(errorCode));
347 return;
348 }
349
350 /* titlecase with default locale */
351 buffer[0]=0xabcd;
352 errorCode=U_ZERO_ERROR;
353 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
354 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR,
355 titleIterWord, "",
356 &errorCode);
357 if( U_FAILURE(errorCode) ||
358 length!=(sizeof(titleRoot)/U_SIZEOF_UCHAR) ||
359 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
360 buffer[length]!=0
361 ) {
362 char charsOut[21];
363 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
364 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s root locale string matches: %s\noutput buffer is {%s}\n",
365 length,
366 u_errorName(errorCode),
367 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
368 }
369 /* titlecase with Dutch locale */
370 buffer[0]=0xabcd;
371 errorCode=U_ZERO_ERROR;
372 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
373 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR,
374 titleIterWord, "nl",
375 &errorCode);
376 if( U_FAILURE(errorCode) ||
377 length!=(sizeof(titleDutch)/U_SIZEOF_UCHAR) ||
378 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)!=0 ||
379 buffer[length]!=0
380 ) {
381 char charsOut[21];
382 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
383 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s dutch locale string matches: %s\noutput buffer is {%s}\n",
384 length,
385 u_errorName(errorCode),
386 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
387 }
388
389 ubrk_close(titleIterWord);
390 }
391
392 #endif
393
394 /* test case folding and case-insensitive string compare -------------------- */
395
396 static void
TestCaseFolding(void)397 TestCaseFolding(void) {
398 /*
399 * CaseFolding.txt says about i and its cousins:
400 * 0049; C; 0069; # LATIN CAPITAL LETTER I
401 * 0049; T; 0131; # LATIN CAPITAL LETTER I
402 *
403 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
404 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
405 * That's all.
406 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
407 */
408 static const UChar32
409 simple[]={
410 /* input, default, exclude special i */
411 0x61, 0x61, 0x61,
412 0x49, 0x69, 0x131,
413 0x130, 0x130, 0x69,
414 0x131, 0x131, 0x131,
415 0xdf, 0xdf, 0xdf,
416 0xfb03, 0xfb03, 0xfb03,
417 0x1040e,0x10436,0x10436,
418 0x5ffff,0x5ffff,0x5ffff
419 };
420
421 static const UChar
422 mixed[]= { 0x61, 0x42, 0x130, 0x49, 0x131, 0x3d0, 0xdf, 0xfb03, 0xd93f, 0xdfff },
423 foldedDefault[]= { 0x61, 0x62, 0x69, 0x307, 0x69, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff },
424 foldedExcludeSpecialI[]={ 0x61, 0x62, 0x69, 0x131, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff };
425
426 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
427
428 const UChar32 *p;
429 int32_t i;
430
431 UChar buffer[32];
432 int32_t length;
433 UErrorCode errorCode;
434 UBool isUnicode_3_1;
435
436 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
437 u_getUnicodeVersion(unicodeVersion);
438 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
439
440 /* test simple case folding */
441 p=simple;
442 for(i=0; i<sizeof(simple)/12; p+=3, ++i) {
443 if(u_foldCase(p[0], U_FOLD_CASE_DEFAULT)!=p[1]) {
444 log_err("error: u_foldCase(0x%04lx, default)=0x%04lx instead of 0x%04lx\n",
445 p[0], u_foldCase(p[0], U_FOLD_CASE_DEFAULT), p[1]);
446 return;
447 }
448
449 if(isUnicode_3_1 && u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I)!=p[2]) {
450 log_err("error: u_foldCase(0x%04lx, exclude special i)=0x%04lx instead of 0x%04lx\n",
451 p[0], u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I), p[2]);
452 return;
453 }
454 }
455
456 /* test full string case folding with default option and separate buffers */
457 buffer[0]=0xabcd;
458 errorCode=U_ZERO_ERROR;
459 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
460 mixed, sizeof(mixed)/U_SIZEOF_UCHAR,
461 U_FOLD_CASE_DEFAULT,
462 &errorCode);
463 if( U_FAILURE(errorCode) ||
464 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) ||
465 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
466 buffer[length]!=0
467 ) {
468 log_err("error in u_strFoldCase(default)=%ld error=%s string matches: %s\n",
469 length,
470 u_errorName(errorCode),
471 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
472 }
473
474 /* exclude special i */
475 if(isUnicode_3_1) {
476 buffer[0]=0xabcd;
477 errorCode=U_ZERO_ERROR;
478 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
479 mixed, sizeof(mixed)/U_SIZEOF_UCHAR,
480 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
481 &errorCode);
482 if( U_FAILURE(errorCode) ||
483 length!=(sizeof(foldedExcludeSpecialI)/U_SIZEOF_UCHAR) ||
484 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
485 buffer[length]!=0
486 ) {
487 log_err("error in u_strFoldCase(exclude special i)=%ld error=%s string matches: %s\n",
488 length,
489 u_errorName(errorCode),
490 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
491 }
492 }
493
494 /* test full string case folding with default option and in the same buffer */
495 uprv_memcpy(buffer, mixed, sizeof(mixed));
496 buffer[sizeof(mixed)/U_SIZEOF_UCHAR]=0;
497 errorCode=U_ZERO_ERROR;
498 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
499 buffer, -1, /* implicit srcLength */
500 U_FOLD_CASE_DEFAULT,
501 &errorCode);
502 if( U_FAILURE(errorCode) ||
503 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) ||
504 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
505 buffer[length]!=0
506 ) {
507 log_err("error in u_strFoldCase(default same buffer)=%ld error=%s string matches: %s\n",
508 length,
509 u_errorName(errorCode),
510 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
511 }
512
513 /* test full string case folding, exclude special i, in the same buffer */
514 if(isUnicode_3_1) {
515 uprv_memcpy(buffer, mixed, sizeof(mixed));
516 errorCode=U_ZERO_ERROR;
517 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
518 buffer, sizeof(mixed)/U_SIZEOF_UCHAR,
519 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
520 &errorCode);
521 if( U_FAILURE(errorCode) ||
522 length!=(sizeof(foldedExcludeSpecialI)/U_SIZEOF_UCHAR) ||
523 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
524 buffer[length]!=0
525 ) {
526 log_err("error in u_strFoldCase(exclude special i same buffer)=%ld error=%s string matches: %s\n",
527 length,
528 u_errorName(errorCode),
529 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
530 }
531 }
532
533 /* test preflighting */
534 buffer[0]=buffer[2]=0xabcd;
535 errorCode=U_ZERO_ERROR;
536 length=u_strFoldCase(buffer, 2, /* set destCapacity=2 */
537 mixed, sizeof(mixed)/U_SIZEOF_UCHAR,
538 U_FOLD_CASE_DEFAULT,
539 &errorCode);
540 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
541 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) ||
542 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
543 buffer[2]!=0xabcd
544 ) {
545 log_err("error in u_strFoldCase(default preflighting)=%ld error=%s string matches: %s\n",
546 length,
547 u_errorName(errorCode),
548 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
549 }
550
551 errorCode=U_ZERO_ERROR;
552 length=u_strFoldCase(NULL, 0,
553 mixed, sizeof(mixed)/U_SIZEOF_UCHAR,
554 U_FOLD_CASE_DEFAULT,
555 &errorCode);
556 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
557 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR)
558 ) {
559 log_err("error in u_strFoldCase(default pure preflighting)=%ld error=%s\n",
560 length,
561 u_errorName(errorCode));
562 }
563
564 /* test error handling */
565 errorCode=U_ZERO_ERROR;
566 length=u_strFoldCase(NULL, sizeof(buffer)/U_SIZEOF_UCHAR,
567 mixed, sizeof(mixed)/U_SIZEOF_UCHAR,
568 U_FOLD_CASE_DEFAULT,
569 &errorCode);
570 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
571 log_err("error in u_strFoldCase(default dest=NULL)=%ld error=%s\n",
572 length,
573 u_errorName(errorCode));
574 }
575
576 buffer[0]=0xabcd;
577 errorCode=U_ZERO_ERROR;
578 length=u_strFoldCase(buffer, -1,
579 mixed, sizeof(mixed)/U_SIZEOF_UCHAR,
580 U_FOLD_CASE_DEFAULT,
581 &errorCode);
582 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
583 buffer[0]!=0xabcd
584 ) {
585 log_err("error in u_strFoldCase(default destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
586 length,
587 u_errorName(errorCode),
588 buffer[0]);
589 }
590
591 buffer[0]=0xabcd;
592 errorCode=U_ZERO_ERROR;
593 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
594 NULL, sizeof(mixed)/U_SIZEOF_UCHAR,
595 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
596 &errorCode);
597 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
598 buffer[0]!=0xabcd
599 ) {
600 log_err("error in u_strFoldCase(exclude special i src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
601 length,
602 u_errorName(errorCode),
603 buffer[0]);
604 }
605
606 buffer[0]=0xabcd;
607 errorCode=U_ZERO_ERROR;
608 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
609 mixed, -2,
610 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
611 &errorCode);
612 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
613 buffer[0]!=0xabcd
614 ) {
615 log_err("error in u_strFoldCase(exclude special i srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
616 length,
617 u_errorName(errorCode),
618 buffer[0]);
619 }
620 }
621
622 static void
TestCaseCompare(void)623 TestCaseCompare(void) {
624 static const UChar
625
626 mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0xfb03, 0xd93f, 0xdfff, 0 },
627 otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
628 otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
629 different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
630
631 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
632
633 int32_t result, lenMixed, lenOtherDefault, lenOtherExcludeSpecialI, lenDifferent;
634 UErrorCode errorCode;
635 UBool isUnicode_3_1;
636
637 errorCode=U_ZERO_ERROR;
638
639 lenMixed=u_strlen(mixed);
640 lenOtherDefault=u_strlen(otherDefault);
641 (void)lenOtherDefault; /* Suppress set but not used warning. */
642 lenOtherExcludeSpecialI=u_strlen(otherExcludeSpecialI);
643 lenDifferent=u_strlen(different);
644
645 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
646 u_getUnicodeVersion(unicodeVersion);
647 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
648 (void)isUnicode_3_1; /* Suppress set but not used warning. */
649
650 /* test u_strcasecmp() */
651 result=u_strcasecmp(mixed, otherDefault, U_FOLD_CASE_DEFAULT);
652 if(result!=0) {
653 log_err("error: u_strcasecmp(mixed, other, default)=%ld instead of 0\n", result);
654 }
655 result=u_strCaseCompare(mixed, -1, otherDefault, -1, U_FOLD_CASE_DEFAULT, &errorCode);
656 if(result!=0) {
657 log_err("error: u_strCaseCompare(mixed, other, default)=%ld instead of 0\n", result);
658 }
659
660 /* test u_strcasecmp() - exclude special i */
661 result=u_strcasecmp(mixed, otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
662 if(result!=0) {
663 log_err("error: u_strcasecmp(mixed, other, exclude special i)=%ld instead of 0\n", result);
664 }
665 result=u_strCaseCompare(mixed, lenMixed, otherExcludeSpecialI, lenOtherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
666 if(result!=0) {
667 log_err("error: u_strCaseCompare(mixed, other, exclude special i)=%ld instead of 0\n", result);
668 }
669
670 /* test u_strcasecmp() */
671 result=u_strcasecmp(mixed, different, U_FOLD_CASE_DEFAULT);
672 if(result<=0) {
673 log_err("error: u_strcasecmp(mixed, different, default)=%ld instead of positive\n", result);
674 }
675 result=u_strCaseCompare(mixed, -1, different, lenDifferent, U_FOLD_CASE_DEFAULT, &errorCode);
676 if(result<=0) {
677 log_err("error: u_strCaseCompare(mixed, different, default)=%ld instead of positive\n", result);
678 }
679
680 /* test u_strncasecmp() - stop before the sharp s (U+00df) */
681 result=u_strncasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
682 if(result!=0) {
683 log_err("error: u_strncasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
684 }
685 result=u_strCaseCompare(mixed, 4, different, 4, U_FOLD_CASE_DEFAULT, &errorCode);
686 if(result!=0) {
687 log_err("error: u_strCaseCompare(mixed, 4, different, 4, default)=%ld instead of 0\n", result);
688 }
689
690 /* test u_strncasecmp() - stop in the middle of the sharp s (U+00df) */
691 result=u_strncasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
692 if(result<=0) {
693 log_err("error: u_strncasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
694 }
695 result=u_strCaseCompare(mixed, 5, different, 5, U_FOLD_CASE_DEFAULT, &errorCode);
696 if(result<=0) {
697 log_err("error: u_strCaseCompare(mixed, 5, different, 5, default)=%ld instead of positive\n", result);
698 }
699
700 /* test u_memcasecmp() - stop before the sharp s (U+00df) */
701 result=u_memcasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
702 if(result!=0) {
703 log_err("error: u_memcasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
704 }
705
706 /* test u_memcasecmp() - stop in the middle of the sharp s (U+00df) */
707 result=u_memcasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
708 if(result<=0) {
709 log_err("error: u_memcasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
710 }
711 }
712
713 /* test UCaseMap ------------------------------------------------------------ */
714
715 /*
716 * API test for UCaseMap;
717 * test cases for actual case mappings using UCaseMap see
718 * intltest utility/UnicodeStringTest/StringCaseTest/TestCasing
719 */
720 static void
TestUCaseMap(void)721 TestUCaseMap(void) {
722 static const char
723 aBc[] ={ 0x61, 0x42, 0x63, 0 },
724 abc[] ={ 0x61, 0x62, 0x63, 0 },
725 ABCg[]={ 0x41, 0x42, 0x43, 0x67, 0 },
726 defg[]={ 0x64, 0x65, 0x66, 0x67, 0 };
727 char utf8Out[8];
728
729 UCaseMap *csm;
730 const char *locale;
731 uint32_t options;
732 int32_t length;
733 UErrorCode errorCode;
734
735 errorCode=U_ZERO_ERROR;
736 csm=ucasemap_open("tur", 0xa5, &errorCode);
737 if(U_FAILURE(errorCode)) {
738 log_err("ucasemap_open(\"tur\") failed - %s\n", u_errorName(errorCode));
739 return;
740 }
741 locale=ucasemap_getLocale(csm);
742 if(0!=strcmp(locale, "tr")) {
743 log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale);
744 }
745 /* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */
746 ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
747 locale=ucasemap_getLocale(csm);
748 if(0!=strcmp(locale, "i-klingon")) {
749 log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale);
750 }
751
752 errorCode=U_ZERO_ERROR;
753 options=ucasemap_getOptions(csm);
754 if(options!=0xa5) {
755 log_err("ucasemap_getOptions(ucasemap_open(0xa5))==0x%lx!=0xa5\n", (long)options);
756 }
757 ucasemap_setOptions(csm, 0x333333, &errorCode);
758 options=ucasemap_getOptions(csm);
759 if(options!=0x333333) {
760 log_err("ucasemap_getOptions(ucasemap_setOptions(0x333333))==0x%lx!=0x333333\n", (long)options);
761 }
762
763 /* test case mapping API; not all permutations necessary due to shared implementation code */
764
765 /* NUL terminated source */
766 errorCode=U_ZERO_ERROR;
767 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
768 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
769 log_err("ucasemap_utf8ToLower(aBc\\0) failed\n");
770 }
771
772 /* incoming failure code */
773 errorCode=U_PARSE_ERROR;
774 strcpy(utf8Out, defg);
775 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
776 if(errorCode!=U_PARSE_ERROR || 0!=strcmp(defg, utf8Out)) {
777 log_err("ucasemap_utf8ToLower(failure) failed\n");
778 }
779
780 /* overlapping input & output */
781 errorCode=U_ZERO_ERROR;
782 strcpy(utf8Out, aBc);
783 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, utf8Out+1, 2, &errorCode);
784 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
785 log_err("ucasemap_utf8ToUpper(overlap 1) failed\n");
786 }
787
788 /* overlap in the other direction */
789 errorCode=U_ZERO_ERROR;
790 strcpy(utf8Out, aBc);
791 length=ucasemap_utf8ToUpper(csm, utf8Out+1, 2, utf8Out, 2, &errorCode);
792 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
793 log_err("ucasemap_utf8ToUpper(overlap 2) failed\n");
794 }
795
796 /* NULL destination */
797 errorCode=U_ZERO_ERROR;
798 strcpy(utf8Out, defg);
799 length=ucasemap_utf8ToLower(csm, NULL, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
800 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
801 log_err("ucasemap_utf8ToLower(dest=NULL) failed\n");
802 }
803
804 /* destCapacity<0 */
805 errorCode=U_ZERO_ERROR;
806 strcpy(utf8Out, defg);
807 length=ucasemap_utf8ToLower(csm, utf8Out, -2, aBc, -1, &errorCode);
808 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
809 log_err("ucasemap_utf8ToLower(destCapacity<0) failed\n");
810 }
811
812 /* NULL source */
813 errorCode=U_ZERO_ERROR;
814 strcpy(utf8Out, defg);
815 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), NULL, -1, &errorCode);
816 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
817 log_err("ucasemap_utf8ToLower(src=NULL) failed\n");
818 }
819
820 /* srcLength<-1 */
821 errorCode=U_ZERO_ERROR;
822 strcpy(utf8Out, defg);
823 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -2, &errorCode);
824 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
825 log_err("ucasemap_utf8ToLower(srcLength<-1) failed\n");
826 }
827
828 /* buffer overflow */
829 errorCode=U_ZERO_ERROR;
830 strcpy(utf8Out, defg);
831 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, aBc, 3, &errorCode);
832 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3 || 0!=strcmp(defg+2, utf8Out+2)) {
833 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
834 }
835
836 /* dest not terminated (leaves g from defg alone) */
837 errorCode=U_ZERO_ERROR;
838 strcpy(utf8Out, defg);
839 length=ucasemap_utf8ToUpper(csm, utf8Out, 3, aBc, 3, &errorCode);
840 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=3 || 0!=strcmp(ABCg, utf8Out)) {
841 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
842 }
843
844 /* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
845 errorCode=U_ZERO_ERROR;
846 utf8Out[0]=0;
847 length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode);
848 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
849 log_err("ucasemap_utf8FoldCase(aBc) failed\n");
850 }
851
852 ucasemap_close(csm);
853 }
854
855 #if !UCONFIG_NO_BREAK_ITERATION
856
857 /* Try titlecasing with options. */
858 static void
TestUCaseMapToTitle(void)859 TestUCaseMapToTitle(void) {
860 /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */
861 /*
862 * Note: The sentence BreakIterator does not recognize a '.'
863 * as a sentence terminator if it is followed by lowercase.
864 * That is why the example has the '!'.
865 */
866 static const UChar
867
868 beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e },
869 titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e },
870 titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e },
871 titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e };
872
873 UChar buffer[32];
874 UCaseMap *csm;
875 UBreakIterator *sentenceIter;
876 const UBreakIterator *iter;
877 int32_t length;
878 UErrorCode errorCode;
879
880 errorCode=U_ZERO_ERROR;
881 csm=ucasemap_open("", 0, &errorCode);
882 if(U_FAILURE(errorCode)) {
883 log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode));
884 return;
885 }
886
887 iter=ucasemap_getBreakIterator(csm);
888 if(iter!=NULL) {
889 log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter);
890 }
891
892 /* Use default UBreakIterator: Word breaks. */
893 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
894 if( U_FAILURE(errorCode) ||
895 length!=UPRV_LENGTHOF(titleWord) ||
896 0!=u_memcmp(buffer, titleWord, length) ||
897 buffer[length]!=0
898 ) {
899 log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
900 }
901 if (U_SUCCESS(errorCode)) {
902 iter=ucasemap_getBreakIterator(csm);
903 if(iter==NULL) {
904 log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n");
905 }
906 }
907
908 /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */
909 ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode);
910 if(U_FAILURE(errorCode)) {
911 log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode));
912 return;
913 }
914
915 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
916 if( U_FAILURE(errorCode) ||
917 length!=UPRV_LENGTHOF(titleWordNoAdjust) ||
918 0!=u_memcmp(buffer, titleWordNoAdjust, length) ||
919 buffer[length]!=0
920 ) {
921 log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
922 }
923
924 /* Set a sentence break iterator. */
925 errorCode=U_ZERO_ERROR;
926 sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode);
927 if(U_FAILURE(errorCode)) {
928 log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode));
929 ucasemap_close(csm);
930 return;
931 }
932 ucasemap_setBreakIterator(csm, sentenceIter, &errorCode);
933 if(U_FAILURE(errorCode)) {
934 log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode));
935 ubrk_close(sentenceIter);
936 ucasemap_close(csm);
937 return;
938 }
939 iter=ucasemap_getBreakIterator(csm);
940 if(iter!=sentenceIter) {
941 log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter);
942 }
943
944 ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode);
945 if(U_FAILURE(errorCode)) {
946 log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode));
947 return;
948 }
949
950 /* Use the sentence break iterator with the option. Preflight first. */
951 length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
952 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
953 length!=UPRV_LENGTHOF(titleSentNoLower)
954 ) {
955 log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
956 }
957
958 errorCode=U_ZERO_ERROR;
959 buffer[0]=0;
960 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
961 if( U_FAILURE(errorCode) ||
962 length!=UPRV_LENGTHOF(titleSentNoLower) ||
963 0!=u_memcmp(buffer, titleSentNoLower, length) ||
964 buffer[length]!=0
965 ) {
966 log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
967 }
968
969 /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
970 {
971 char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64];
972 int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength;
973
974 errorCode=U_ZERO_ERROR;
975 u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
976 u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode);
977
978 length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode);
979 if( U_FAILURE(errorCode) ||
980 length!=utf8TitleSentNoLowerLength ||
981 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) ||
982 utf8[length]!=0
983 ) {
984 log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
985 }
986 }
987
988 ucasemap_close(csm);
989 }
990
991 #endif
992
993 /* Test case for internal API u_caseInsensitivePrefixMatch */
994 static void
TestUCaseInsensitivePrefixMatch(void)995 TestUCaseInsensitivePrefixMatch(void) {
996 struct {
997 const char *s1;
998 const char *s2;
999 int32_t r1;
1000 int32_t r2;
1001 } testCases[] = {
1002 {"ABC", "ab", 2, 2},
1003 {"ABCD", "abcx", 3, 3},
1004 {"ABC", "xyz", 0, 0},
1005 /* U+00DF LATIN SMALL LETTER SHARP S */
1006 {"A\\u00dfBC", "Ass", 2, 3},
1007 {"Fust", "Fu\\u00dfball", 2, 2},
1008 {"\\u00dfsA", "s\\u00dfB", 2, 2},
1009 {"\\u00dfs", "s\\u00df", 2, 2},
1010 /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */
1011 {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6},
1012 {0, 0, 0, 0}
1013 };
1014 int32_t i;
1015
1016 for (i = 0; testCases[i].s1 != 0; i++) {
1017 UErrorCode sts = U_ZERO_ERROR;
1018 UChar u1[64], u2[64];
1019 int32_t matchLen1, matchLen2;
1020
1021 u_unescape(testCases[i].s1, u1, 64);
1022 u_unescape(testCases[i].s2, u2, 64);
1023
1024 u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2, &sts);
1025 if (U_FAILURE(sts)) {
1026 log_err("error: %s, s1=%s, s2=%s", u_errorName(sts), testCases[i].s1, testCases[i].s2);
1027 } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2) {
1028 log_err("s1=%s, s2=%2 / match len1=%d, len2=%d / expected len1=%d, len2=%d",
1029 testCases[i].s1, testCases[i].s2,
1030 matchLen1, matchLen2,
1031 testCases[i].r1, testCases[i].r2);
1032 }
1033 }
1034 }
1035
1036 void addCaseTest(TestNode** root);
1037
addCaseTest(TestNode ** root)1038 void addCaseTest(TestNode** root) {
1039 /* cstrcase.c functions, declared in cucdtst.h */
1040 addTest(root, &TestCaseLower, "tsutil/cstrcase/TestCaseLower");
1041 addTest(root, &TestCaseUpper, "tsutil/cstrcase/TestCaseUpper");
1042 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1043 addTest(root, &TestCaseTitle, "tsutil/cstrcase/TestCaseTitle");
1044 addTest(root, &TestCaseDutchTitle, "tsutil/cstrcase/TestCaseDutchTitle");
1045 #endif
1046 addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding");
1047 addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare");
1048 addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap");
1049 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1050 addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle");
1051 #endif
1052 addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseInsensitivePrefixMatch");
1053 }
1054