1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: strcase.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar12
16 * created by: Markus W. Scherer
17 *
18 * Test file for string casing C++ API functions.
19 */
20
21 #include "unicode/std_string.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ures.h"
27 #include "unicode/uloc.h"
28 #include "unicode/locid.h"
29 #include "unicode/ubrk.h"
30 #include "unicode/unistr.h"
31 #include "unicode/ucasemap.h"
32 #include "unicode/ustring.h"
33 #include "ucase.h"
34 #include "ustrtest.h"
35 #include "unicode/tstdtmod.h"
36 #include "cmemory.h"
37 #include "testutil.h"
38
/**
 * Test suite for the string case mapping C++ API functions
 * (lowercasing, uppercasing, titlecasing, case folding, and Edits).
 */
class StringCaseTest: public IntlTest {
public:
    StringCaseTest();
    virtual ~StringCaseTest();

    // Dispatcher: maps a test index to one of the Test*() methods below
    // and reports its name through the name parameter.
    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);

    // Hard-coded UnicodeString toLower/toUpper/foldCase checks.
    void TestCaseConversion();

    // Shared checker: case-maps input via UnicodeString and via the UTF-8
    // ucasemap_utf8*() functions and compares each result against output.
    // whichCase is one of the TEST_* constants; iter is an optional break
    // iterator passed as an untyped pointer.
    void TestCasingImpl(const UnicodeString &input,
                        const UnicodeString &output,
                        int32_t whichCase,
                        void *iter, const char *localeID, uint32_t options);
    // Individual test cases, registered in runIndexedTest().
    void TestCasing();
    void TestTitleOptions();
    void TestFullCaseFoldingIterator();
    void TestGreekUpper();
    void TestArmenian();
    void TestLongUpper();
    void TestMalformedUTF8();
    void TestBufferOverflow();
    void TestEdits();
    void TestCopyMoveEdits();
    void TestEditsFindFwdBwd();
    void TestMergeEdits();
    void TestCaseMapWithEdits();
    void TestCaseMapUTF8WithEdits();
    void TestCaseMapToString();
    void TestCaseMapUTF8ToString();
    void TestLongUnicodeString();
    void TestBug13127();
    void TestInPlaceTitle();
    void TestCaseMapEditsIteratorDocs();
    void TestCaseMapGreekExtended();

private:
    // Uppercases s for Greek through several APIs and asserts that the
    // result equals expected.
    void assertGreekUpper(const char16_t *s, const char16_t *expected);

    // Locale "el", initialized by the constructor.
    Locale GREEK_LOCALE_;
};
79
StringCaseTest()80 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
81
~StringCaseTest()82 StringCaseTest::~StringCaseTest() {}
83
createStringCaseTest()84 extern IntlTest *createStringCaseTest() {
85 return new StringCaseTest();
86 }
87
/**
 * Test dispatcher: registers every test method of this suite with the
 * TESTCASE_AUTO macros.
 *
 * @param index index of the requested test
 * @param exec  when true, the suite banner is logged here
 * @param name  receives the test name (set by the TESTCASE_AUTO macros)
 * The last parameter (par) is unused.
 */
void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite StringCaseTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestCaseConversion);
    // The data-driven tests are only built with break iteration, file I/O
    // and legacy conversion all enabled.
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
    TESTCASE_AUTO(TestCasing);
    TESTCASE_AUTO(TestTitleOptions);
#endif
    TESTCASE_AUTO(TestFullCaseFoldingIterator);
    TESTCASE_AUTO(TestGreekUpper);
    TESTCASE_AUTO(TestArmenian);
    TESTCASE_AUTO(TestLongUpper);
    TESTCASE_AUTO(TestMalformedUTF8);
    TESTCASE_AUTO(TestBufferOverflow);
    TESTCASE_AUTO(TestEdits);
    TESTCASE_AUTO(TestCopyMoveEdits);
    TESTCASE_AUTO(TestEditsFindFwdBwd);
    TESTCASE_AUTO(TestMergeEdits);
    TESTCASE_AUTO(TestCaseMapWithEdits);
    TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
    TESTCASE_AUTO(TestCaseMapToString);
    TESTCASE_AUTO(TestCaseMapUTF8ToString);
    TESTCASE_AUTO(TestLongUnicodeString);
    // These two tests are only registered when break iteration is available.
#if !UCONFIG_NO_BREAK_ITERATION
    TESTCASE_AUTO(TestBug13127);
    TESTCASE_AUTO(TestInPlaceTitle);
#endif
    TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
    TESTCASE_AUTO(TestCaseMapGreekExtended);
    TESTCASE_AUTO_END;
}
122
/**
 * Hard-coded checks of UnicodeString::toLower(), toUpper() and foldCase()
 * with various locales, including Turkish dotted/dotless i, German sharp s,
 * Greek final sigma, supplementary (Deseret) characters, Lithuanian dots,
 * and the U_FOLD_CASE_EXCLUDE_SPECIAL_I folding option.
 * Every mismatch is reported via errln().
 */
void
StringCaseTest::TestCaseConversion()
{
    static const UChar uppercaseGreek[] =
        { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
        0x39f, 0x3a3, 0 };
        // "IESUS CHRISTOS"

    // Note the final sigma U+03C2 at the end of each word.
    static const UChar lowercaseGreek[] =
        { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
        0x3bf, 0x3c2, 0 };
        // "iesus christos"

    // "istanbul, not constant\u0131nople!" -- expected Turkish lowercase of test3
    static const UChar lowercaseTurkish[] =
        { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
        0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };

    // "TOPKAPI PALACE, \u0130STANBUL" -- expected Turkish uppercase
    static const UChar uppercaseTurkish[] =
        { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
        0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };

    UnicodeString expectedResult;
    UnicodeString test3;

    // test3 = U+0130 (capital I with dot above) followed by ASCII text
    test3 += (UChar32)0x0130;
    test3 += "STANBUL, NOT CONSTANTINOPLE!";

    // Empty locale ID: U+0130 is expected to lowercase to "i" + U+0307.
    UnicodeString test4(test3);
    test4.toLower(Locale(""));
    expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
    if (test4 != expectedResult)
        errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // Turkish locale: U+0130 -> "i" and "I" -> U+0131 (dotless i).
    test4 = test3;
    test4.toLower(Locale("tr", "TR"));
    expectedResult = lowercaseTurkish;
    if (test4 != expectedResult)
        errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // test3 = "topkap" + U+0131 + " palace, istanbul"
    test3 = "topkap";
    test3 += (UChar32)0x0131;
    test3 += " palace, istanbul";
    test4 = test3;

    test4.toUpper(Locale(""));
    expectedResult = "TOPKAPI PALACE, ISTANBUL";
    if (test4 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    test4 = test3;
    test4.toUpper(Locale("tr", "TR"));
    expectedResult = uppercaseTurkish;
    if (test4 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // German: U+00DF (sharp s) is expected to uppercase to "SS".
    test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");

    test3.toUpper(Locale("de", "DE"));
    expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
    if (test3 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");

    // Greek round trip: uppercase -> lowercase (with el_GR locale) ...
    test4.replace(0, test4.length(), uppercaseGreek);

    test4.toLower(Locale("el", "GR"));
    expectedResult = lowercaseGreek;
    if (test4 != expectedResult)
        errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // ... and lowercase -> uppercase (toUpper() called without a locale argument).
    test4.replace(0, test4.length(), lowercaseGreek);

    test4.toUpper();
    expectedResult = uppercaseGreek;
    if (test4 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // more string case mapping tests with the new implementation
    {
        // Inputs and expected outputs; these arrays are not NUL-terminated,
        // their lengths are taken with UPRV_LENGTHOF().
        static const UChar

        beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
        lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
        lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },

        beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
        upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
        upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },

        beforeMiniUpper[]= { 0xdf, 0x61 },
        miniUpper[]= { 0x53, 0x53, 0x41 };

        UnicodeString s;

        /* lowercase with root locale */
        s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
        s.toLower("");
        if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
            s!=UnicodeString(FALSE, lowerRoot, s.length())
        ) {
            errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
        }

        /* lowercase with turkish locale */
        s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
        // setCharAt() rewrites the first unit with its own value before case mapping.
        // NOTE(review): presumably this forces the aliased string to become writable first -- confirm.
        s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
        if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
            s!=UnicodeString(FALSE, lowerTurkish, s.length())
        ) {
            errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
        }

        /* uppercase with root locale */
        s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
        s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
        if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
            s!=UnicodeString(FALSE, upperRoot, s.length())
        ) {
            errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
        }

        /* uppercase with turkish locale */
        s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
        s.toUpper(Locale("tr"));
        if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
            s!=UnicodeString(FALSE, upperTurkish, s.length())
        ) {
            errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
        }

        /* uppercase a short string with root locale */
        s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
        s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
        if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
            s!=UnicodeString(FALSE, miniUpper, s.length())
        ) {
            errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
        }
    }

    // test some supplementary characters (>= Unicode 3.1)
    {
        UnicodeString t;

        // Input pairs U+1043C with U+10414; the expected results map both
        // to U+1043C when lowercasing and both to U+10414 when uppercasing.
        UnicodeString
            deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
            deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
            deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
        (t=deseretInput).toLower();
        if(t!=deseretLower) {
            errln("error lowercasing Deseret (plane 1) characters");
        }
        (t=deseretInput).toUpper();
        if(t!=deseretUpper) {
            errln("error uppercasing Deseret (plane 1) characters");
        }
    }

    // test some more cases that looked like problems
    {
        UnicodeString t;

        // Mixed input: ASCII, U+FB00, the dotted/dotless i variants,
        // U+01C7..U+01C9, and Deseret, case-mapped with locale "en".
        UnicodeString
            ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
            ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
            ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
        (t=ljInput).toLower("en");
        if(t!=ljLower) {
            errln("error lowercasing LJ characters");
        }
        (t=ljInput).toUpper("en");
        if(t!=ljUpper) {
            errln("error uppercasing LJ characters");
        }
    }

#if !UCONFIG_NO_NORMALIZATION
    // some context-sensitive casing depends on normalization data being present

    // Unicode 3.1.1 SpecialCasing tests
    {
        UnicodeString t;

        // sigmas preceded and/or followed by cased letters
        UnicodeString
            sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
            sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
            sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();

        (t=sigmas).toLower();
        if(t!=sigmasLower) {
            errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
        }

        (t=sigmas).toUpper(Locale(""));
        if(t!=sigmasUpper) {
            errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
        }

        // turkish & azerbaijani dotless i & dotted I
        // remove dot above if there was a capital I before and there are no more accents above
        UnicodeString
            dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
            dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
            dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();

        (t=dots).toLower("tr");
        if(t!=dotsTurkish) {
            errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
        }

        // "de" stands in for a locale without special i handling.
        (t=dots).toLower("de");
        if(t!=dotsDefault) {
            errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
        }
    }

    // more Unicode 3.1.1 tests
    {
        UnicodeString t;

        // lithuanian dot above in uppercasing
        UnicodeString
            dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
            dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
            dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();

        (t=dots).toUpper("lt");
        if(t!=dotsLithuanian) {
            errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
        }

        (t=dots).toUpper("de");
        if(t!=dotsDefault) {
            errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
        }

        // lithuanian adds dot above to i in lowercasing if there are more above accents
        UnicodeString
            i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
            iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
            iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();

        (t=i).toLower("lt");
        if(t!=iLithuanian) {
            errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
        }

        (t=i).toLower("de");
        if(t!=iDefault) {
            errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
        }
    }

#endif

    // test case folding
    {
        // s is the input; f is the expected default folding;
        // g is the expected folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I.
        UnicodeString
            s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
            f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
            g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
            t;

        (t=s).foldCase();
        if(f!=t) {
            errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
        }

        // alternate handling for dotted I/dotless i (U+0130, U+0131)
        (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
        if(g!=t) {
            errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
        }
    }
}
398
399 // data-driven case mapping tests ------------------------------------------ ***
400
// Kinds of case mapping exercised by the data-driven tests.
// The values double as indexes into dataNames[]; TEST_COUNT is the number of kinds.
enum {
    TEST_LOWER = 0,
    TEST_UPPER = 1,
    TEST_TITLE = 2,
    TEST_FOLD = 3,
    TEST_COUNT = 4
};

// names of TestData children in casing.txt, indexed by the TEST_* constants;
// the extra last entry is an empty string
static const char *const dataNames[TEST_COUNT+1]={
    /* TEST_LOWER */ "lowercasing",
    /* TEST_UPPER */ "uppercasing",
    /* TEST_TITLE */ "titlecasing",
    /* TEST_FOLD  */ "casefolding",
    /* TEST_COUNT */ ""
};
417
/**
 * Case-maps input in two ways and compares each result against output:
 * first with the UnicodeString member functions, then with the UTF-8
 * ucasemap_utf8*() functions (converting to UTF-8 and back).
 *
 * @param input     string to be case-mapped
 * @param output    expected result
 * @param whichCase one of TEST_LOWER, TEST_UPPER, TEST_TITLE, TEST_FOLD
 * @param iter      break iterator for titlecasing, or NULL; it is cast to
 *                  BreakIterator * and to UBreakIterator * below
 * @param localeID  locale ID for Locale() and ucasemap_open()
 * @param options   options passed to toTitle()/foldCase()/ucasemap_open()
 */
void
StringCaseTest::TestCasingImpl(const UnicodeString &input,
                               const UnicodeString &output,
                               int32_t whichCase,
                               void *iter, const char *localeID, uint32_t options) {
    // UnicodeString
    UnicodeString result;
    const char *name;
    Locale locale(localeID);

    result=input;
    switch(whichCase) {
    case TEST_LOWER:
        name="toLower";
        result.toLower(locale);
        break;
    case TEST_UPPER:
        name="toUpper";
        result.toUpper(locale);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="toTitle";
        result.toTitle((BreakIterator *)iter, locale, options);
        break;
#endif
    case TEST_FOLD:
        name="foldCase";
        result.foldCase(options);
        break;
    default:
        name="";
        break; // won't happen
    }
    if(result!=output) {
        dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
    }
#if !UCONFIG_NO_BREAK_ITERATION
    // With options==0, also exercise the toTitle() overload without an options argument.
    if(whichCase==TEST_TITLE && options==0) {
        result=input;
        result.toTitle((BreakIterator *)iter, locale);
        if(result!=output) {
            dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
        }
    }
#endif

    // UTF-8
    // Fixed-size buffers: conversion or case mapping of longer strings
    // sets an error in errorCode, which is checked at the end.
    char utf8In[100], utf8Out[100];
    int32_t utf8InLength, utf8OutLength, resultLength;
    UChar *buffer;

    IcuTestErrorCode errorCode(*this, "TestCasingImpl");
    LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
#if !UCONFIG_NO_BREAK_ITERATION
    if(iter!=NULL) {
        // Clone the break iterator so that the UCaseMap can safely adopt it.
        UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
        ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
    }
#endif

    u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
    switch(whichCase) {
    case TEST_LOWER:
        name="ucasemap_utf8ToLower";
        utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
    case TEST_UPPER:
        name="ucasemap_utf8ToUpper";
        utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="ucasemap_utf8ToTitle";
        utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
#endif
    case TEST_FOLD:
        name="ucasemap_utf8FoldCase";
        utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
    default:
        name="";
        utf8OutLength=0;
        break; // won't happen
    }
    // Convert the UTF-8 result back to UTF-16 for comparison with output;
    // on failure the result string is set to empty.
    buffer=result.getBuffer(utf8OutLength);
    u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
    result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);

    if(errorCode.isFailure()) {
        errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
        errorCode.reset();
    } else if(result!=output) {
        errln("error: %s() got a wrong result for a test case from casing.res", name);
        errln("expected \"" + output + "\" got \"" + result + "\"" );
    }
}
525
/**
 * Data-driven test: loads the "casing" test data module and, for each kind
 * of case mapping in dataNames[], runs every test case through
 * TestCasingImpl(). Finishes with an extra UnicodeString::toTitle(NULL) check.
 */
void
StringCaseTest::TestCasing() {
    UErrorCode status = U_ZERO_ERROR;
#if !UCONFIG_NO_BREAK_ITERATION
    LocalUBreakIteratorPointer iter;
#endif
    char cLocaleID[100];
    UnicodeString locale, input, output, optionsString, result;
    uint32_t options;
    int32_t whichCase, type;
    LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
    if(U_SUCCESS(status)) {
        for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
#if UCONFIG_NO_BREAK_ITERATION
            if(whichCase==TEST_TITLE) {
                continue;
            }
#endif
            LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
            if(U_FAILURE(status)) {
                errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
                break;
            }
            const DataMap *myCase = NULL;
            while(casingTest->nextCase(myCase, status)) {
                input = myCase->getString("Input", status);
                output = myCase->getString("Output", status);

                // The "Locale" field is not read for case folding tests;
                // locale then keeps its value from the previous test case.
                if(whichCase!=TEST_FOLD) {
                    locale = myCase->getString("Locale", status);
                }
                locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");

#if !UCONFIG_NO_BREAK_ITERATION
                // "Type" selects the break iterator: >=0 is a UBreakIteratorType,
                // -2 is a rule-based trivial iterator; for other values iter stays unset.
                if(whichCase==TEST_TITLE) {
                    type = myCase->getInt("Type", status);
                    if(type>=0) {
                        iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
                    } else if(type==-2) {
                        // Open a trivial break iterator that only delivers { 0, length }
                        // or even just { 0 } as boundaries.
                        static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
                        UParseError parseError;
                        iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
                    }
                }
#endif
                // Translate the letters in the "Options" field into option bits.
                options = 0;
                if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
                    optionsString = myCase->getString("Options", status);
                    if(optionsString.indexOf((UChar)0x54)>=0) { // T
                        options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
                    }
                    if(optionsString.indexOf((UChar)0x4c)>=0) { // L
                        options|=U_TITLECASE_NO_LOWERCASE;
                    }
                    if(optionsString.indexOf((UChar)0x41)>=0) { // A
                        options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
                    }
                }

                if(U_FAILURE(status)) {
                    dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status));
                    status = U_ZERO_ERROR;
                } else {
#if UCONFIG_NO_BREAK_ITERATION
                    // Without break iteration there is no outer iter;
                    // this empty placeholder makes iter.getAlias() below yield NULL.
                    LocalPointer<UMemory> iter;
#endif
                    TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
                }

#if !UCONFIG_NO_BREAK_ITERATION
                // Drop this test case's break iterator before the next case.
                iter.adoptInstead(NULL);
#endif
            }
        }
    }

#if !UCONFIG_NO_BREAK_ITERATION
    // more tests for API coverage
    status=U_ZERO_ERROR;
    input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
    (result=input).toTitle(NULL);
    if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
        dataerrln("UnicodeString::toTitle(NULL) failed.");
    }
#endif
}
614
615 void
TestTitleOptions()616 StringCaseTest::TestTitleOptions() {
617 // New options in ICU 60.
618 TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
619 nullptr, "", U_TITLECASE_WHOLE_STRING);
620 TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
621 nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE);
622 TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
623 nullptr, "", U_TITLECASE_WHOLE_STRING);
624 TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
625 nullptr, "", U_TITLECASE_WHOLE_STRING);
626 TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
627 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
628 TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
629 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
630 TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
631 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE);
632 TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
633 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT);
634 TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
635 nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
636 TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
637 nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
638
639 #if !UCONFIG_NO_BREAK_ITERATION
640 // Test conflicting settings.
641 // If & when we add more options, then the ORed combinations may become
642 // indistinguishable from valid values.
643 IcuTestErrorCode errorCode(*this, "TestTitleOptions");
644 CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr,
645 u"", 0, nullptr, 0, nullptr, errorCode);
646 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
647 errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
648 errorCode.errorName());
649 }
650 errorCode.reset();
651 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr,
652 u"", 0, nullptr, 0, nullptr, errorCode);
653 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
654 errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
655 errorCode.errorName());
656 }
657 errorCode.reset();
658 LocalPointer<BreakIterator> iter(
659 BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
660 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
661 u"", 0, nullptr, 0, nullptr, errorCode);
662 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
663 errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
664 errorCode.errorName());
665 }
666 errorCode.reset();
667 #endif
668 }
669
670 void
TestFullCaseFoldingIterator()671 StringCaseTest::TestFullCaseFoldingIterator() {
672 UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
673 UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
674 FullCaseFoldingIterator iter;
675 int32_t count=0;
676 int32_t countSpecific=0;
677 UChar32 c;
678 UnicodeString full;
679 while((c=iter.next(full))>=0) {
680 ++count;
681 // Check that the full Case_Folding has more than 1 code point.
682 if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
683 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
684 continue;
685 }
686 // Check that full == Case_Folding(c).
687 UnicodeString cf(c);
688 cf.foldCase();
689 if(full!=cf) {
690 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
691 continue;
692 }
693 // Spot-check a couple of specific cases.
694 if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
695 ++countSpecific;
696 }
697 }
698 if(countSpecific!=3) {
699 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
700 }
701 if(count<70) {
702 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
703 }
704 }
705
706 void
assertGreekUpper(const char16_t * s,const char16_t * expected)707 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
708 UnicodeString s16(s);
709 UnicodeString expected16(expected);
710 UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
711 UnicodeString result16(s16);
712 result16.toUpper(GREEK_LOCALE_);
713 assertEquals(msg, expected16, result16);
714
715 msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
716 int32_t length = expected16.length();
717 int32_t capacities[] = {
718 // Keep in sync with the UTF-8 capacities near the bottom of this function.
719 0, length / 2, length - 1, length, length + 1
720 };
721 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
722 int32_t cap = capacities[i];
723 UChar *dest16 = result16.getBuffer(expected16.length() + 1);
724 u_memset(dest16, 0x55AA, result16.getCapacity());
725 UErrorCode errorCode = U_ZERO_ERROR;
726 length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
727 assertEquals(msg + cap, expected16.length(), length);
728 UErrorCode expectedErrorCode;
729 if (cap < expected16.length()) {
730 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
731 } else if (cap == expected16.length()) {
732 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
733 } else {
734 expectedErrorCode = U_ZERO_ERROR;
735 assertEquals(msg + cap + " NUL", 0, dest16[length]);
736 }
737 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
738 result16.releaseBuffer(length);
739 if (cap >= expected16.length()) {
740 assertEquals(msg + cap, expected16, result16);
741 }
742 }
743
744 UErrorCode errorCode = U_ZERO_ERROR;
745 LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
746 assertSuccess("ucasemap_open", errorCode);
747 std::string s8;
748 s16.toUTF8String(s8);
749 msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
750 char dest8[1000];
751 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
752 s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
753 assertSuccess("ucasemap_utf8ToUpper", errorCode);
754 StringPiece result8(dest8, length);
755 UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
756 assertEquals(msg, expected16, result16From8);
757
758 msg += " cap=";
759 capacities[1] = length / 2;
760 capacities[2] = length - 1;
761 capacities[3] = length;
762 capacities[4] = length + 1;
763 char dest8b[1000];
764 int32_t expected8Length = length; // Assuming the previous call worked.
765 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
766 int32_t cap = capacities[i];
767 memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
768 UErrorCode errorCode = U_ZERO_ERROR;
769 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
770 s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
771 assertEquals(msg + cap, expected8Length, length);
772 UErrorCode expectedErrorCode;
773 if (cap < expected8Length) {
774 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
775 } else if (cap == expected8Length) {
776 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
777 } else {
778 expectedErrorCode = U_ZERO_ERROR;
779 // Casts to int32_t to avoid matching UBool.
780 assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
781 }
782 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
783 if (cap >= expected8Length) {
784 assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
785 }
786 }
787 }
788
789 void
TestGreekUpper()790 StringCaseTest::TestGreekUpper() {
791 // http://bugs.icu-project.org/trac/ticket/5456
792 assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
793 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
794 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
795 assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
796 assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
797 assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
798 assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
799 assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
800 assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
801 assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
802 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
803 assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
804 assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
805 // http://unicode.org/udhr/d/udhr_ell_polytonic.html
806 assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
807 assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
808 // From Google bug report
809 assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
810 // http://crbug.com/234797
811 assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
812 assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
813 assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
814 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
815 assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
816 assertGreekUpper(u"ή.", u"Ή.");
817 }
818
TestArmenian()819 void StringCaseTest::TestArmenian() {
820 Locale hy("hy"); // Eastern Armenian
821 Locale hyw("hyw"); // Western Armenian
822 Locale root = Locale::getRoot();
823 // See ICU-13416:
824 // և ligature ech-yiwn
825 // uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
826 // but to ԵՎ=ech+vew in Eastern Armenian.
827 UnicodeString s(u"և Երևանի");
828
829 assertEquals("upper root", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(root));
830 assertEquals("upper hy", u"ԵՎ ԵՐԵՎԱՆԻ", UnicodeString(s).toUpper(hy));
831 assertEquals("upper hyw", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(hyw));
832 #if !UCONFIG_NO_BREAK_ITERATION
833 assertEquals("title root", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, root));
834 assertEquals("title hy", u"Եվ Երևանի", UnicodeString(s).toTitle(nullptr, hy));
835 assertEquals("title hyw", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, hyw));
836 #endif
837 }
838
839 void
TestLongUpper()840 StringCaseTest::TestLongUpper() {
841 if (quick) {
842 logln("not exhaustive mode: skipping this test");
843 return;
844 }
845 // Ticket #12663, crash with an extremely long string where
846 // U+0390 maps to 0399 0308 0301 so that the result is three times as long
847 // and overflows an int32_t.
848 int32_t length = 0x40000004; // more than 1G UChars
849 UnicodeString s(length, (UChar32)0x390, length);
850 UnicodeString result;
851 UChar *dest = result.getBuffer(length + 1);
852 if (s.isBogus() || dest == NULL) {
853 logln("Out of memory, unable to run this test on this machine.");
854 return;
855 }
856 IcuTestErrorCode errorCode(*this, "TestLongUpper");
857 int32_t destLength = u_strToUpper(dest, result.getCapacity(),
858 s.getBuffer(), s.length(), "", errorCode);
859 result.releaseBuffer(destLength);
860 if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
861 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
862 errorCode.errorName(), (long)destLength);
863 }
864 }
865
TestMalformedUTF8()866 void StringCaseTest::TestMalformedUTF8() {
867 // ticket #12639
868 IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
869 LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
870 if (errorCode.isFailure()) {
871 errln("ucasemap_open(English) failed - %s", errorCode.errorName());
872 return;
873 }
874 char src[1] = { (char)0x85 }; // malformed UTF-8
875 char dest[3] = { 0, 0, 0 };
876 int32_t destLength;
877 #if !UCONFIG_NO_BREAK_ITERATION
878 destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
879 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
880 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
881 errorCode.errorName(), (int)destLength, dest[0]);
882 }
883 #endif
884
885 errorCode.reset();
886 dest[0] = 0;
887 destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
888 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
889 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
890 errorCode.errorName(), (int)destLength, dest[0]);
891 }
892
893 errorCode.reset();
894 dest[0] = 0;
895 destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
896 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
897 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
898 errorCode.errorName(), (int)destLength, dest[0]);
899 }
900
901 errorCode.reset();
902 dest[0] = 0;
903 destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
904 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
905 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
906 errorCode.errorName(), (int)destLength, dest[0]);
907 }
908 }
909
TestBufferOverflow()910 void StringCaseTest::TestBufferOverflow() {
911 // Ticket #12849, incorrect result from Title Case preflight operation,
912 // when buffer overflow error is expected.
913 IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
914 LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
915 if (errorCode.isFailure()) {
916 errln("ucasemap_open(English) failed - %s", errorCode.errorName());
917 return;
918 }
919
920 UnicodeString data("hello world");
921 int32_t result;
922 #if !UCONFIG_NO_BREAK_ITERATION
923 result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
924 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
925 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
926 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
927 __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
928 }
929 #endif
930 errorCode.reset();
931
932 std::string data_utf8;
933 data.toUTF8String(data_utf8);
934 #if !UCONFIG_NO_BREAK_ITERATION
935 result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), static_cast<int32_t>(data_utf8.length()), errorCode);
936 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
937 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
938 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
939 __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
940 }
941 #endif
942 errorCode.reset();
943 }
944
TestEdits()945 void StringCaseTest::TestEdits() {
946 IcuTestErrorCode errorCode(*this, "TestEdits");
947 Edits edits;
948 assertFalse("new Edits hasChanges", edits.hasChanges());
949 assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
950 assertEquals("new Edits", 0, edits.lengthDelta());
951 edits.addUnchanged(1); // multiple unchanged ranges are combined
952 edits.addUnchanged(10000); // too long, and they are split
953 edits.addReplace(0, 0);
954 edits.addUnchanged(2);
955 assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
956 assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
957 assertEquals("unchanged 10003", 0, edits.lengthDelta());
958 edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed
959 edits.addUnchanged(0);
960 edits.addReplace(2, 1);
961 edits.addReplace(2, 1);
962 edits.addReplace(0, 10);
963 edits.addReplace(100, 0);
964 edits.addReplace(3000, 4000); // variable-length encoding
965 edits.addReplace(100000, 100000);
966 assertTrue("some edits hasChanges", edits.hasChanges());
967 assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
968 assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
969 UErrorCode outErrorCode = U_ZERO_ERROR;
970 assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
971
972 static const EditChange coarseExpectedChanges[] = {
973 { FALSE, 10003, 10003 },
974 { TRUE, 103106, 104013 }
975 };
976 TestUtility::checkEditsIter(*this, u"coarse",
977 edits.getCoarseIterator(), edits.getCoarseIterator(),
978 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
979 TestUtility::checkEditsIter(*this, u"coarse changes",
980 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
981 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
982
983 static const EditChange fineExpectedChanges[] = {
984 { FALSE, 10003, 10003 },
985 { TRUE, 2, 1 },
986 { TRUE, 2, 1 },
987 { TRUE, 2, 1 },
988 { TRUE, 0, 10 },
989 { TRUE, 100, 0 },
990 { TRUE, 3000, 4000 },
991 { TRUE, 100000, 100000 }
992 };
993 TestUtility::checkEditsIter(*this, u"fine",
994 edits.getFineIterator(), edits.getFineIterator(),
995 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
996 TestUtility::checkEditsIter(*this, u"fine changes",
997 edits.getFineChangesIterator(), edits.getFineChangesIterator(),
998 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
999
1000 edits.reset();
1001 assertFalse("reset hasChanges", edits.hasChanges());
1002 assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
1003 assertEquals("reset", 0, edits.lengthDelta());
1004 Edits::Iterator ei = edits.getCoarseChangesIterator();
1005 assertFalse("reset then iterator", ei.next(errorCode));
1006 }
1007
TestCopyMoveEdits()1008 void StringCaseTest::TestCopyMoveEdits() {
1009 IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
1010 // Exceed the stack array capacity.
1011 Edits a;
1012 for (int32_t i = 0; i < 250; ++i) {
1013 a.addReplace(i % 10, (i % 10) + 1);
1014 }
1015 assertEquals("a: many edits, length delta", 250, a.lengthDelta());
1016
1017 // copy
1018 Edits b(a);
1019 assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
1020 assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
1021 TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
1022
1023 // assign
1024 Edits c;
1025 c.addUnchanged(99);
1026 c.addReplace(88, 77);
1027 c = b;
1028 assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
1029 assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
1030 TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
1031
1032 // std::move trouble on these platforms.
1033 // See https://ssl.icu-project.org/trac/ticket/13393
1034 #if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
1035 // move constructor empties object with heap array
1036 Edits d(std::move(a));
1037 assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
1038 assertFalse("a moved away: no more hasChanges", a.hasChanges());
1039 TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
1040 Edits empty;
1041 TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
1042
1043 // move assignment empties object with heap array
1044 Edits e;
1045 e.addReplace(0, 1000);
1046 e = std::move(b);
1047 assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
1048 assertFalse("b moved away: no more hasChanges", b.hasChanges());
1049 TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
1050 TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
1051
1052 // Edits::Iterator default constructor.
1053 Edits::Iterator iter;
1054 assertFalse("Edits::Iterator().next()", iter.next(errorCode));
1055 assertSuccess("Edits::Iterator().next()", errorCode);
1056 iter = e.getFineChangesIterator();
1057 assertTrue("iter.next()", iter.next(errorCode));
1058 assertSuccess("iter.next()", errorCode);
1059 assertTrue("iter.hasChange()", iter.hasChange());
1060 assertEquals("iter.newLength()", 1, iter.newLength());
1061 #endif
1062 }
1063
TestEditsFindFwdBwd()1064 void StringCaseTest::TestEditsFindFwdBwd() {
1065 IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
1066 // Some users need index mappings to be efficient when they are out of order.
1067 // The most interesting failure case for this test is it taking a very long time.
1068 Edits e;
1069 constexpr int32_t N = 200000;
1070 for (int32_t i = 0; i < N; ++i) {
1071 e.addUnchanged(1);
1072 e.addReplace(3, 1);
1073 }
1074 Edits::Iterator iter = e.getFineIterator();
1075 for (int32_t i = 0; i <= N; i += 2) {
1076 assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1077 assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1078 }
1079 for (int32_t i = N; i >= 0; i -= 2) {
1080 assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1081 assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1082 }
1083 }
1084
TestMergeEdits()1085 void StringCaseTest::TestMergeEdits() {
1086 // For debugging, set -v to see matching edits up to a failure.
1087 IcuTestErrorCode errorCode(*this, "TestMergeEdits");
1088 Edits ab, bc, ac, expected_ac;
1089
1090 // Simple: Two parallel non-changes.
1091 ab.addUnchanged(2);
1092 bc.addUnchanged(2);
1093 expected_ac.addUnchanged(2);
1094
1095 // Simple: Two aligned changes.
1096 ab.addReplace(3, 2);
1097 bc.addReplace(2, 1);
1098 expected_ac.addReplace(3, 1);
1099
1100 // Unequal non-changes.
1101 ab.addUnchanged(5);
1102 bc.addUnchanged(3);
1103 expected_ac.addUnchanged(3);
1104 // ab ahead by 2
1105
1106 // Overlapping changes accumulate until they share a boundary.
1107 ab.addReplace(4, 3);
1108 bc.addReplace(3, 2);
1109 ab.addReplace(4, 3);
1110 bc.addReplace(3, 2);
1111 ab.addReplace(4, 3);
1112 bc.addReplace(3, 2);
1113 bc.addUnchanged(4);
1114 expected_ac.addReplace(14, 8);
1115 // bc ahead by 2
1116
1117 // Balance out intermediate-string lengths.
1118 ab.addUnchanged(2);
1119 expected_ac.addUnchanged(2);
1120
1121 // Insert something and delete it: Should disappear.
1122 ab.addReplace(0, 5);
1123 ab.addReplace(0, 2);
1124 bc.addReplace(7, 0);
1125
1126 // Parallel change to make a new boundary.
1127 ab.addReplace(1, 2);
1128 bc.addReplace(2, 3);
1129 expected_ac.addReplace(1, 3);
1130
1131 // Multiple ab deletions should remain separate at the boundary.
1132 ab.addReplace(1, 0);
1133 ab.addReplace(2, 0);
1134 ab.addReplace(3, 0);
1135 expected_ac.addReplace(1, 0);
1136 expected_ac.addReplace(2, 0);
1137 expected_ac.addReplace(3, 0);
1138
1139 // Unequal non-changes can be split for another boundary.
1140 ab.addUnchanged(2);
1141 bc.addUnchanged(1);
1142 expected_ac.addUnchanged(1);
1143 // ab ahead by 1
1144
1145 // Multiple bc insertions should create a boundary and remain separate.
1146 bc.addReplace(0, 4);
1147 bc.addReplace(0, 5);
1148 bc.addReplace(0, 6);
1149 expected_ac.addReplace(0, 4);
1150 expected_ac.addReplace(0, 5);
1151 expected_ac.addReplace(0, 6);
1152 // ab ahead by 1
1153
1154 // Multiple ab deletions in the middle of a bc change are merged.
1155 bc.addReplace(2, 2);
1156 // bc ahead by 1
1157 ab.addReplace(1, 0);
1158 ab.addReplace(2, 0);
1159 ab.addReplace(3, 0);
1160 ab.addReplace(4, 1);
1161 expected_ac.addReplace(11, 2);
1162
1163 // Multiple bc insertions in the middle of an ab change are merged.
1164 ab.addReplace(5, 6);
1165 bc.addReplace(3, 3);
1166 // ab ahead by 3
1167 bc.addReplace(0, 4);
1168 bc.addReplace(0, 5);
1169 bc.addReplace(0, 6);
1170 bc.addReplace(3, 7);
1171 expected_ac.addReplace(5, 25);
1172
1173 // Delete around a deletion.
1174 ab.addReplace(4, 4);
1175 ab.addReplace(3, 0);
1176 ab.addUnchanged(2);
1177 bc.addReplace(2, 2);
1178 bc.addReplace(4, 0);
1179 expected_ac.addReplace(9, 2);
1180
1181 // Insert into an insertion.
1182 ab.addReplace(0, 2);
1183 bc.addReplace(1, 1);
1184 bc.addReplace(0, 8);
1185 bc.addUnchanged(4);
1186 expected_ac.addReplace(0, 10);
1187 // bc ahead by 3
1188
1189 // Balance out intermediate-string lengths.
1190 ab.addUnchanged(3);
1191 expected_ac.addUnchanged(3);
1192
1193 // Deletions meet insertions.
1194 // Output order is arbitrary in principle, but we expect insertions first
1195 // and want to keep it that way.
1196 ab.addReplace(2, 0);
1197 ab.addReplace(4, 0);
1198 ab.addReplace(6, 0);
1199 bc.addReplace(0, 1);
1200 bc.addReplace(0, 3);
1201 bc.addReplace(0, 5);
1202 expected_ac.addReplace(0, 1);
1203 expected_ac.addReplace(0, 3);
1204 expected_ac.addReplace(0, 5);
1205 expected_ac.addReplace(2, 0);
1206 expected_ac.addReplace(4, 0);
1207 expected_ac.addReplace(6, 0);
1208
1209 // End with a non-change, so that further edits are never reordered.
1210 ab.addUnchanged(1);
1211 bc.addUnchanged(1);
1212 expected_ac.addUnchanged(1);
1213
1214 ac.mergeAndAppend(ab, bc, errorCode);
1215 assertSuccess("ab+bc", errorCode);
1216 if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
1217 return;
1218 }
1219
1220 // Append more Edits.
1221 Edits ab2, bc2;
1222 ab2.addUnchanged(5);
1223 bc2.addReplace(1, 2);
1224 bc2.addUnchanged(4);
1225 expected_ac.addReplace(1, 2);
1226 expected_ac.addUnchanged(4);
1227 ac.mergeAndAppend(ab2, bc2, errorCode);
1228 assertSuccess("ab2+bc2", errorCode);
1229 if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
1230 return;
1231 }
1232
1233 // Append empty edits.
1234 Edits empty;
1235 ac.mergeAndAppend(empty, empty, errorCode);
1236 assertSuccess("empty+empty", errorCode);
1237 if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
1238 return;
1239 }
1240
1241 // Error: Append more edits with mismatched intermediate-string lengths.
1242 Edits mismatch;
1243 mismatch.addReplace(1, 1);
1244 ac.mergeAndAppend(ab2, mismatch, errorCode);
1245 assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1246 errorCode.reset();
1247 ac.mergeAndAppend(mismatch, bc2, errorCode);
1248 assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1249 errorCode.reset();
1250 }
1251
TestCaseMapWithEdits()1252 void StringCaseTest::TestCaseMapWithEdits() {
1253 IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
1254 UChar dest[20];
1255 Edits edits;
1256
1257 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1258 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1259 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1260 static const EditChange lowerExpectedChanges[] = {
1261 { TRUE, 1, 1 },
1262 { FALSE, 4, 4 },
1263 { TRUE, 1, 1 },
1264 { FALSE, 2, 2 }
1265 };
1266 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1267 edits.getFineIterator(), edits.getFineIterator(),
1268 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1269 TRUE, errorCode);
1270
1271 edits.reset();
1272 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1273 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1274 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1275 static const EditChange upperExpectedChanges[] = {
1276 { FALSE, 1, 1 },
1277 { TRUE, 1, 1 },
1278 { TRUE, 1, 1 },
1279 { TRUE, 1, 1 },
1280 { TRUE, 1, 1 },
1281 { TRUE, 1, 1 }
1282 };
1283 TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1284 edits.getFineIterator(), edits.getFineIterator(),
1285 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1286 TRUE, errorCode);
1287
1288 edits.reset();
1289
1290 #if !UCONFIG_NO_BREAK_ITERATION
1291 length = CaseMap::toTitle("nl",
1292 U_OMIT_UNCHANGED_TEXT |
1293 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1294 U_TITLECASE_NO_LOWERCASE,
1295 nullptr, u"IjssEL IglOo", 12,
1296 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1297 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1298 static const EditChange titleExpectedChanges[] = {
1299 { FALSE, 1, 1 },
1300 { TRUE, 1, 1 },
1301 { FALSE, 10, 10 }
1302 };
1303 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1304 edits.getFineIterator(), edits.getFineIterator(),
1305 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1306 TRUE, errorCode);
1307 #endif
1308
1309 // No explicit nor automatic edits.reset(). Edits should be appended.
1310 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1311 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1312 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1313 static const EditChange foldExpectedChanges[] = {
1314 #if !UCONFIG_NO_BREAK_ITERATION
1315 // From titlecasing.
1316 { FALSE, 1, 1 },
1317 { TRUE, 1, 1 },
1318 { FALSE, 10, 10 },
1319 #endif
1320 // From case folding.
1321 { TRUE, 1, 1 },
1322 { TRUE, 1, 2 },
1323 { FALSE, 3, 3 },
1324 { TRUE, 1, 1 },
1325 { FALSE, 2, 2 }
1326 };
1327 TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
1328 edits.getFineIterator(), edits.getFineIterator(),
1329 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1330 TRUE, errorCode);
1331 }
1332
TestCaseMapUTF8WithEdits()1333 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1334 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
1335 char dest[50];
1336 Edits edits;
1337
1338 int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
1339 reinterpret_cast<const char*>(u8"IstanBul"), 8,
1340 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1341 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1342 UnicodeString::fromUTF8(StringPiece(dest, length)));
1343 static const EditChange lowerExpectedChanges[] = {
1344 { TRUE, 1, 2 },
1345 { FALSE, 4, 4 },
1346 { TRUE, 1, 1 },
1347 { FALSE, 2, 2 }
1348 };
1349 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1350 edits.getFineIterator(), edits.getFineIterator(),
1351 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1352 TRUE, errorCode);
1353
1354 edits.reset();
1355 length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
1356 reinterpret_cast<const char*>(u8"Πατάτα"), 6 * 2,
1357 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1358 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1359 UnicodeString::fromUTF8(StringPiece(dest, length)));
1360 static const EditChange upperExpectedChanges[] = {
1361 { FALSE, 2, 2 },
1362 { TRUE, 2, 2 },
1363 { TRUE, 2, 2 },
1364 { TRUE, 2, 2 },
1365 { TRUE, 2, 2 },
1366 { TRUE, 2, 2 }
1367 };
1368 TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1369 edits.getFineIterator(), edits.getFineIterator(),
1370 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1371 TRUE, errorCode);
1372
1373 edits.reset();
1374 #if !UCONFIG_NO_BREAK_ITERATION
1375 length = CaseMap::utf8ToTitle("nl",
1376 U_OMIT_UNCHANGED_TEXT |
1377 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1378 U_TITLECASE_NO_LOWERCASE,
1379 nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), 12,
1380 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1381 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1382 UnicodeString::fromUTF8(StringPiece(dest, length)));
1383 static const EditChange titleExpectedChanges[] = {
1384 { FALSE, 1, 1 },
1385 { TRUE, 1, 1 },
1386 { FALSE, 10, 10 }
1387 };
1388 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1389 edits.getFineIterator(), edits.getFineIterator(),
1390 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1391 TRUE, errorCode);
1392 #endif
1393
1394 // No explicit nor automatic edits.reset(). Edits should be appended.
1395 length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
1396 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1397 reinterpret_cast<const char*>(u8"IßtanBul"), 1 + 2 + 6,
1398 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1399 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1400 UnicodeString::fromUTF8(StringPiece(dest, length)));
1401 static const EditChange foldExpectedChanges[] = {
1402 #if !UCONFIG_NO_BREAK_ITERATION
1403 // From titlecasing.
1404 { FALSE, 1, 1 },
1405 { TRUE, 1, 1 },
1406 { FALSE, 10, 10 },
1407 #endif
1408 // From case folding.
1409 { TRUE, 1, 2 },
1410 { TRUE, 2, 2 },
1411 { FALSE, 3, 3 },
1412 { TRUE, 1, 1 },
1413 { FALSE, 2, 2 }
1414 };
1415 TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
1416 edits.getFineIterator(), edits.getFineIterator(),
1417 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1418 TRUE, errorCode);
1419 }
1420
TestCaseMapToString()1421 void StringCaseTest::TestCaseMapToString() {
1422 // This test function name is parallel with one in UCharacterCaseTest.java.
1423 // It is a bit of a misnomer until we have CaseMap API that writes to
1424 // a UnicodeString, at which point we should change this code here.
1425 IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
1426 UChar dest[20];
1427
1428 // Omit unchanged text.
1429 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1430 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1431 assertEquals(u"toLower(IstanBul)",
1432 UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1433 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1434 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1435 assertEquals(u"toUpper(Πατάτα)",
1436 UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1437 #if !UCONFIG_NO_BREAK_ITERATION
1438 length = CaseMap::toTitle("nl",
1439 U_OMIT_UNCHANGED_TEXT |
1440 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1441 U_TITLECASE_NO_LOWERCASE,
1442 nullptr, u"IjssEL IglOo", 12,
1443 dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1444 assertEquals(u"toTitle(IjssEL IglOo)",
1445 UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1446 #endif
1447 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1448 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1449 assertEquals(u"foldCase(IßtanBul)",
1450 UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1451
1452 // Return the whole result string.
1453 length = CaseMap::toLower("tr", 0,
1454 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1455 assertEquals(u"toLower(IstanBul)",
1456 UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
1457 length = CaseMap::toUpper("el", 0,
1458 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1459 assertEquals(u"toUpper(Πατάτα)",
1460 UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1461 #if !UCONFIG_NO_BREAK_ITERATION
1462 length = CaseMap::toTitle("nl",
1463 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1464 U_TITLECASE_NO_LOWERCASE,
1465 nullptr, u"IjssEL IglOo", 12,
1466 dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1467 assertEquals(u"toTitle(IjssEL IglOo)",
1468 UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
1469 #endif
1470 length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1471 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1472 assertEquals(u"foldCase(IßtanBul)",
1473 UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
1474 }
1475
TestCaseMapUTF8ToString()1476 void StringCaseTest::TestCaseMapUTF8ToString() {
1477 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
1478 std::string dest;
1479 StringByteSink<std::string> sink(&dest);
1480
1481 // Omit unchanged text.
1482 CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
1483 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
1484 dest.clear();
1485 CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
1486 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1487 UnicodeString::fromUTF8(dest));
1488 #if !UCONFIG_NO_BREAK_ITERATION
1489 dest.clear();
1490 CaseMap::utf8ToTitle(
1491 "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1492 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1493 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1494 UnicodeString::fromUTF8(dest));
1495 #endif
1496 dest.clear();
1497 CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1498 u8"IßtanBul", sink, nullptr, errorCode);
1499 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1500 UnicodeString::fromUTF8(dest));
1501
1502 // Return the whole result string.
1503 dest.clear();
1504 CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
1505 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
1506 UnicodeString::fromUTF8(dest));
1507 dest.clear();
1508 CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
1509 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
1510 UnicodeString::fromUTF8(dest));
1511 #if !UCONFIG_NO_BREAK_ITERATION
1512 dest.clear();
1513 CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1514 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1515 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
1516 UnicodeString::fromUTF8(dest));
1517 #endif
1518 dest.clear();
1519 CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
1520 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
1521 UnicodeString::fromUTF8(dest));
1522 }
1523
TestLongUnicodeString()1524 void StringCaseTest::TestLongUnicodeString() {
1525 // Code coverage for UnicodeString case mapping code handling
1526 // long strings or many changes in a string.
1527 UnicodeString s(TRUE,
1528 (const UChar *)
1529 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1530 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1531 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1532 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1533 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1534 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1535 UnicodeString expected(TRUE,
1536 (const UChar *)
1537 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1538 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1539 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1540 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1541 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1542 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1543 s.toUpper(Locale::getRoot());
1544 assertEquals("string length 306", expected, s);
1545 }
1546
1547 #if !UCONFIG_NO_BREAK_ITERATION
TestBug13127()1548 void StringCaseTest::TestBug13127() {
1549 // Test case crashed when the bug was present.
1550 const char16_t *s16 = u"日本語";
1551 UnicodeString s(TRUE, s16, -1);
1552 s.toTitle(0, Locale::getEnglish());
1553 }
1554
TestInPlaceTitle()1555 void StringCaseTest::TestInPlaceTitle() {
1556 // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1557 IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
1558 char16_t s[32] = u"ß ß ß日本語 abcdef";
1559 const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
1560 int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
1561 assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
1562 assertEquals("u_strToTitle(in-place)", expected, s);
1563 }
1564 #endif
1565
TestCaseMapEditsIteratorDocs()1566 void StringCaseTest::TestCaseMapEditsIteratorDocs() {
1567 IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
1568 const char16_t* input = u"abcßDeF";
1569 int32_t inputLength = u_strlen(input);
1570 // output: "abcssdef"
1571
1572 char16_t output[10];
1573 Edits edits;
1574 CaseMap::fold(0, input, -1, output, 10, &edits, status);
1575
1576 static const char16_t* fineIteratorExpected[] = {
1577 u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1578 u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1579 u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1580 u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1581 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1582 };
1583 static const char16_t* fineChangesIteratorExpected[] = {
1584 u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1585 u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1586 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1587 };
1588 static const char16_t* coarseIteratorExpected[] = {
1589 u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1590 u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1591 u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1592 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1593 };
1594 static const char16_t* coarseChangesIteratorExpected[] = {
1595 u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1596 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1597 };
1598
1599 // Expected destination indices when source index is queried
1600 static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
1601 static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
1602 static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
1603 static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
1604
1605 // Expected source indices when destination index is queried
1606 static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
1607 static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
1608 static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
1609 static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
1610
1611 // Demonstrate the iterator next() method:
1612 Edits::Iterator fineIterator = edits.getFineIterator();
1613 int i = 0;
1614 UnicodeString toString;
1615 while (fineIterator.next(status)) {
1616 UnicodeString expected = fineIteratorExpected[i++];
1617 assertEquals(UnicodeString(u"Iteration #") + i,
1618 expected,
1619 fineIterator.toString(toString.remove()));
1620 }
1621 Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
1622 i = 0;
1623 while (fineChangesIterator.next(status)) {
1624 UnicodeString expected = fineChangesIteratorExpected[i++];
1625 assertEquals(UnicodeString(u"Iteration #") + i,
1626 expected,
1627 fineChangesIterator.toString(toString.remove()));
1628 }
1629 Edits::Iterator coarseIterator = edits.getCoarseIterator();
1630 i = 0;
1631 while (coarseIterator.next(status)) {
1632 UnicodeString expected = coarseIteratorExpected[i++];
1633 assertEquals(UnicodeString(u"Iteration #") + i,
1634 expected,
1635 coarseIterator.toString(toString.remove()));
1636 }
1637 Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
1638 i = 0;
1639 while (coarseChangesIterator.next(status)) {
1640 UnicodeString expected = coarseChangesIteratorExpected[i++];
1641 assertEquals(UnicodeString(u"Iteration #") + i,
1642 expected,
1643 coarseChangesIterator.toString(toString.remove()));
1644 }
1645
1646 // Demonstrate the iterator indexing methods:
1647 // fineIterator should have the same behavior as fineChangesIterator, and
1648 // coarseIterator should have the same behavior as coarseChangesIterator.
1649 for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
1650 fineIterator.findSourceIndex(srcIndex, status);
1651 fineChangesIterator.findSourceIndex(srcIndex, status);
1652 coarseIterator.findSourceIndex(srcIndex, status);
1653 coarseChangesIterator.findSourceIndex(srcIndex, status);
1654
1655 assertEquals(UnicodeString("Source index: ") + srcIndex,
1656 expectedDestFineEditIndices[srcIndex],
1657 fineIterator.destinationIndex());
1658 assertEquals(UnicodeString("Source index: ") + srcIndex,
1659 expectedDestFineEditIndices[srcIndex],
1660 fineChangesIterator.destinationIndex());
1661 assertEquals(UnicodeString("Source index: ") + srcIndex,
1662 expectedDestCoarseEditIndices[srcIndex],
1663 coarseIterator.destinationIndex());
1664 assertEquals(UnicodeString("Source index: ") + srcIndex,
1665 expectedDestCoarseEditIndices[srcIndex],
1666 coarseChangesIterator.destinationIndex());
1667
1668 assertEquals(UnicodeString("Source index: ") + srcIndex,
1669 expectedDestFineStringIndices[srcIndex],
1670 fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
1671 assertEquals(UnicodeString("Source index: ") + srcIndex,
1672 expectedDestFineStringIndices[srcIndex],
1673 fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1674 assertEquals(UnicodeString("Source index: ") + srcIndex,
1675 expectedDestCoarseStringIndices[srcIndex],
1676 coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
1677 assertEquals(UnicodeString("Source index: ") + srcIndex,
1678 expectedDestCoarseStringIndices[srcIndex],
1679 coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1680 }
1681 for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
1682 fineIterator.findDestinationIndex(destIndex, status);
1683 fineChangesIterator.findDestinationIndex(destIndex, status);
1684 coarseIterator.findDestinationIndex(destIndex, status);
1685 coarseChangesIterator.findDestinationIndex(destIndex, status);
1686
1687 assertEquals(UnicodeString("Destination index: ") + destIndex,
1688 expectedSrcFineEditIndices[destIndex],
1689 fineIterator.sourceIndex());
1690 assertEquals(UnicodeString("Destination index: ") + destIndex,
1691 expectedSrcFineEditIndices[destIndex],
1692 fineChangesIterator.sourceIndex());
1693 assertEquals(UnicodeString("Destination index: ") + destIndex,
1694 expectedSrcCoarseEditIndices[destIndex],
1695 coarseIterator.sourceIndex());
1696 assertEquals(UnicodeString("Destination index: ") + destIndex,
1697 expectedSrcCoarseEditIndices[destIndex],
1698 coarseChangesIterator.sourceIndex());
1699
1700 assertEquals(UnicodeString("Destination index: ") + destIndex,
1701 expectedSrcFineStringIndices[destIndex],
1702 fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
1703 assertEquals(UnicodeString("Destination index: ") + destIndex,
1704 expectedSrcFineStringIndices[destIndex],
1705 fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1706 assertEquals(UnicodeString("Destination index: ") + destIndex,
1707 expectedSrcCoarseStringIndices[destIndex],
1708 coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
1709 assertEquals(UnicodeString("Destination index: ") + destIndex,
1710 expectedSrcCoarseStringIndices[destIndex],
1711 coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1712 }
1713 }
1714
TestCaseMapGreekExtended()1715 void StringCaseTest::TestCaseMapGreekExtended() {
1716 // Ticket 13851
1717 UnicodeString s(u"\u1F80\u1F88\u1FFC");
1718 UnicodeString result(s);
1719 result.toLower(Locale::getRoot());
1720 assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result);
1721 #if !UCONFIG_NO_BREAK_ITERATION
1722 result = s;
1723 result.toTitle(nullptr, Locale::getRoot());
1724 assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result);
1725 #endif
1726 }
1727
1728 //#endif
1729