1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2002-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  strcase.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2002mar12
16 *   created by: Markus W. Scherer
17 *
18 *   Test file for string casing C++ API functions.
19 */
20 
21 #include "unicode/std_string.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ures.h"
27 #include "unicode/uloc.h"
28 #include "unicode/locid.h"
29 #include "unicode/ubrk.h"
30 #include "unicode/unistr.h"
31 #include "unicode/ucasemap.h"
32 #include "unicode/ustring.h"
33 #include "ucase.h"
34 #include "ustrtest.h"
35 #include "unicode/tstdtmod.h"
36 #include "cmemory.h"
37 #include "testutil.h"
38 
39 class StringCaseTest: public IntlTest {
40 public:
41     StringCaseTest();
42     virtual ~StringCaseTest();
43 
44     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
45 
46     void TestCaseConversion();
47 
48     void TestCasingImpl(const UnicodeString &input,
49                         const UnicodeString &output,
50                         int32_t whichCase,
51                         void *iter, const char *localeID, uint32_t options);
52     void TestCasing();
53     void TestTitleOptions();
54     void TestFullCaseFoldingIterator();
55     void TestGreekUpper();
56     void TestArmenian();
57     void TestLongUpper();
58     void TestMalformedUTF8();
59     void TestBufferOverflow();
60     void TestEdits();
61     void TestCopyMoveEdits();
62     void TestEditsFindFwdBwd();
63     void TestMergeEdits();
64     void TestCaseMapWithEdits();
65     void TestCaseMapUTF8WithEdits();
66     void TestCaseMapToString();
67     void TestCaseMapUTF8ToString();
68     void TestLongUnicodeString();
69     void TestBug13127();
70     void TestInPlaceTitle();
71     void TestCaseMapEditsIteratorDocs();
72     void TestCaseMapGreekExtended();
73 
74 private:
75     void assertGreekUpper(const char16_t *s, const char16_t *expected);
76 
77     Locale GREEK_LOCALE_;
78 };
79 
StringCaseTest()80 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
81 
~StringCaseTest()82 StringCaseTest::~StringCaseTest() {}
83 
createStringCaseTest()84 extern IntlTest *createStringCaseTest() {
85     return new StringCaseTest();
86 }
87 
// Test dispatch for this suite.
// Logs the suite name when exec is set, then hands index/exec/name to the
// TESTCASE_AUTO macros, which are defined by the test framework and not shown here.
// NOTE(review): the macros presumably number the tests in order of appearance,
// so the order of the entries below (and the #if guards) should be treated as
// significant -- confirm against the macro definitions before reordering.
// The par argument is unused.
void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite StringCaseTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestCaseConversion);
    // These two tests are only registered when break iteration, file I/O and
    // legacy conversion are all configured in.
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
    TESTCASE_AUTO(TestCasing);
    TESTCASE_AUTO(TestTitleOptions);
#endif
    TESTCASE_AUTO(TestFullCaseFoldingIterator);
    TESTCASE_AUTO(TestGreekUpper);
    TESTCASE_AUTO(TestArmenian);
    TESTCASE_AUTO(TestLongUpper);
    TESTCASE_AUTO(TestMalformedUTF8);
    TESTCASE_AUTO(TestBufferOverflow);
    TESTCASE_AUTO(TestEdits);
    TESTCASE_AUTO(TestCopyMoveEdits);
    TESTCASE_AUTO(TestEditsFindFwdBwd);
    TESTCASE_AUTO(TestMergeEdits);
    TESTCASE_AUTO(TestCaseMapWithEdits);
    TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
    TESTCASE_AUTO(TestCaseMapToString);
    TESTCASE_AUTO(TestCaseMapUTF8ToString);
    TESTCASE_AUTO(TestLongUnicodeString);
    // These two tests are only registered when break iteration is configured in.
#if !UCONFIG_NO_BREAK_ITERATION
    TESTCASE_AUTO(TestBug13127);
    TESTCASE_AUTO(TestInPlaceTitle);
#endif
    TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
    TESTCASE_AUTO(TestCaseMapGreekExtended);
    TESTCASE_AUTO_END;
}
122 
123 void
TestCaseConversion()124 StringCaseTest::TestCaseConversion()
125 {
126     static const UChar uppercaseGreek[] =
127         { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
128         0x39f, 0x3a3, 0 };
129         // "IESUS CHRISTOS"
130 
131     static const UChar lowercaseGreek[] =
132         { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
133         0x3bf, 0x3c2, 0 };
134         // "iesus christos"
135 
136     static const UChar lowercaseTurkish[] =
137         { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
138         0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
139 
140     static const UChar uppercaseTurkish[] =
141         { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
142         0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
143 
144     UnicodeString expectedResult;
145     UnicodeString   test3;
146 
147     test3 += (UChar32)0x0130;
148     test3 += "STANBUL, NOT CONSTANTINOPLE!";
149 
150     UnicodeString   test4(test3);
151     test4.toLower(Locale(""));
152     expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
153     if (test4 != expectedResult)
154         errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
155 
156     test4 = test3;
157     test4.toLower(Locale("tr", "TR"));
158     expectedResult = lowercaseTurkish;
159     if (test4 != expectedResult)
160         errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
161 
162     test3 = "topkap";
163     test3 += (UChar32)0x0131;
164     test3 += " palace, istanbul";
165     test4 = test3;
166 
167     test4.toUpper(Locale(""));
168     expectedResult = "TOPKAPI PALACE, ISTANBUL";
169     if (test4 != expectedResult)
170         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
171 
172     test4 = test3;
173     test4.toUpper(Locale("tr", "TR"));
174     expectedResult = uppercaseTurkish;
175     if (test4 != expectedResult)
176         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
177 
178     test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
179 
180     test3.toUpper(Locale("de", "DE"));
181     expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
182     if (test3 != expectedResult)
183         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
184 
185     test4.replace(0, test4.length(), uppercaseGreek);
186 
187     test4.toLower(Locale("el", "GR"));
188     expectedResult = lowercaseGreek;
189     if (test4 != expectedResult)
190         errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
191 
192     test4.replace(0, test4.length(), lowercaseGreek);
193 
194     test4.toUpper();
195     expectedResult = uppercaseGreek;
196     if (test4 != expectedResult)
197         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
198 
199     // more string case mapping tests with the new implementation
200     {
201         static const UChar
202 
203         beforeLower[]= { 0x61, 0x42, 0x49,  0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
204         lowerRoot[]=   { 0x61, 0x62, 0x69,  0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
205         lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
206 
207         beforeUpper[]= { 0x61, 0x42, 0x69,  0x3c2, 0xdf,       0x3c3, 0x2f, 0xfb03,           0xfb03,           0xfb03,           0xd93f, 0xdfff },
208         upperRoot[]=   { 0x41, 0x42, 0x49,  0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
209         upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
210 
211         beforeMiniUpper[]=  { 0xdf, 0x61 },
212         miniUpper[]=        { 0x53, 0x53, 0x41 };
213 
214         UnicodeString s;
215 
216         /* lowercase with root locale */
217         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
218         s.toLower("");
219         if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
220             s!=UnicodeString(FALSE, lowerRoot, s.length())
221         ) {
222             errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
223         }
224 
225         /* lowercase with turkish locale */
226         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
227         s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
228         if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
229             s!=UnicodeString(FALSE, lowerTurkish, s.length())
230         ) {
231             errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
232         }
233 
234         /* uppercase with root locale */
235         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
236         s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
237         if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
238             s!=UnicodeString(FALSE, upperRoot, s.length())
239         ) {
240             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
241         }
242 
243         /* uppercase with turkish locale */
244         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
245         s.toUpper(Locale("tr"));
246         if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
247             s!=UnicodeString(FALSE, upperTurkish, s.length())
248         ) {
249             errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
250         }
251 
252         /* uppercase a short string with root locale */
253         s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
254         s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
255         if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
256             s!=UnicodeString(FALSE, miniUpper, s.length())
257         ) {
258             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
259         }
260     }
261 
262     // test some supplementary characters (>= Unicode 3.1)
263     {
264         UnicodeString t;
265 
266         UnicodeString
267             deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
268             deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
269             deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
270         (t=deseretInput).toLower();
271         if(t!=deseretLower) {
272             errln("error lowercasing Deseret (plane 1) characters");
273         }
274         (t=deseretInput).toUpper();
275         if(t!=deseretUpper) {
276             errln("error uppercasing Deseret (plane 1) characters");
277         }
278     }
279 
280     // test some more cases that looked like problems
281     {
282         UnicodeString t;
283 
284         UnicodeString
285             ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
286             ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
287             ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
288         (t=ljInput).toLower("en");
289         if(t!=ljLower) {
290             errln("error lowercasing LJ characters");
291         }
292         (t=ljInput).toUpper("en");
293         if(t!=ljUpper) {
294             errln("error uppercasing LJ characters");
295         }
296     }
297 
298 #if !UCONFIG_NO_NORMALIZATION
299     // some context-sensitive casing depends on normalization data being present
300 
301     // Unicode 3.1.1 SpecialCasing tests
302     {
303         UnicodeString t;
304 
305         // sigmas preceded and/or followed by cased letters
306         UnicodeString
307             sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
308             sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
309             sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
310 
311         (t=sigmas).toLower();
312         if(t!=sigmasLower) {
313             errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
314         }
315 
316         (t=sigmas).toUpper(Locale(""));
317         if(t!=sigmasUpper) {
318             errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
319         }
320 
321         // turkish & azerbaijani dotless i & dotted I
322         // remove dot above if there was a capital I before and there are no more accents above
323         UnicodeString
324             dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
325             dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
326             dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
327 
328         (t=dots).toLower("tr");
329         if(t!=dotsTurkish) {
330             errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
331         }
332 
333         (t=dots).toLower("de");
334         if(t!=dotsDefault) {
335             errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
336         }
337     }
338 
339     // more Unicode 3.1.1 tests
340     {
341         UnicodeString t;
342 
343         // lithuanian dot above in uppercasing
344         UnicodeString
345             dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
346             dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
347             dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
348 
349         (t=dots).toUpper("lt");
350         if(t!=dotsLithuanian) {
351             errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
352         }
353 
354         (t=dots).toUpper("de");
355         if(t!=dotsDefault) {
356             errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
357         }
358 
359         // lithuanian adds dot above to i in lowercasing if there are more above accents
360         UnicodeString
361             i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
362             iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
363             iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
364 
365         (t=i).toLower("lt");
366         if(t!=iLithuanian) {
367             errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
368         }
369 
370         (t=i).toLower("de");
371         if(t!=iDefault) {
372             errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
373         }
374     }
375 
376 #endif
377 
378     // test case folding
379     {
380         UnicodeString
381             s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
382             f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
383             g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
384             t;
385 
386         (t=s).foldCase();
387         if(f!=t) {
388             errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
389         }
390 
391         // alternate handling for dotted I/dotless i (U+0130, U+0131)
392         (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
393         if(g!=t) {
394             errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
395         }
396     }
397 }
398 
399 // data-driven case mapping tests ------------------------------------------ ***
400 
// Selects the case mapping operation under test; the values also index dataNames[].
// TEST_COUNT is the number of operations, not an operation itself.
enum {
    TEST_LOWER = 0,
    TEST_UPPER = 1,
    TEST_TITLE = 2,
    TEST_FOLD  = 3,
    TEST_COUNT = 4
};
408 
409 // names of TestData children in casing.txt
410 static const char *const dataNames[TEST_COUNT+1]={
411     "lowercasing",
412     "uppercasing",
413     "titlecasing",
414     "casefolding",
415     ""
416 };
417 
418 void
TestCasingImpl(const UnicodeString & input,const UnicodeString & output,int32_t whichCase,void * iter,const char * localeID,uint32_t options)419 StringCaseTest::TestCasingImpl(const UnicodeString &input,
420                                const UnicodeString &output,
421                                int32_t whichCase,
422                                void *iter, const char *localeID, uint32_t options) {
423     // UnicodeString
424     UnicodeString result;
425     const char *name;
426     Locale locale(localeID);
427 
428     result=input;
429     switch(whichCase) {
430     case TEST_LOWER:
431         name="toLower";
432         result.toLower(locale);
433         break;
434     case TEST_UPPER:
435         name="toUpper";
436         result.toUpper(locale);
437         break;
438 #if !UCONFIG_NO_BREAK_ITERATION
439     case TEST_TITLE:
440         name="toTitle";
441         result.toTitle((BreakIterator *)iter, locale, options);
442         break;
443 #endif
444     case TEST_FOLD:
445         name="foldCase";
446         result.foldCase(options);
447         break;
448     default:
449         name="";
450         break; // won't happen
451     }
452     if(result!=output) {
453         dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
454     }
455 #if !UCONFIG_NO_BREAK_ITERATION
456     if(whichCase==TEST_TITLE && options==0) {
457         result=input;
458         result.toTitle((BreakIterator *)iter, locale);
459         if(result!=output) {
460             dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
461         }
462     }
463 #endif
464 
465     // UTF-8
466     char utf8In[100], utf8Out[100];
467     int32_t utf8InLength, utf8OutLength, resultLength;
468     UChar *buffer;
469 
470     IcuTestErrorCode errorCode(*this, "TestCasingImpl");
471     LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
472 #if !UCONFIG_NO_BREAK_ITERATION
473     if(iter!=NULL) {
474         // Clone the break iterator so that the UCaseMap can safely adopt it.
475         UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
476         ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
477     }
478 #endif
479 
480     u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
481     switch(whichCase) {
482     case TEST_LOWER:
483         name="ucasemap_utf8ToLower";
484         utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
485                     utf8Out, (int32_t)sizeof(utf8Out),
486                     utf8In, utf8InLength, errorCode);
487         break;
488     case TEST_UPPER:
489         name="ucasemap_utf8ToUpper";
490         utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
491                     utf8Out, (int32_t)sizeof(utf8Out),
492                     utf8In, utf8InLength, errorCode);
493         break;
494 #if !UCONFIG_NO_BREAK_ITERATION
495     case TEST_TITLE:
496         name="ucasemap_utf8ToTitle";
497         utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
498                     utf8Out, (int32_t)sizeof(utf8Out),
499                     utf8In, utf8InLength, errorCode);
500         break;
501 #endif
502     case TEST_FOLD:
503         name="ucasemap_utf8FoldCase";
504         utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
505                     utf8Out, (int32_t)sizeof(utf8Out),
506                     utf8In, utf8InLength, errorCode);
507         break;
508     default:
509         name="";
510         utf8OutLength=0;
511         break; // won't happen
512     }
513     buffer=result.getBuffer(utf8OutLength);
514     u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
515     result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
516 
517     if(errorCode.isFailure()) {
518         errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
519         errorCode.reset();
520     } else if(result!=output) {
521         errln("error: %s() got a wrong result for a test case from casing.res", name);
522         errln("expected \"" + output + "\" got \"" + result + "\"" );
523     }
524 }
525 
526 void
TestCasing()527 StringCaseTest::TestCasing() {
528     UErrorCode status = U_ZERO_ERROR;
529 #if !UCONFIG_NO_BREAK_ITERATION
530     LocalUBreakIteratorPointer iter;
531 #endif
532     char cLocaleID[100];
533     UnicodeString locale, input, output, optionsString, result;
534     uint32_t options;
535     int32_t whichCase, type;
536     LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
537     if(U_SUCCESS(status)) {
538         for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
539 #if UCONFIG_NO_BREAK_ITERATION
540             if(whichCase==TEST_TITLE) {
541                 continue;
542             }
543 #endif
544             LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
545             if(U_FAILURE(status)) {
546                 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
547                 break;
548             }
549             const DataMap *myCase = NULL;
550             while(casingTest->nextCase(myCase, status)) {
551                 input = myCase->getString("Input", status);
552                 output = myCase->getString("Output", status);
553 
554                 if(whichCase!=TEST_FOLD) {
555                     locale = myCase->getString("Locale", status);
556                 }
557                 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
558 
559 #if !UCONFIG_NO_BREAK_ITERATION
560                 if(whichCase==TEST_TITLE) {
561                     type = myCase->getInt("Type", status);
562                     if(type>=0) {
563                         iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
564                     } else if(type==-2) {
565                         // Open a trivial break iterator that only delivers { 0, length }
566                         // or even just { 0 } as boundaries.
567                         static const UChar rules[] = { 0x2e, 0x2a, 0x3b };  // ".*;"
568                         UParseError parseError;
569                         iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
570                     }
571                 }
572 #endif
573                 options = 0;
574                 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
575                     optionsString = myCase->getString("Options", status);
576                     if(optionsString.indexOf((UChar)0x54)>=0) {  // T
577                         options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
578                     }
579                     if(optionsString.indexOf((UChar)0x4c)>=0) {  // L
580                         options|=U_TITLECASE_NO_LOWERCASE;
581                     }
582                     if(optionsString.indexOf((UChar)0x41)>=0) {  // A
583                         options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
584                     }
585                 }
586 
587                 if(U_FAILURE(status)) {
588                     dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase],  u_errorName(status));
589                     status = U_ZERO_ERROR;
590                 } else {
591 #if UCONFIG_NO_BREAK_ITERATION
592                     LocalPointer<UMemory> iter;
593 #endif
594                     TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
595                 }
596 
597 #if !UCONFIG_NO_BREAK_ITERATION
598                 iter.adoptInstead(NULL);
599 #endif
600             }
601         }
602     }
603 
604 #if !UCONFIG_NO_BREAK_ITERATION
605     // more tests for API coverage
606     status=U_ZERO_ERROR;
607     input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
608     (result=input).toTitle(NULL);
609     if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
610         dataerrln("UnicodeString::toTitle(NULL) failed.");
611     }
612 #endif
613 }
614 
615 void
TestTitleOptions()616 StringCaseTest::TestTitleOptions() {
617     // New options in ICU 60.
618     TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
619                    nullptr, "", U_TITLECASE_WHOLE_STRING);
620     TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
621                    nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE);
622     TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
623                    nullptr, "", U_TITLECASE_WHOLE_STRING);
624     TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
625                    nullptr, "", U_TITLECASE_WHOLE_STRING);
626     TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
627                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
628     TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
629                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
630     TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
631                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE);
632     TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
633                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT);
634     TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
635                    nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
636     TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
637                    nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
638 
639 #if !UCONFIG_NO_BREAK_ITERATION
640     // Test conflicting settings.
641     // If & when we add more options, then the ORed combinations may become
642     // indistinguishable from valid values.
643     IcuTestErrorCode errorCode(*this, "TestTitleOptions");
644     CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr,
645                      u"", 0, nullptr, 0, nullptr, errorCode);
646     if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
647         errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
648               errorCode.errorName());
649     }
650     errorCode.reset();
651     CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr,
652                      u"", 0, nullptr, 0, nullptr, errorCode);
653     if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
654         errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
655               errorCode.errorName());
656     }
657     errorCode.reset();
658     LocalPointer<BreakIterator> iter(
659         BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
660     CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
661                      u"", 0, nullptr, 0, nullptr, errorCode);
662     if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
663         errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
664               errorCode.errorName());
665     }
666     errorCode.reset();
667 #endif
668 }
669 
670 void
TestFullCaseFoldingIterator()671 StringCaseTest::TestFullCaseFoldingIterator() {
672     UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
673     UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
674     FullCaseFoldingIterator iter;
675     int32_t count=0;
676     int32_t countSpecific=0;
677     UChar32 c;
678     UnicodeString full;
679     while((c=iter.next(full))>=0) {
680         ++count;
681         // Check that the full Case_Folding has more than 1 code point.
682         if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
683             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
684             continue;
685         }
686         // Check that full == Case_Folding(c).
687         UnicodeString cf(c);
688         cf.foldCase();
689         if(full!=cf) {
690             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
691             continue;
692         }
693         // Spot-check a couple of specific cases.
694         if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
695             ++countSpecific;
696         }
697     }
698     if(countSpecific!=3) {
699         errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
700     }
701     if(count<70) {
702         errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
703     }
704 }
705 
706 void
assertGreekUpper(const char16_t * s,const char16_t * expected)707 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
708     UnicodeString s16(s);
709     UnicodeString expected16(expected);
710     UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
711     UnicodeString result16(s16);
712     result16.toUpper(GREEK_LOCALE_);
713     assertEquals(msg, expected16, result16);
714 
715     msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
716     int32_t length = expected16.length();
717     int32_t capacities[] = {
718         // Keep in sync with the UTF-8 capacities near the bottom of this function.
719         0, length / 2, length - 1, length, length + 1
720     };
721     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
722         int32_t cap = capacities[i];
723         UChar *dest16 = result16.getBuffer(expected16.length() + 1);
724         u_memset(dest16, 0x55AA, result16.getCapacity());
725         UErrorCode errorCode = U_ZERO_ERROR;
726         length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
727         assertEquals(msg + cap, expected16.length(), length);
728         UErrorCode expectedErrorCode;
729         if (cap < expected16.length()) {
730             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
731         } else if (cap == expected16.length()) {
732             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
733         } else {
734             expectedErrorCode = U_ZERO_ERROR;
735             assertEquals(msg + cap + " NUL", 0, dest16[length]);
736         }
737         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
738         result16.releaseBuffer(length);
739         if (cap >= expected16.length()) {
740             assertEquals(msg + cap, expected16, result16);
741         }
742     }
743 
744     UErrorCode errorCode = U_ZERO_ERROR;
745     LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
746     assertSuccess("ucasemap_open", errorCode);
747     std::string s8;
748     s16.toUTF8String(s8);
749     msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
750     char dest8[1000];
751     length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
752                                   s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
753     assertSuccess("ucasemap_utf8ToUpper", errorCode);
754     StringPiece result8(dest8, length);
755     UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
756     assertEquals(msg, expected16, result16From8);
757 
758     msg += " cap=";
759     capacities[1] = length / 2;
760     capacities[2] = length - 1;
761     capacities[3] = length;
762     capacities[4] = length + 1;
763     char dest8b[1000];
764     int32_t expected8Length = length;  // Assuming the previous call worked.
765     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
766         int32_t cap = capacities[i];
767         memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
768         UErrorCode errorCode = U_ZERO_ERROR;
769         length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
770                                       s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
771         assertEquals(msg + cap, expected8Length, length);
772         UErrorCode expectedErrorCode;
773         if (cap < expected8Length) {
774             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
775         } else if (cap == expected8Length) {
776             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
777         } else {
778             expectedErrorCode = U_ZERO_ERROR;
779             // Casts to int32_t to avoid matching UBool.
780             assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
781         }
782         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
783         if (cap >= expected8Length) {
784             assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
785         }
786     }
787 }
788 
789 void
TestGreekUpper()790 StringCaseTest::TestGreekUpper() {
791     // http://bugs.icu-project.org/trac/ticket/5456
792     assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
793     // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
794     // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
795     assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
796     assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
797     assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
798     assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
799     assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
800     assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
801     assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
802     // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
803     assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
804     assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
805     // http://unicode.org/udhr/d/udhr_ell_polytonic.html
806     assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
807     assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
808     // From Google bug report
809     assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
810     // http://crbug.com/234797
811     assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
812     assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
813     assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
814     // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
815     assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
816     assertGreekUpper(u"ή.", u"Ή.");
817 }
818 
TestArmenian()819 void StringCaseTest::TestArmenian() {
820     Locale hy("hy");  // Eastern Armenian
821     Locale hyw("hyw");  // Western Armenian
822     Locale root = Locale::getRoot();
823     // See ICU-13416:
824     // և ligature ech-yiwn
825     // uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
826     // but to ԵՎ=ech+vew in Eastern Armenian.
827     UnicodeString s(u"և Երևանի");
828 
829     assertEquals("upper root", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(root));
830     assertEquals("upper hy", u"ԵՎ ԵՐԵՎԱՆԻ", UnicodeString(s).toUpper(hy));
831     assertEquals("upper hyw", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(hyw));
832 #if !UCONFIG_NO_BREAK_ITERATION
833     assertEquals("title root", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, root));
834     assertEquals("title hy", u"Եվ Երևանի", UnicodeString(s).toTitle(nullptr, hy));
835     assertEquals("title hyw", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, hyw));
836 #endif
837 }
838 
839 void
TestLongUpper()840 StringCaseTest::TestLongUpper() {
841     if (quick) {
842         logln("not exhaustive mode: skipping this test");
843         return;
844     }
845     // Ticket #12663, crash with an extremely long string where
846     // U+0390 maps to 0399 0308 0301 so that the result is three times as long
847     // and overflows an int32_t.
848     int32_t length = 0x40000004;  // more than 1G UChars
849     UnicodeString s(length, (UChar32)0x390, length);
850     UnicodeString result;
851     UChar *dest = result.getBuffer(length + 1);
852     if (s.isBogus() || dest == NULL) {
853         logln("Out of memory, unable to run this test on this machine.");
854         return;
855     }
856     IcuTestErrorCode errorCode(*this, "TestLongUpper");
857     int32_t destLength = u_strToUpper(dest, result.getCapacity(),
858                                       s.getBuffer(), s.length(), "", errorCode);
859     result.releaseBuffer(destLength);
860     if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
861         errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
862               errorCode.errorName(), (long)destLength);
863     }
864 }
865 
TestMalformedUTF8()866 void StringCaseTest::TestMalformedUTF8() {
867     // ticket #12639
868     IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
869     LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
870     if (errorCode.isFailure()) {
871         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
872         return;
873     }
874     char src[1] = { (char)0x85 };  // malformed UTF-8
875     char dest[3] = { 0, 0, 0 };
876     int32_t destLength;
877 #if !UCONFIG_NO_BREAK_ITERATION
878     destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
879     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
880         errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
881               errorCode.errorName(), (int)destLength, dest[0]);
882     }
883 #endif
884 
885     errorCode.reset();
886     dest[0] = 0;
887     destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
888     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
889         errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
890               errorCode.errorName(), (int)destLength, dest[0]);
891     }
892 
893     errorCode.reset();
894     dest[0] = 0;
895     destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
896     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
897         errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
898               errorCode.errorName(), (int)destLength, dest[0]);
899     }
900 
901     errorCode.reset();
902     dest[0] = 0;
903     destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
904     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
905         errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
906               errorCode.errorName(), (int)destLength, dest[0]);
907     }
908 }
909 
TestBufferOverflow()910 void StringCaseTest::TestBufferOverflow() {
911     // Ticket #12849, incorrect result from Title Case preflight operation,
912     // when buffer overflow error is expected.
913     IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
914     LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
915     if (errorCode.isFailure()) {
916         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
917         return;
918     }
919 
920     UnicodeString data("hello world");
921     int32_t result;
922 #if !UCONFIG_NO_BREAK_ITERATION
923     result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
924     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
925         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
926               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
927               __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
928     }
929 #endif
930     errorCode.reset();
931 
932     std::string data_utf8;
933     data.toUTF8String(data_utf8);
934 #if !UCONFIG_NO_BREAK_ITERATION
935     result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), static_cast<int32_t>(data_utf8.length()), errorCode);
936     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
937         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
938               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
939               __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
940     }
941 #endif
942     errorCode.reset();
943 }
944 
TestEdits()945 void StringCaseTest::TestEdits() {
946     IcuTestErrorCode errorCode(*this, "TestEdits");
947     Edits edits;
948     assertFalse("new Edits hasChanges", edits.hasChanges());
949     assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
950     assertEquals("new Edits", 0, edits.lengthDelta());
951     edits.addUnchanged(1);  // multiple unchanged ranges are combined
952     edits.addUnchanged(10000);  // too long, and they are split
953     edits.addReplace(0, 0);
954     edits.addUnchanged(2);
955     assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
956     assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
957     assertEquals("unchanged 10003", 0, edits.lengthDelta());
958     edits.addReplace(2, 1);  // multiple short equal-lengths edits are compressed
959     edits.addUnchanged(0);
960     edits.addReplace(2, 1);
961     edits.addReplace(2, 1);
962     edits.addReplace(0, 10);
963     edits.addReplace(100, 0);
964     edits.addReplace(3000, 4000);  // variable-length encoding
965     edits.addReplace(100000, 100000);
966     assertTrue("some edits hasChanges", edits.hasChanges());
967     assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
968     assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
969     UErrorCode outErrorCode = U_ZERO_ERROR;
970     assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
971 
972     static const EditChange coarseExpectedChanges[] = {
973             { FALSE, 10003, 10003 },
974             { TRUE, 103106, 104013 }
975     };
976     TestUtility::checkEditsIter(*this, u"coarse",
977             edits.getCoarseIterator(), edits.getCoarseIterator(),
978             coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
979     TestUtility::checkEditsIter(*this, u"coarse changes",
980             edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
981             coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
982 
983     static const EditChange fineExpectedChanges[] = {
984             { FALSE, 10003, 10003 },
985             { TRUE, 2, 1 },
986             { TRUE, 2, 1 },
987             { TRUE, 2, 1 },
988             { TRUE, 0, 10 },
989             { TRUE, 100, 0 },
990             { TRUE, 3000, 4000 },
991             { TRUE, 100000, 100000 }
992     };
993     TestUtility::checkEditsIter(*this, u"fine",
994             edits.getFineIterator(), edits.getFineIterator(),
995             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
996     TestUtility::checkEditsIter(*this, u"fine changes",
997             edits.getFineChangesIterator(), edits.getFineChangesIterator(),
998             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
999 
1000     edits.reset();
1001     assertFalse("reset hasChanges", edits.hasChanges());
1002     assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
1003     assertEquals("reset", 0, edits.lengthDelta());
1004     Edits::Iterator ei = edits.getCoarseChangesIterator();
1005     assertFalse("reset then iterator", ei.next(errorCode));
1006 }
1007 
TestCopyMoveEdits()1008 void StringCaseTest::TestCopyMoveEdits() {
1009     IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
1010     // Exceed the stack array capacity.
1011     Edits a;
1012     for (int32_t i = 0; i < 250; ++i) {
1013         a.addReplace(i % 10, (i % 10) + 1);
1014     }
1015     assertEquals("a: many edits, length delta", 250, a.lengthDelta());
1016 
1017     // copy
1018     Edits b(a);
1019     assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
1020     assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
1021     TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
1022 
1023     // assign
1024     Edits c;
1025     c.addUnchanged(99);
1026     c.addReplace(88, 77);
1027     c = b;
1028     assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
1029     assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
1030     TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
1031 
1032     // std::move trouble on these platforms.
1033     // See https://ssl.icu-project.org/trac/ticket/13393
1034 #if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
1035     // move constructor empties object with heap array
1036     Edits d(std::move(a));
1037     assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
1038     assertFalse("a moved away: no more hasChanges", a.hasChanges());
1039     TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
1040     Edits empty;
1041     TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
1042 
1043     // move assignment empties object with heap array
1044     Edits e;
1045     e.addReplace(0, 1000);
1046     e = std::move(b);
1047     assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
1048     assertFalse("b moved away: no more hasChanges", b.hasChanges());
1049     TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
1050     TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
1051 
1052     // Edits::Iterator default constructor.
1053     Edits::Iterator iter;
1054     assertFalse("Edits::Iterator().next()", iter.next(errorCode));
1055     assertSuccess("Edits::Iterator().next()", errorCode);
1056     iter = e.getFineChangesIterator();
1057     assertTrue("iter.next()", iter.next(errorCode));
1058     assertSuccess("iter.next()", errorCode);
1059     assertTrue("iter.hasChange()", iter.hasChange());
1060     assertEquals("iter.newLength()", 1, iter.newLength());
1061 #endif
1062 }
1063 
TestEditsFindFwdBwd()1064 void StringCaseTest::TestEditsFindFwdBwd() {
1065     IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
1066     // Some users need index mappings to be efficient when they are out of order.
1067     // The most interesting failure case for this test is it taking a very long time.
1068     Edits e;
1069     constexpr int32_t N = 200000;
1070     for (int32_t i = 0; i < N; ++i) {
1071         e.addUnchanged(1);
1072         e.addReplace(3, 1);
1073     }
1074     Edits::Iterator iter = e.getFineIterator();
1075     for (int32_t i = 0; i <= N; i += 2) {
1076         assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1077         assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1078     }
1079     for (int32_t i = N; i >= 0; i -= 2) {
1080         assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1081         assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1082     }
1083 }
1084 
TestMergeEdits()1085 void StringCaseTest::TestMergeEdits() {
1086     // For debugging, set -v to see matching edits up to a failure.
1087     IcuTestErrorCode errorCode(*this, "TestMergeEdits");
1088     Edits ab, bc, ac, expected_ac;
1089 
1090     // Simple: Two parallel non-changes.
1091     ab.addUnchanged(2);
1092     bc.addUnchanged(2);
1093     expected_ac.addUnchanged(2);
1094 
1095     // Simple: Two aligned changes.
1096     ab.addReplace(3, 2);
1097     bc.addReplace(2, 1);
1098     expected_ac.addReplace(3, 1);
1099 
1100     // Unequal non-changes.
1101     ab.addUnchanged(5);
1102     bc.addUnchanged(3);
1103     expected_ac.addUnchanged(3);
1104     // ab ahead by 2
1105 
1106     // Overlapping changes accumulate until they share a boundary.
1107     ab.addReplace(4, 3);
1108     bc.addReplace(3, 2);
1109     ab.addReplace(4, 3);
1110     bc.addReplace(3, 2);
1111     ab.addReplace(4, 3);
1112     bc.addReplace(3, 2);
1113     bc.addUnchanged(4);
1114     expected_ac.addReplace(14, 8);
1115     // bc ahead by 2
1116 
1117     // Balance out intermediate-string lengths.
1118     ab.addUnchanged(2);
1119     expected_ac.addUnchanged(2);
1120 
1121     // Insert something and delete it: Should disappear.
1122     ab.addReplace(0, 5);
1123     ab.addReplace(0, 2);
1124     bc.addReplace(7, 0);
1125 
1126     // Parallel change to make a new boundary.
1127     ab.addReplace(1, 2);
1128     bc.addReplace(2, 3);
1129     expected_ac.addReplace(1, 3);
1130 
1131     // Multiple ab deletions should remain separate at the boundary.
1132     ab.addReplace(1, 0);
1133     ab.addReplace(2, 0);
1134     ab.addReplace(3, 0);
1135     expected_ac.addReplace(1, 0);
1136     expected_ac.addReplace(2, 0);
1137     expected_ac.addReplace(3, 0);
1138 
1139     // Unequal non-changes can be split for another boundary.
1140     ab.addUnchanged(2);
1141     bc.addUnchanged(1);
1142     expected_ac.addUnchanged(1);
1143     // ab ahead by 1
1144 
1145     // Multiple bc insertions should create a boundary and remain separate.
1146     bc.addReplace(0, 4);
1147     bc.addReplace(0, 5);
1148     bc.addReplace(0, 6);
1149     expected_ac.addReplace(0, 4);
1150     expected_ac.addReplace(0, 5);
1151     expected_ac.addReplace(0, 6);
1152     // ab ahead by 1
1153 
1154     // Multiple ab deletions in the middle of a bc change are merged.
1155     bc.addReplace(2, 2);
1156     // bc ahead by 1
1157     ab.addReplace(1, 0);
1158     ab.addReplace(2, 0);
1159     ab.addReplace(3, 0);
1160     ab.addReplace(4, 1);
1161     expected_ac.addReplace(11, 2);
1162 
1163     // Multiple bc insertions in the middle of an ab change are merged.
1164     ab.addReplace(5, 6);
1165     bc.addReplace(3, 3);
1166     // ab ahead by 3
1167     bc.addReplace(0, 4);
1168     bc.addReplace(0, 5);
1169     bc.addReplace(0, 6);
1170     bc.addReplace(3, 7);
1171     expected_ac.addReplace(5, 25);
1172 
1173     // Delete around a deletion.
1174     ab.addReplace(4, 4);
1175     ab.addReplace(3, 0);
1176     ab.addUnchanged(2);
1177     bc.addReplace(2, 2);
1178     bc.addReplace(4, 0);
1179     expected_ac.addReplace(9, 2);
1180 
1181     // Insert into an insertion.
1182     ab.addReplace(0, 2);
1183     bc.addReplace(1, 1);
1184     bc.addReplace(0, 8);
1185     bc.addUnchanged(4);
1186     expected_ac.addReplace(0, 10);
1187     // bc ahead by 3
1188 
1189     // Balance out intermediate-string lengths.
1190     ab.addUnchanged(3);
1191     expected_ac.addUnchanged(3);
1192 
1193     // Deletions meet insertions.
1194     // Output order is arbitrary in principle, but we expect insertions first
1195     // and want to keep it that way.
1196     ab.addReplace(2, 0);
1197     ab.addReplace(4, 0);
1198     ab.addReplace(6, 0);
1199     bc.addReplace(0, 1);
1200     bc.addReplace(0, 3);
1201     bc.addReplace(0, 5);
1202     expected_ac.addReplace(0, 1);
1203     expected_ac.addReplace(0, 3);
1204     expected_ac.addReplace(0, 5);
1205     expected_ac.addReplace(2, 0);
1206     expected_ac.addReplace(4, 0);
1207     expected_ac.addReplace(6, 0);
1208 
1209     // End with a non-change, so that further edits are never reordered.
1210     ab.addUnchanged(1);
1211     bc.addUnchanged(1);
1212     expected_ac.addUnchanged(1);
1213 
1214     ac.mergeAndAppend(ab, bc, errorCode);
1215     assertSuccess("ab+bc", errorCode);
1216     if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
1217         return;
1218     }
1219 
1220     // Append more Edits.
1221     Edits ab2, bc2;
1222     ab2.addUnchanged(5);
1223     bc2.addReplace(1, 2);
1224     bc2.addUnchanged(4);
1225     expected_ac.addReplace(1, 2);
1226     expected_ac.addUnchanged(4);
1227     ac.mergeAndAppend(ab2, bc2, errorCode);
1228     assertSuccess("ab2+bc2", errorCode);
1229     if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
1230         return;
1231     }
1232 
1233     // Append empty edits.
1234     Edits empty;
1235     ac.mergeAndAppend(empty, empty, errorCode);
1236     assertSuccess("empty+empty", errorCode);
1237     if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
1238         return;
1239     }
1240 
1241     // Error: Append more edits with mismatched intermediate-string lengths.
1242     Edits mismatch;
1243     mismatch.addReplace(1, 1);
1244     ac.mergeAndAppend(ab2, mismatch, errorCode);
1245     assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1246     errorCode.reset();
1247     ac.mergeAndAppend(mismatch, bc2, errorCode);
1248     assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1249     errorCode.reset();
1250 }
1251 
TestCaseMapWithEdits()1252 void StringCaseTest::TestCaseMapWithEdits() {
1253     IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
1254     UChar dest[20];
1255     Edits edits;
1256 
1257     int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1258                                       u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1259     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1260     static const EditChange lowerExpectedChanges[] = {
1261             { TRUE, 1, 1 },
1262             { FALSE, 4, 4 },
1263             { TRUE, 1, 1 },
1264             { FALSE, 2, 2 }
1265     };
1266     TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1267             edits.getFineIterator(), edits.getFineIterator(),
1268             lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1269             TRUE, errorCode);
1270 
1271     edits.reset();
1272     length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1273                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1274     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1275     static const EditChange upperExpectedChanges[] = {
1276             { FALSE, 1, 1 },
1277             { TRUE, 1, 1 },
1278             { TRUE, 1, 1 },
1279             { TRUE, 1, 1 },
1280             { TRUE, 1, 1 },
1281             { TRUE, 1, 1 }
1282     };
1283     TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1284             edits.getFineIterator(), edits.getFineIterator(),
1285             upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1286             TRUE, errorCode);
1287 
1288     edits.reset();
1289 
1290 #if !UCONFIG_NO_BREAK_ITERATION
1291     length = CaseMap::toTitle("nl",
1292                               U_OMIT_UNCHANGED_TEXT |
1293                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1294                               U_TITLECASE_NO_LOWERCASE,
1295                               nullptr, u"IjssEL IglOo", 12,
1296                               dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1297     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1298     static const EditChange titleExpectedChanges[] = {
1299             { FALSE, 1, 1 },
1300             { TRUE, 1, 1 },
1301             { FALSE, 10, 10 }
1302     };
1303     TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1304             edits.getFineIterator(), edits.getFineIterator(),
1305             titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1306             TRUE, errorCode);
1307 #endif
1308 
1309     // No explicit nor automatic edits.reset(). Edits should be appended.
1310     length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1311                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1312     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1313     static const EditChange foldExpectedChanges[] = {
1314 #if !UCONFIG_NO_BREAK_ITERATION
1315             // From titlecasing.
1316             { FALSE, 1, 1 },
1317             { TRUE, 1, 1 },
1318             { FALSE, 10, 10 },
1319 #endif
1320             // From case folding.
1321             { TRUE, 1, 1 },
1322             { TRUE, 1, 2 },
1323             { FALSE, 3, 3 },
1324             { TRUE, 1, 1 },
1325             { FALSE, 2, 2 }
1326     };
1327     TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
1328             edits.getFineIterator(), edits.getFineIterator(),
1329             foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1330             TRUE, errorCode);
1331 }
1332 
TestCaseMapUTF8WithEdits()1333 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1334     IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
1335     char dest[50];
1336     Edits edits;
1337 
1338     int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
1339                                           reinterpret_cast<const char*>(u8"IstanBul"), 8,
1340                                           dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1341     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1342                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1343     static const EditChange lowerExpectedChanges[] = {
1344             { TRUE, 1, 2 },
1345             { FALSE, 4, 4 },
1346             { TRUE, 1, 1 },
1347             { FALSE, 2, 2 }
1348     };
1349     TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1350             edits.getFineIterator(), edits.getFineIterator(),
1351             lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1352             TRUE, errorCode);
1353 
1354     edits.reset();
1355     length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
1356                                   reinterpret_cast<const char*>(u8"Πατάτα"), 6 * 2,
1357                                   dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1358     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1359                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1360     static const EditChange upperExpectedChanges[] = {
1361             { FALSE, 2, 2 },
1362             { TRUE, 2, 2 },
1363             { TRUE, 2, 2 },
1364             { TRUE, 2, 2 },
1365             { TRUE, 2, 2 },
1366             { TRUE, 2, 2 }
1367     };
1368     TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1369             edits.getFineIterator(), edits.getFineIterator(),
1370             upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1371             TRUE, errorCode);
1372 
1373     edits.reset();
1374 #if !UCONFIG_NO_BREAK_ITERATION
1375     length = CaseMap::utf8ToTitle("nl",
1376                                   U_OMIT_UNCHANGED_TEXT |
1377                                   U_TITLECASE_NO_BREAK_ADJUSTMENT |
1378                                   U_TITLECASE_NO_LOWERCASE,
1379                                   nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), 12,
1380                                   dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1381     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1382                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1383     static const EditChange titleExpectedChanges[] = {
1384             { FALSE, 1, 1 },
1385             { TRUE, 1, 1 },
1386             { FALSE, 10, 10 }
1387     };
1388     TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1389             edits.getFineIterator(), edits.getFineIterator(),
1390             titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1391             TRUE, errorCode);
1392 #endif
1393 
1394     // No explicit nor automatic edits.reset(). Edits should be appended.
1395     length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
1396                                    U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1397                                reinterpret_cast<const char*>(u8"IßtanBul"), 1 + 2 + 6,
1398                                dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1399     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1400                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1401     static const EditChange foldExpectedChanges[] = {
1402 #if !UCONFIG_NO_BREAK_ITERATION
1403             // From titlecasing.
1404             { FALSE, 1, 1 },
1405             { TRUE, 1, 1 },
1406             { FALSE, 10, 10 },
1407 #endif
1408             // From case folding.
1409             { TRUE, 1, 2 },
1410             { TRUE, 2, 2 },
1411             { FALSE, 3, 3 },
1412             { TRUE, 1, 1 },
1413             { FALSE, 2, 2 }
1414     };
1415     TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
1416             edits.getFineIterator(), edits.getFineIterator(),
1417             foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1418             TRUE, errorCode);
1419 }
1420 
TestCaseMapToString()1421 void StringCaseTest::TestCaseMapToString() {
1422     // This test function name is parallel with one in UCharacterCaseTest.java.
1423     // It is a bit of a misnomer until we have CaseMap API that writes to
1424     // a UnicodeString, at which point we should change this code here.
1425     IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
1426     UChar dest[20];
1427 
1428     // Omit unchanged text.
1429     int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1430                                       u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1431     assertEquals(u"toLower(IstanBul)",
1432                  UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1433     length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1434                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1435     assertEquals(u"toUpper(Πατάτα)",
1436                  UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1437 #if !UCONFIG_NO_BREAK_ITERATION
1438     length = CaseMap::toTitle("nl",
1439                               U_OMIT_UNCHANGED_TEXT |
1440                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1441                               U_TITLECASE_NO_LOWERCASE,
1442                               nullptr, u"IjssEL IglOo", 12,
1443                               dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1444     assertEquals(u"toTitle(IjssEL IglOo)",
1445                  UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1446 #endif
1447     length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1448                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1449     assertEquals(u"foldCase(IßtanBul)",
1450                  UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1451 
1452     // Return the whole result string.
1453     length = CaseMap::toLower("tr", 0,
1454                               u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1455     assertEquals(u"toLower(IstanBul)",
1456                  UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
1457     length = CaseMap::toUpper("el", 0,
1458                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1459     assertEquals(u"toUpper(Πατάτα)",
1460                  UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1461 #if !UCONFIG_NO_BREAK_ITERATION
1462     length = CaseMap::toTitle("nl",
1463                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1464                               U_TITLECASE_NO_LOWERCASE,
1465                               nullptr, u"IjssEL IglOo", 12,
1466                               dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1467     assertEquals(u"toTitle(IjssEL IglOo)",
1468                  UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
1469 #endif
1470     length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1471                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1472     assertEquals(u"foldCase(IßtanBul)",
1473                  UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
1474 }
1475 
TestCaseMapUTF8ToString()1476 void StringCaseTest::TestCaseMapUTF8ToString() {
1477     IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
1478     std::string dest;
1479     StringByteSink<std::string> sink(&dest);
1480 
1481     // Omit unchanged text.
1482     CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
1483     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
1484     dest.clear();
1485     CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
1486     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1487                  UnicodeString::fromUTF8(dest));
1488 #if !UCONFIG_NO_BREAK_ITERATION
1489     dest.clear();
1490     CaseMap::utf8ToTitle(
1491         "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1492         nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1493     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1494                  UnicodeString::fromUTF8(dest));
1495 #endif
1496     dest.clear();
1497     CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1498                       u8"IßtanBul", sink, nullptr, errorCode);
1499     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1500                  UnicodeString::fromUTF8(dest));
1501 
1502     // Return the whole result string.
1503     dest.clear();
1504     CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
1505     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
1506                  UnicodeString::fromUTF8(dest));
1507     dest.clear();
1508     CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
1509     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
1510                  UnicodeString::fromUTF8(dest));
1511 #if !UCONFIG_NO_BREAK_ITERATION
1512     dest.clear();
1513     CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1514                          nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1515     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
1516                  UnicodeString::fromUTF8(dest));
1517 #endif
1518     dest.clear();
1519     CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
1520     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
1521                  UnicodeString::fromUTF8(dest));
1522 }
1523 
TestLongUnicodeString()1524 void StringCaseTest::TestLongUnicodeString() {
1525     // Code coverage for UnicodeString case mapping code handling
1526     // long strings or many changes in a string.
1527     UnicodeString s(TRUE,
1528         (const UChar *)
1529         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1530         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1531         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1532         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1533         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1534         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1535     UnicodeString expected(TRUE,
1536         (const UChar *)
1537         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1538         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1539         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1540         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1541         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1542         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1543     s.toUpper(Locale::getRoot());
1544     assertEquals("string length 306", expected, s);
1545 }
1546 
1547 #if !UCONFIG_NO_BREAK_ITERATION
TestBug13127()1548 void StringCaseTest::TestBug13127() {
1549     // Test case crashed when the bug was present.
1550     const char16_t *s16 = u"日本語";
1551     UnicodeString s(TRUE, s16, -1);
1552     s.toTitle(0, Locale::getEnglish());
1553 }
1554 
TestInPlaceTitle()1555 void StringCaseTest::TestInPlaceTitle() {
1556     // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1557     IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
1558     char16_t s[32] = u"ß ß ß日本語 abcdef";
1559     const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
1560     int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
1561     assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
1562     assertEquals("u_strToTitle(in-place)", expected, s);
1563 }
1564 #endif
1565 
TestCaseMapEditsIteratorDocs()1566 void StringCaseTest::TestCaseMapEditsIteratorDocs() {
1567     IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
1568     const char16_t* input = u"abcßDeF";
1569     int32_t inputLength = u_strlen(input);
1570     // output: "abcssdef"
1571 
1572     char16_t output[10];
1573     Edits edits;
1574     CaseMap::fold(0, input, -1, output, 10, &edits, status);
1575 
1576     static const char16_t* fineIteratorExpected[] = {
1577             u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1578             u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1579             u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1580             u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1581             u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1582     };
1583     static const char16_t* fineChangesIteratorExpected[] = {
1584             u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1585             u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1586             u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1587     };
1588     static const char16_t* coarseIteratorExpected[] = {
1589             u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1590             u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1591             u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1592             u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1593     };
1594     static const char16_t* coarseChangesIteratorExpected[] = {
1595             u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1596             u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1597     };
1598 
1599     // Expected destination indices when source index is queried
1600     static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
1601     static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
1602     static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
1603     static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
1604 
1605     // Expected source indices when destination index is queried
1606     static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
1607     static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
1608     static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
1609     static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
1610 
1611     // Demonstrate the iterator next() method:
1612     Edits::Iterator fineIterator = edits.getFineIterator();
1613     int i = 0;
1614     UnicodeString toString;
1615     while (fineIterator.next(status)) {
1616         UnicodeString expected = fineIteratorExpected[i++];
1617         assertEquals(UnicodeString(u"Iteration #") + i,
1618                 expected,
1619                 fineIterator.toString(toString.remove()));
1620     }
1621     Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
1622     i = 0;
1623     while (fineChangesIterator.next(status)) {
1624         UnicodeString expected = fineChangesIteratorExpected[i++];
1625         assertEquals(UnicodeString(u"Iteration #") + i,
1626                 expected,
1627                 fineChangesIterator.toString(toString.remove()));
1628     }
1629     Edits::Iterator coarseIterator = edits.getCoarseIterator();
1630     i = 0;
1631     while (coarseIterator.next(status)) {
1632         UnicodeString expected = coarseIteratorExpected[i++];
1633         assertEquals(UnicodeString(u"Iteration #") + i,
1634                 expected,
1635                 coarseIterator.toString(toString.remove()));
1636     }
1637     Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
1638     i = 0;
1639     while (coarseChangesIterator.next(status)) {
1640         UnicodeString expected = coarseChangesIteratorExpected[i++];
1641         assertEquals(UnicodeString(u"Iteration #") + i,
1642                 expected,
1643                 coarseChangesIterator.toString(toString.remove()));
1644     }
1645 
1646     // Demonstrate the iterator indexing methods:
1647     // fineIterator should have the same behavior as fineChangesIterator, and
1648     // coarseIterator should have the same behavior as coarseChangesIterator.
1649     for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
1650         fineIterator.findSourceIndex(srcIndex, status);
1651         fineChangesIterator.findSourceIndex(srcIndex, status);
1652         coarseIterator.findSourceIndex(srcIndex, status);
1653         coarseChangesIterator.findSourceIndex(srcIndex, status);
1654 
1655         assertEquals(UnicodeString("Source index: ") + srcIndex,
1656                 expectedDestFineEditIndices[srcIndex],
1657                 fineIterator.destinationIndex());
1658         assertEquals(UnicodeString("Source index: ") + srcIndex,
1659                 expectedDestFineEditIndices[srcIndex],
1660                 fineChangesIterator.destinationIndex());
1661         assertEquals(UnicodeString("Source index: ") + srcIndex,
1662                 expectedDestCoarseEditIndices[srcIndex],
1663                 coarseIterator.destinationIndex());
1664         assertEquals(UnicodeString("Source index: ") + srcIndex,
1665                 expectedDestCoarseEditIndices[srcIndex],
1666                 coarseChangesIterator.destinationIndex());
1667 
1668         assertEquals(UnicodeString("Source index: ") + srcIndex,
1669                 expectedDestFineStringIndices[srcIndex],
1670                 fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
1671         assertEquals(UnicodeString("Source index: ") + srcIndex,
1672                 expectedDestFineStringIndices[srcIndex],
1673                 fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1674         assertEquals(UnicodeString("Source index: ") + srcIndex,
1675                 expectedDestCoarseStringIndices[srcIndex],
1676                 coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
1677         assertEquals(UnicodeString("Source index: ") + srcIndex,
1678                 expectedDestCoarseStringIndices[srcIndex],
1679                 coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1680     }
1681     for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
1682         fineIterator.findDestinationIndex(destIndex, status);
1683         fineChangesIterator.findDestinationIndex(destIndex, status);
1684         coarseIterator.findDestinationIndex(destIndex, status);
1685         coarseChangesIterator.findDestinationIndex(destIndex, status);
1686 
1687         assertEquals(UnicodeString("Destination index: ") + destIndex,
1688                 expectedSrcFineEditIndices[destIndex],
1689                 fineIterator.sourceIndex());
1690         assertEquals(UnicodeString("Destination index: ") + destIndex,
1691                 expectedSrcFineEditIndices[destIndex],
1692                 fineChangesIterator.sourceIndex());
1693         assertEquals(UnicodeString("Destination index: ") + destIndex,
1694                 expectedSrcCoarseEditIndices[destIndex],
1695                 coarseIterator.sourceIndex());
1696         assertEquals(UnicodeString("Destination index: ") + destIndex,
1697                 expectedSrcCoarseEditIndices[destIndex],
1698                 coarseChangesIterator.sourceIndex());
1699 
1700         assertEquals(UnicodeString("Destination index: ") + destIndex,
1701                 expectedSrcFineStringIndices[destIndex],
1702                 fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
1703         assertEquals(UnicodeString("Destination index: ") + destIndex,
1704                 expectedSrcFineStringIndices[destIndex],
1705                 fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1706         assertEquals(UnicodeString("Destination index: ") + destIndex,
1707                 expectedSrcCoarseStringIndices[destIndex],
1708                 coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
1709         assertEquals(UnicodeString("Destination index: ") + destIndex,
1710                 expectedSrcCoarseStringIndices[destIndex],
1711                 coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1712     }
1713 }
1714 
TestCaseMapGreekExtended()1715 void StringCaseTest::TestCaseMapGreekExtended() {
1716     // Ticket 13851
1717     UnicodeString s(u"\u1F80\u1F88\u1FFC");
1718     UnicodeString result(s);
1719     result.toLower(Locale::getRoot());
1720     assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result);
1721 #if !UCONFIG_NO_BREAK_ITERATION
1722     result = s;
1723     result.toTitle(nullptr, Locale::getRoot());
1724     assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result);
1725 #endif
1726 }
1727 
1728 //#endif
1729