1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2015, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "ustrtest.h"
8 #include "unicode/appendable.h"
9 #include "unicode/std_string.h"
10 #include "unicode/unistr.h"
11 #include "unicode/uchar.h"
12 #include "unicode/ustring.h"
13 #include "unicode/locid.h"
14 #include "unicode/ucnv.h"
15 #include "unicode/uenum.h"
16 #include "unicode/utf16.h"
17 #include "cmemory.h"
18 #include "charstr.h"
19 
20 #if 0
21 #include "unicode/ustream.h"
22 
23 #include <iostream>
24 using namespace std;
25 
26 #endif
27 
~UnicodeStringTest()28 UnicodeStringTest::~UnicodeStringTest() {}
29 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)30 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
31 {
32     if (exec) logln("TestSuite UnicodeStringTest: ");
33     TESTCASE_AUTO_BEGIN;
34     TESTCASE_AUTO_CLASS(StringCaseTest);
35     TESTCASE_AUTO(TestBasicManipulation);
36     TESTCASE_AUTO(TestCompare);
37     TESTCASE_AUTO(TestExtract);
38     TESTCASE_AUTO(TestRemoveReplace);
39     TESTCASE_AUTO(TestSearching);
40     TESTCASE_AUTO(TestSpacePadding);
41     TESTCASE_AUTO(TestPrefixAndSuffix);
42     TESTCASE_AUTO(TestFindAndReplace);
43     TESTCASE_AUTO(TestBogus);
44     TESTCASE_AUTO(TestReverse);
45     TESTCASE_AUTO(TestMiscellaneous);
46     TESTCASE_AUTO(TestStackAllocation);
47     TESTCASE_AUTO(TestUnescape);
48     TESTCASE_AUTO(TestCountChar32);
49     TESTCASE_AUTO(TestStringEnumeration);
50     TESTCASE_AUTO(TestNameSpace);
51     TESTCASE_AUTO(TestUTF32);
52     TESTCASE_AUTO(TestUTF8);
53     TESTCASE_AUTO(TestReadOnlyAlias);
54     TESTCASE_AUTO(TestAppendable);
55     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
56     TESTCASE_AUTO(TestSizeofUnicodeString);
57     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
58     TESTCASE_AUTO(TestMoveSwap);
59     TESTCASE_AUTO_END;
60 }
61 
62 void
TestBasicManipulation()63 UnicodeStringTest::TestBasicManipulation()
64 {
65     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
66     UnicodeString   expectedValue;
67     UnicodeString   *c;
68 
69     c=(UnicodeString *)test1.clone();
70     test1.insert(24, "good ");
71     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
72     if (test1 != expectedValue)
73         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
74 
75     c->insert(24, "good ");
76     if(*c != expectedValue) {
77         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
78     }
79     delete c;
80 
81     test1.remove(41, 8);
82     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
83     if (test1 != expectedValue)
84         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
85 
86     test1.replace(58, 6, "ir country");
87     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
88     if (test1 != expectedValue)
89         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
90 
91     UChar     temp[80];
92     test1.extract(0, 15, temp);
93 
94     UnicodeString       test2(temp, 15);
95 
96     expectedValue = "Now is the time";
97     if (test2 != expectedValue)
98         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
99 
100     test2 += " for me to go!\n";
101     expectedValue = "Now is the time for me to go!\n";
102     if (test2 != expectedValue)
103         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
104 
105     if (test1.length() != 70)
106         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
107     if (test2.length() != 30)
108         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
109 
110     UnicodeString test3;
111     test3.append((UChar32)0x20402);
112     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
113         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
114     }
115     if(test3.length() != 2){
116         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
117     }
118     test3.append((UChar32)0x0074);
119     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
120         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
121     }
122     if(test3.length() != 3){
123         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
124     }
125 
126     // test some UChar32 overloads
127     if( test3.setTo((UChar32)0x10330).length() != 2 ||
128         test3.insert(0, (UChar32)0x20100).length() != 4 ||
129         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
130         (test3 = (UChar32)0x14001).length() != 2
131     ) {
132         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
133     }
134 
135     {
136         // test moveIndex32()
137         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
138 
139         if(
140             s.moveIndex32(2, -1)!=0 ||
141             s.moveIndex32(2, 1)!=4 ||
142             s.moveIndex32(2, 2)!=5 ||
143             s.moveIndex32(5, -2)!=2 ||
144             s.moveIndex32(0, -1)!=0 ||
145             s.moveIndex32(6, 1)!=6
146         ) {
147             errln("UnicodeString::moveIndex32() failed");
148         }
149 
150         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
151             errln("UnicodeString::getChar32Start() failed");
152         }
153 
154         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
155             errln("UnicodeString::getChar32Limit() failed");
156         }
157     }
158 
159     {
160         // test new 2.2 constructors and setTo function that parallel Java's substring function.
161         UnicodeString src("Hello folks how are you?");
162         UnicodeString target1("how are you?");
163         if (target1 != UnicodeString(src, 12)) {
164             errln("UnicodeString(const UnicodeString&, int32_t) failed");
165         }
166         UnicodeString target2("folks");
167         if (target2 != UnicodeString(src, 6, 5)) {
168             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
169         }
170         if (target1 != target2.setTo(src, 12)) {
171             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
172         }
173     }
174 
175     {
176         // op+ is new in ICU 2.8
177         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
178         if(s!=UnicodeString("abcdefghi", "")) {
179             errln("operator+(UniStr, UniStr) failed");
180         }
181     }
182 
183     {
184         // tests for Jitterbug 2360
185         // verify that APIs with source pointer + length accept length == -1
186         // mostly test only where modified, only few functions did not already do this
187         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
188             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
189         }
190 
191         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
192         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
193 
194         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
195             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
196         }
197         if(t.length()!=u_strlen(buffer)) {
198             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
199         }
200 
201         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
202             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
203         }
204         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
205             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
206         }
207 
208         buffer[u_strlen(buffer)]=0xe4;
209         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
210         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
211             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
212         }
213         if(u.length()!=UPRV_LENGTHOF(buffer)) {
214             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
215         }
216 
217         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
218         UConverter *cnv;
219         UErrorCode errorCode=U_ZERO_ERROR;
220 
221         cnv=ucnv_open("ISO-8859-1", &errorCode);
222         UnicodeString v(cs, -1, cnv, errorCode);
223         ucnv_close(cnv);
224         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
225             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
226         }
227     }
228 
229 #if U_CHARSET_IS_UTF8
230     {
231         // Test the hardcoded-UTF-8 UnicodeString optimizations.
232         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
233         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
234         UnicodeString from8a = UnicodeString((const char *)utf8);
235         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
236         UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
237         if(from8a != from16 || from8b != from16) {
238             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
239         }
240         char buffer[16];
241         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
242         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
243             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
244         }
245         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
246         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
247             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
248         }
249     }
250 #endif
251 }
252 
253 void
TestCompare()254 UnicodeStringTest::TestCompare()
255 {
256     UnicodeString   test1("this is a test");
257     UnicodeString   test2("this is a test");
258     UnicodeString   test3("this is a test of the emergency broadcast system");
259     UnicodeString   test4("never say, \"this is a test\"!!");
260 
261     UnicodeString   test5((UChar)0x5000);
262     UnicodeString   test6((UChar)0x5100);
263 
264     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
265                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
266     char            chars[] = "this is a test";
267 
268     // test operator== and operator!=
269     if (test1 != test2 || test1 == test3 || test1 == test4)
270         errln("operator== or operator!= failed");
271 
272     // test operator> and operator<
273     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
274         !(test5 < test6)
275     ) {
276         errln("operator> or operator< failed");
277     }
278 
279     // test operator>= and operator<=
280     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
281         errln("operator>= or operator<= failed");
282 
283     // test compare(UnicodeString)
284     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
285         errln("compare(UnicodeString) failed");
286 
287     //test compare(offset, length, UnicodeString)
288     if(test1.compare(0, 14, test2) != 0 ||
289         test3.compare(0, 14, test2) != 0 ||
290         test4.compare(12, 14, test2) != 0 ||
291         test3.compare(0, 18, test1) <=0  )
292         errln("compare(offset, length, UnicodeString) failes");
293 
294     // test compare(UChar*)
295     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
296         errln("compare(UChar*) failed");
297 
298     // test compare(char*)
299     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
300         errln("compare(char*) failed");
301 
302     // test compare(UChar*, length)
303     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
304         errln("compare(UChar*, length) failed");
305 
306     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
307     if (test1.compare(0, 14, test2, 0, 14) != 0
308     || test1.compare(0, 14, test3, 0, 14) != 0
309     || test1.compare(0, 14, test4, 12, 14) != 0)
310         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
311 
312     if (test1.compare(10, 4, test2, 0, 4) >= 0
313     || test1.compare(10, 4, test3, 22, 9) <= 0
314     || test1.compare(10, 4, test4, 22, 4) != 0)
315         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
316 
317     // test compareBetween
318     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
319                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
320         errln("compareBetween failed");
321 
322     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
323                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
324         errln("compareBetween failed");
325 
326     // test compare() etc. with strings that share a buffer but are not equal
327     test2=test1; // share the buffer, length() too large for the stackBuffer
328     test2.truncate(1); // change only the length, not the buffer
329     if( test1==test2 || test1<=test2 ||
330         test1.compare(test2)<=0 ||
331         test1.compareCodePointOrder(test2)<=0 ||
332         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
333         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
334         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
335         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
336     ) {
337         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
338     }
339 
340     /* test compareCodePointOrder() */
341     {
342         /* these strings are in ascending order */
343         static const UChar strings[][4]={
344             { 0x61, 0 },                    /* U+0061 */
345             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
346             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
347             { 0xd800, 0 },                  /* U+d800 */
348             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
349             { 0xdfff, 0 },                  /* U+dfff */
350             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
351             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
352             { 0xd800, 0xdc02, 0 },          /* U+10002 */
353             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
354         };
355         UnicodeString u[20]; // must be at least as long as strings[]
356         int32_t i;
357 
358         for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])); ++i) {
359             u[i]=UnicodeString(TRUE, strings[i], -1);
360         }
361 
362         for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])-1); ++i) {
363             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
364                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
365             }
366         }
367     }
368 
369     /* test caseCompare() */
370     {
371         static const UChar
372         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
373         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
374         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
375         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
376 
377         UnicodeString
378             mixed(TRUE, _mixed, -1),
379             otherDefault(TRUE, _otherDefault, -1),
380             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
381             different(TRUE, _different, -1);
382 
383         int8_t result;
384 
385         /* test caseCompare() */
386         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
387         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
388             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
389         }
390         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
391         if(result!=0) {
392             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
393         }
394         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
395         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
396             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
397         }
398 
399         /* test caseCompare() */
400         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
401         if(result<=0) {
402             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
403         }
404 
405         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
406         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
407         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
408             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
409         }
410 
411         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
412         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
413         if(result<=0) {
414             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
415         }
416     }
417 
418     // test that srcLength=-1 is handled in functions that
419     // take input const UChar */int32_t srcLength (j785)
420     {
421         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
422         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
423 
424         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
425             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
426         }
427 
428         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
429             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
430         }
431 
432         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
433             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
434         }
435 
436         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
437             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
438         }
439 
440         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
441             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
442         }
443 
444         UnicodeString s2, s3;
445         s2.replace(0, 0, u+1, -1);
446         s3.replace(0, 0, u, 1, -1);
447         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
448             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
449         }
450     }
451 }
452 
453 void
TestExtract()454 UnicodeStringTest::TestExtract()
455 {
456     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
457     UnicodeString  test2;
458     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
459     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
460     UnicodeString  test5;
461     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
462 
463     test1.extract(11, 12, test2);
464     test1.extract(11, 12, test3);
465     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
466         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
467     }
468 
469     // test proper pinning in extractBetween()
470     test1.extractBetween(-3, 7, test5);
471     if(test5!=UNICODE_STRING("Now is ", 7)) {
472         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
473     }
474 
475     test1.extractBetween(11, 23, test5);
476     if (test1.extract(60, 71, test6) != 9) {
477         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
478     }
479     if (test1.extract(11, 12, test6) != 12) {
480         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
481     }
482 
483     // convert test4 back to Unicode for comparison
484     UnicodeString test4b(test4, 12);
485 
486     if (test1.extract(11, 12, (char *)NULL) != 12) {
487         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
488     }
489     if (test1.extract(11, -1, test6) != 0) {
490         errln("UnicodeString.extract(-1) failed to stop reading the string.");
491     }
492 
493     for (int32_t i = 0; i < 12; i++) {
494         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
495             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
496             break;
497         }
498         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
499             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
500             break;
501         }
502         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
503             errln(UnicodeString("extracting into an array of char failed at position ") + i);
504             break;
505         }
506         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
507             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
508             break;
509         }
510     }
511 
512     // test preflighting and overflows with invariant conversion
513     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
514         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
515     }
516 
517     test4[2] = (char)0xff;
518     if (test1.extract(0, 10, test4, 2, "") != 10) {
519         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
520     }
521     if (test4[2] != (char)0xff) {
522         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
523     }
524 
525     {
526         // test new, NUL-terminating extract() function
527         UnicodeString s("terminate", "");
528         UChar dest[20]={
529             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
530             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
531         };
532         UErrorCode errorCode;
533         int32_t length;
534 
535         errorCode=U_ZERO_ERROR;
536         length=s.extract((UChar *)NULL, 0, errorCode);
537         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
538             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
539         }
540 
541         errorCode=U_ZERO_ERROR;
542         length=s.extract(dest, s.length()-1, errorCode);
543         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
544             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
545                 length, u_errorName(errorCode), s.length());
546         }
547 
548         errorCode=U_ZERO_ERROR;
549         length=s.extract(dest, s.length(), errorCode);
550         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
551             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
552                 length, u_errorName(errorCode), s.length());
553         }
554         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
555             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
556         }
557 
558         errorCode=U_ZERO_ERROR;
559         length=s.extract(dest, s.length()+1, errorCode);
560         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
561             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
562                 length, u_errorName(errorCode), s.length());
563         }
564         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
565             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
566         }
567     }
568 
569     {
570         // test new UConverter extract() and constructor
571         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
572         char buffer[32];
573         static const char expect[]={
574             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
575             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
576             (char)0xc3, (char)0x84,
577             (char)0xe1, (char)0xbb, (char)0x90
578         };
579         UErrorCode errorCode=U_ZERO_ERROR;
580         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
581         int32_t length;
582 
583         if(U_SUCCESS(errorCode)) {
584             // test preflighting
585             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
586                 errorCode!=U_BUFFER_OVERFLOW_ERROR
587             ) {
588                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
589                       length, u_errorName(errorCode));
590             }
591             errorCode=U_ZERO_ERROR;
592             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
593                 errorCode!=U_BUFFER_OVERFLOW_ERROR
594             ) {
595                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
596                       length, u_errorName(errorCode));
597             }
598 
599             // try error cases
600             errorCode=U_ZERO_ERROR;
601             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
602                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
603             }
604             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
605             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
606                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
607             }
608             errorCode=U_ZERO_ERROR;
609 
610             // extract for real
611             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
612                 uprv_memcmp(buffer, expect, 13)!=0 ||
613                 buffer[13]!=0 ||
614                 U_FAILURE(errorCode)
615             ) {
616                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
617                       length, u_errorName(errorCode));
618             }
619             // Test again with just the converter name.
620             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
621                 uprv_memcmp(buffer, expect, 13)!=0 ||
622                 buffer[13]!=0 ||
623                 U_FAILURE(errorCode)
624             ) {
625                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
626                       length, u_errorName(errorCode));
627             }
628 
629             // try the constructor
630             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
631             if(U_FAILURE(errorCode) || s!=t) {
632                 errln("UnicodeString(UConverter) conversion failed (%s)",
633                       u_errorName(errorCode));
634             }
635 
636             ucnv_close(cnv);
637         }
638     }
639 }
640 
641 void
TestRemoveReplace()642 UnicodeStringTest::TestRemoveReplace()
643 {
644     UnicodeString   test1("The rain in Spain stays mainly on the plain");
645     UnicodeString   test2("eat SPAMburgers!");
646     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
647     char            test4[] = "SPAM";
648     UnicodeString&  test5 = test1;
649 
650     test1.replace(4, 4, test2, 4, 4);
651     test1.replace(12, 5, test3, 4);
652     test3[4] = 0;
653     test1.replace(17, 4, test3);
654     test1.replace(23, 4, test4);
655     test1.replaceBetween(37, 42, test2, 4, 8);
656 
657     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
658         errln("One of the replace methods failed:\n"
659               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
660               "  got \"" + test1 + "\"");
661 
662     test1.remove(21, 1);
663     test1.removeBetween(26, 28);
664 
665     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
666         errln("One of the remove methods failed:\n"
667               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
668               "  got \"" + test1 + "\"");
669 
670     for (int32_t i = 0; i < test1.length(); i++) {
671         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
672             test1.setCharAt(i, 0x78);
673         }
674     }
675 
676     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
677         errln("One of the remove methods failed:\n"
678               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
679               "  got \"" + test1 + "\"");
680 
681     test1.remove();
682     if (test1.length() != 0)
683         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
684 }
685 
686 void
TestSearching()687 UnicodeStringTest::TestSearching()
688 {
689     UnicodeString test1("test test ttest tetest testesteststt");
690     UnicodeString test2("test");
691     UChar testChar = 0x74;
692 
693     UChar32 testChar32 = 0x20402;
694     UChar testData[]={
695         //   0       1       2       3       4       5       6       7
696         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
697 
698         //   8       9      10      11      12      13      14      15
699         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
700 
701         //  16      17      18      19
702         0xdc02, 0xd841, 0x0073, 0x0000
703     };
704     UnicodeString test3(testData);
705     UnicodeString test4(testChar32);
706 
707     uint16_t occurrences = 0;
708     int32_t startPos = 0;
709     for ( ;
710           startPos != -1 && startPos < test1.length();
711           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
712         ;
713     if (occurrences != 6)
714         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
715 
716     for ( occurrences = 0, startPos = 10;
717           startPos != -1 && startPos < test1.length();
718           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
719         ;
720     if (occurrences != 4)
721         errln(UnicodeString("indexOf with starting offset failed: "
722                             "expected to find 4 occurrences, found ") + occurrences);
723 
724     int32_t endPos = 28;
725     for ( occurrences = 0, startPos = 5;
726           startPos != -1 && startPos < test1.length();
727           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
728         ;
729     if (occurrences != 4)
730         errln(UnicodeString("indexOf with starting and ending offsets failed: "
731                             "expected to find 4 occurrences, found ") + occurrences);
732 
733     //using UChar32 string
734     for ( startPos=0, occurrences=0;
735           startPos != -1 && startPos < test3.length();
736           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
737         ;
738     if (occurrences != 4)
739         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
740 
741     for ( startPos=10, occurrences=0;
742           startPos != -1 && startPos < test3.length();
743           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
744         ;
745     if (occurrences != 2)
746         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
747     //---
748 
749     for ( occurrences = 0, startPos = 0;
750           startPos != -1 && startPos < test1.length();
751           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
752         ;
753     if (occurrences != 16)
754         errln(UnicodeString("indexOf with character failed: "
755                             "expected to find 16 occurrences, found ") + occurrences);
756 
757     for ( occurrences = 0, startPos = 10;
758           startPos != -1 && startPos < test1.length();
759           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
760         ;
761     if (occurrences != 12)
762         errln(UnicodeString("indexOf with character & start offset failed: "
763                             "expected to find 12 occurrences, found ") + occurrences);
764 
765     for ( occurrences = 0, startPos = 5, endPos = 28;
766           startPos != -1 && startPos < test1.length();
767           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
768         ;
769     if (occurrences != 10)
770         errln(UnicodeString("indexOf with character & start & end offsets failed: "
771                             "expected to find 10 occurrences, found ") + occurrences);
772 
773     //testing for UChar32
774     UnicodeString subString;
775     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
776         subString.append(test3, startPos, test3.length());
777         if(subString.indexOf(testChar32) != -1 ){
778              ++occurrences;
779         }
780         subString.remove();
781     }
782     if (occurrences != 14)
783         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
784 
785     for ( occurrences = 0, startPos = 0;
786           startPos != -1 && startPos < test3.length();
787           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
788         ;
789     if (occurrences != 4)
790         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
791 
792     endPos=test3.length();
793     for ( occurrences = 0, startPos = 5;
794           startPos != -1 && startPos < test3.length();
795           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
796         ;
797     if (occurrences != 3)
798         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
799     //---
800 
801     if(test1.lastIndexOf(test2)!=29) {
802         errln("test1.lastIndexOf(test2)!=29");
803     }
804 
805     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
806         errln("test1.lastIndexOf(test2, start) failed");
807     }
808 
809     for ( occurrences = 0, startPos = 32;
810           startPos != -1;
811           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
812         ;
813     if (occurrences != 4)
814         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
815                             "expected to find 4 occurrences, found ") + occurrences);
816 
817     for ( occurrences = 0, startPos = 32;
818           startPos != -1;
819           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
820         ;
821     if (occurrences != 11)
822         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
823                             "expected to find 11 occurrences, found ") + occurrences);
824 
825     //testing UChar32
826     startPos=test3.length();
827     for ( occurrences = 0;
828           startPos != -1;
829           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
830         ;
831     if (occurrences != 3)
832         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
833 
834 
835     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
836         subString.remove();
837         subString.append(test3, 0, endPos);
838         if(subString.lastIndexOf(testChar32) != -1 ){
839             ++occurrences;
840         }
841     }
842     if (occurrences != 18)
843         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
844     //---
845 
846     // test that indexOf(UChar32) and lastIndexOf(UChar32)
847     // do not find surrogate code points when they are part of matched pairs
848     // (= part of supplementary code points)
849     // Jitterbug 1542
850     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
851         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
852     }
853     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
854         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
855         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
856     ) {
857         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
858     }
859 }
860 
861 void
TestSpacePadding()862 UnicodeStringTest::TestSpacePadding()
863 {
864     UnicodeString test1("hello");
865     UnicodeString test2("   there");
866     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
867     UnicodeString test4;
868     UBool returnVal;
869     UnicodeString expectedValue;
870 
871     returnVal = test1.padLeading(15);
872     expectedValue = "          hello";
873     if (returnVal == FALSE || test1 != expectedValue)
874         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
875 
876     returnVal = test2.padTrailing(15);
877     expectedValue = "   there       ";
878     if (returnVal == FALSE || test2 != expectedValue)
879         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
880 
881     expectedValue = test3;
882     returnVal = test3.padTrailing(15);
883     if (returnVal == TRUE || test3 != expectedValue)
884         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
885 
886     expectedValue = "hello";
887     test4.setTo(test1).trim();
888 
889     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
890         errln("trim(UnicodeString&) failed");
891 
892     test1.trim();
893     if (test1 != expectedValue)
894         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
895 
896     test2.trim();
897     expectedValue = "there";
898     if (test2 != expectedValue)
899         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
900 
901     test3.trim();
902     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
903     if (test3 != expectedValue)
904         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
905 
906     returnVal = test1.truncate(15);
907     expectedValue = "hello";
908     if (returnVal == TRUE || test1 != expectedValue)
909         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
910 
911     returnVal = test2.truncate(15);
912     expectedValue = "there";
913     if (returnVal == TRUE || test2 != expectedValue)
914         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
915 
916     returnVal = test3.truncate(15);
917     expectedValue = "Hi!  How ya doi";
918     if (returnVal == FALSE || test3 != expectedValue)
919         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
920 }
921 
922 void
TestPrefixAndSuffix()923 UnicodeStringTest::TestPrefixAndSuffix()
924 {
925     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
926     UnicodeString test2("Now");
927     UnicodeString test3("country.");
928     UnicodeString test4("count");
929 
930     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
931         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
932     }
933 
934     if (test1.startsWith(test3) ||
935         test1.startsWith(test3.getBuffer(), test3.length()) ||
936         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
937     ) {
938         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
939     }
940 
941     if (test1.endsWith(test2)) {
942         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
943     }
944 
945     if (!test1.endsWith(test3)) {
946         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
947     }
948     if (!test1.endsWith(test3, 0, INT32_MAX)) {
949         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
950     }
951 
952     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
953         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
954     }
955     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
956         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
957     }
958 
959     if (!test3.startsWith(test4)) {
960         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
961     }
962 
963     if (test4.startsWith(test3)) {
964         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
965     }
966 }
967 
968 void
TestStartsWithAndEndsWithNulTerminated()969 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
970     UnicodeString test("abcde");
971     const UChar ab[] = { 0x61, 0x62, 0 };
972     const UChar de[] = { 0x64, 0x65, 0 };
973     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
974     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
975     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
976     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
977 }
978 
979 void
TestFindAndReplace()980 UnicodeStringTest::TestFindAndReplace()
981 {
982     UnicodeString test1("One potato, two potato, three potato, four\n");
983     UnicodeString test2("potato");
984     UnicodeString test3("MISSISSIPPI");
985 
986     UnicodeString expectedValue;
987 
988     test1.findAndReplace(test2, test3);
989     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
990     if (test1 != expectedValue)
991         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
992     test1.findAndReplace(2, 32, test3, test2);
993     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
994     if (test1 != expectedValue)
995         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
996 }
997 
998 void
TestReverse()999 UnicodeStringTest::TestReverse()
1000 {
1001     UnicodeString test("backwards words say to used I");
1002 
1003     test.reverse();
1004     test.reverse(2, 4);
1005     test.reverse(7, 2);
1006     test.reverse(10, 3);
1007     test.reverse(14, 5);
1008     test.reverse(20, 9);
1009 
1010     if (test != "I used to say words backwards")
1011         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1012             + test + "\"");
1013 
1014     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1015     test.reverse();
1016     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1017         errln("reverse() failed with supplementary characters");
1018     }
1019 
1020     // Test case for ticket #8091:
1021     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1022     // an odd-length string that contains no other lead surrogates.
1023     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1024     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1025     test.reverse();
1026     if(test!=expected) {
1027         errln("reverse() failed with only lead surrogate in the middle");
1028     }
1029 }
1030 
1031 void
TestMiscellaneous()1032 UnicodeStringTest::TestMiscellaneous()
1033 {
1034     UnicodeString   test1("This is a test");
1035     UnicodeString   test2("This is a test");
1036     UnicodeString   test3("Me too!");
1037 
1038     // test getBuffer(minCapacity) and releaseBuffer()
1039     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1040     UChar *p=test1.getBuffer(20);
1041     if(test1.getCapacity()<20) {
1042         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1043     }
1044 
1045     test1.append((UChar)7); // must not be able to modify the string here
1046     test1.setCharAt(3, 7);
1047     test1.reverse();
1048     if( test1.length()!=0 ||
1049         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1050         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1051     ) {
1052         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1053     }
1054 
1055     p[0]=1;
1056     p[1]=2;
1057     p[2]=3;
1058     test1.releaseBuffer(3);
1059     test1.append((UChar)4);
1060 
1061     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1062         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1063     }
1064 
1065     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1066     test1.releaseBuffer(1);
1067     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1068         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1069     }
1070 
1071     // test getBuffer(const)
1072     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1073     if( test1.length()!=4 ||
1074         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1075         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1076     ) {
1077         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1078     }
1079 
1080     // test releaseBuffer() with a NUL-terminated buffer
1081     test1.getBuffer(20)[2]=0;
1082     test1.releaseBuffer(); // implicit -1
1083     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1084         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1085     }
1086 
1087     // test releaseBuffer() with a non-NUL-terminated buffer
1088     p=test1.getBuffer(256);
1089     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1090         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1091     }
1092     test1.releaseBuffer();  // implicit -1
1093     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1094         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1095     }
1096 
1097     // test getTerminatedBuffer()
1098     test1=UnicodeString("This is another test.", "");
1099     test2=UnicodeString("This is another test.", "");
1100     q=test1.getTerminatedBuffer();
1101     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1102         errln("getTerminatedBuffer()[length]!=0");
1103     }
1104 
1105     const UChar u[]={ 5, 6, 7, 8, 0 };
1106     test1.setTo(FALSE, u, 3);
1107     q=test1.getTerminatedBuffer();
1108     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1109         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1110     }
1111 
1112     test1.setTo(TRUE, u, -1);
1113     q=test1.getTerminatedBuffer();
1114     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1115         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1116     }
1117 
1118     test1=UNICODE_STRING("la", 2);
1119     test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1120     if(test1!=UNICODE_STRING("la lila", 7)) {
1121         errln("UnicodeString::append(const UChar *, start, length) failed");
1122     }
1123 
1124     test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
1125     if(test1!=UNICODE_STRING("la dudum lila", 13)) {
1126         errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1127     }
1128 
1129     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1130     test1.insert(9, ucs, -1);
1131     if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
1132         errln("UnicodeString::insert(start, const UChar *, length) failed");
1133     }
1134 
1135     test1.replace(9, 2, (UChar)0x2b);
1136     if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
1137         errln("UnicodeString::replace(start, length, UChar) failed");
1138     }
1139 
1140     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1141         errln("UnicodeString::hasMetaData() returns TRUE");
1142     }
1143 
1144     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1145     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1146     test1.truncate(36);  // ensure length()<getCapacity()
1147     test2=test1;  // share the buffer
1148     test1.truncate(5);
1149     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1150         errln("UnicodeString(shared buffer).truncate() failed");
1151     }
1152     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1153         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1154               "modified another copy of the string!");
1155     }
1156     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1157     test1.truncate(36);  // ensure length()<getCapacity()
1158     test2=test1;  // share the buffer
1159     test1.remove();
1160     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1161         errln("UnicodeString(shared buffer).remove() failed");
1162     }
1163     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1164         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1165               "modified another copy of the string!");
1166     }
1167 
1168     // ticket #9740
1169     test1.setTo(TRUE, ucs, 3);
1170     assertEquals("length of read-only alias", 3, test1.length());
1171     test1.trim();
1172     assertEquals("length of read-only alias after trim()", 2, test1.length());
1173     assertEquals("length of terminated buffer of read-only alias + trim()",
1174                  2, u_strlen(test1.getTerminatedBuffer()));
1175 }
1176 
1177 void
TestStackAllocation()1178 UnicodeStringTest::TestStackAllocation()
1179 {
1180     UChar           testString[] ={
1181         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1182     UChar           guardWord = 0x4DED;
1183     UnicodeString*  test = 0;
1184 
1185     test = new  UnicodeString(testString);
1186     if (*test != "This is a crazy test.")
1187         errln("Test string failed to initialize properly.");
1188     if (guardWord != 0x04DED)
1189         errln("Test string initialization overwrote guard word!");
1190 
1191     test->insert(8, "only ");
1192     test->remove(15, 6);
1193     if (*test != "This is only a test.")
1194         errln("Manipulation of test string failed to work right.");
1195     if (guardWord != 0x4DED)
1196         errln("Manipulation of test string overwrote guard word!");
1197 
1198     // we have to deinitialize and release the backing store by calling the destructor
1199     // explicitly, since we can't overload operator delete
1200     delete test;
1201 
1202     UChar workingBuffer[] = {
1203         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1204         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1205         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1206         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1207         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1208     UChar guardWord2 = 0x4DED;
1209 
1210     test = new UnicodeString(workingBuffer, 35, 100);
1211     if (*test != "Now is the time for all men to come")
1212         errln("Stack-allocated backing store failed to initialize correctly.");
1213     if (guardWord2 != 0x4DED)
1214         errln("Stack-allocated backing store overwrote guard word!");
1215 
1216     test->insert(24, "good ");
1217     if (*test != "Now is the time for all good men to come")
1218         errln("insert() on stack-allocated UnicodeString didn't work right");
1219     if (guardWord2 != 0x4DED)
1220         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1221 
1222     if (workingBuffer[24] != 0x67)
1223         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1224 
1225     *test += " to the aid of their country.";
1226     if (*test != "Now is the time for all good men to come to the aid of their country.")
1227         errln("Stack-allocated UnicodeString overflow didn't work");
1228     if (guardWord2 != 0x4DED)
1229         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1230 
1231     *test = "ha!";
1232     if (*test != "ha!")
1233         errln("Assignment to stack-allocated UnicodeString didn't work");
1234     if (workingBuffer[0] != 0x4e)
1235         errln("Change to UnicodeString after overflow are still affecting original buffer");
1236     if (guardWord2 != 0x4DED)
1237         errln("Change to UnicodeString after overflow overwrote guard word!");
1238 
1239     // test read-only aliasing with setTo()
1240     workingBuffer[0] = 0x20ac;
1241     workingBuffer[1] = 0x125;
1242     workingBuffer[2] = 0;
1243     test->setTo(TRUE, workingBuffer, 2);
1244     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1245         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1246     }
1247 
1248     UnicodeString *c=(UnicodeString *)test->clone();
1249 
1250     workingBuffer[1] = 0x109;
1251     if(test->charAt(1) != 0x109) {
1252         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1253     }
1254 
1255     if(c->length() != 2 || c->charAt(1) != 0x125) {
1256         errln("clone(alias) did not copy the buffer");
1257     }
1258     delete c;
1259 
1260     test->setTo(TRUE, workingBuffer, -1);
1261     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1262         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1263     }
1264 
1265     test->setTo(FALSE, workingBuffer, -1);
1266     if(!test->isBogus()) {
1267         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1268     }
1269 
1270     delete test;
1271 
1272     test=new UnicodeString();
1273     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1274     test->setTo(buffer, 4, 10);
1275     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1276         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1277         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1278     }
1279     delete test;
1280 
1281 
1282     // test the UChar32 constructor
1283     UnicodeString c32Test((UChar32)0x10ff2a);
1284     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1285         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1286     ) {
1287         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1288     }
1289 
1290     // test the (new) capacity constructor
1291     UnicodeString capTest(5, (UChar32)0x2a, 5);
1292     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1293         capTest.char32At(0) != 0x2a ||
1294         capTest.char32At(4) != 0x2a
1295     ) {
1296         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1297     }
1298 
1299     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1300     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1301         capTest.char32At(0) != 0x10ff2a ||
1302         capTest.char32At(4) != 0x10ff2a
1303     ) {
1304         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1305     }
1306 
1307     capTest = UnicodeString(5, (UChar32)0, 0);
1308     if(capTest.length() != 0) {
1309         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1310     }
1311 }
1312 
1313 /**
1314  * Test the unescape() function.
1315  */
TestUnescape(void)1316 void UnicodeStringTest::TestUnescape(void) {
1317     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1318     UnicodeString OUT("abc");
1319     OUT.append((UChar)0x4567);
1320     OUT.append(" ");
1321     OUT.append((UChar)0xA);
1322     OUT.append((UChar)0xD);
1323     OUT.append(" ");
1324     OUT.append((UChar32)0x00101234);
1325     OUT.append("xyz");
1326     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1327     UnicodeString result = IN.unescape();
1328     if (result != OUT) {
1329         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1330               prettify(result) + ", expected " +
1331               prettify(OUT));
1332     }
1333 
1334     // test that an empty string is returned in case of an error
1335     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1336         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1337     }
1338 }
1339 
1340 /* test code point counting functions --------------------------------------- */
1341 
1342 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1343 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1344 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1345     int32_t count=s.countChar32(start, length);
1346     return count>number;
1347 }
1348 
1349 /* compare the real function against the reference */
1350 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1351 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1352     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1353         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1354                 start, length, number, s.hasMoreChar32Than(start, length, number));
1355     }
1356 }
1357 
1358 void
TestCountChar32(void)1359 UnicodeStringTest::TestCountChar32(void) {
1360     {
1361         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1362 
1363         // test countChar32()
1364         // note that this also calls and tests u_countChar32(length>=0)
1365         if(
1366             s.countChar32()!=4 ||
1367             s.countChar32(1)!=4 ||
1368             s.countChar32(2)!=3 ||
1369             s.countChar32(2, 3)!=2 ||
1370             s.countChar32(2, 0)!=0
1371         ) {
1372             errln("UnicodeString::countChar32() failed");
1373         }
1374 
1375         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1376         const UChar *buffer=s.getTerminatedBuffer();
1377         if(
1378             u_countChar32(buffer, -1)!=4 ||
1379             u_countChar32(buffer+1, -1)!=4 ||
1380             u_countChar32(buffer+2, -1)!=3 ||
1381             u_countChar32(buffer+3, -1)!=3 ||
1382             u_countChar32(buffer+4, -1)!=2 ||
1383             u_countChar32(buffer+5, -1)!=1 ||
1384             u_countChar32(buffer+6, -1)!=0
1385         ) {
1386             errln("u_countChar32(length=-1) failed");
1387         }
1388 
1389         // test u_countChar32() with bad input
1390         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1391             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1392         }
1393     }
1394 
1395     /* test data and variables for hasMoreChar32Than() */
1396     static const UChar str[]={
1397         0x61, 0x62, 0xd800, 0xdc00,
1398         0xd801, 0xdc01, 0x63, 0xd802,
1399         0x64, 0xdc03, 0x65, 0x66,
1400         0xd804, 0xdc04, 0xd805, 0xdc05,
1401         0x67
1402     };
1403     UnicodeString string(str, UPRV_LENGTHOF(str));
1404     int32_t start, length, number;
1405 
1406     /* test hasMoreChar32Than() */
1407     for(length=string.length(); length>=0; --length) {
1408         for(start=0; start<=length; ++start) {
1409             for(number=-1; number<=((length-start)+2); ++number) {
1410                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1411             }
1412         }
1413     }
1414 
1415     /* test hasMoreChar32Than() with pinning */
1416     for(start=-1; start<=string.length()+1; ++start) {
1417         for(number=-1; number<=((string.length()-start)+2); ++number) {
1418             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1419         }
1420     }
1421 
1422     /* test hasMoreChar32Than() with a bogus string */
1423     string.setToBogus();
1424     for(length=-1; length<=1; ++length) {
1425         for(start=-1; start<=length; ++start) {
1426             for(number=-1; number<=((length-start)+2); ++number) {
1427                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1428             }
1429         }
1430     }
1431 }
1432 
/*
 * Tests the "bogus" state of UnicodeString:
 *  - how a string becomes bogus (setToBogus(), setTo() with illegal arguments),
 *  - that non-assignment modifications leave a bogus string bogus,
 *  - that assignment-like operations revive it,
 *  - the documented ways to turn a bogus string into an empty one,
 *  - that NULL primitive input is treated as empty rather than bogus,
 *  - comparison semantics (bogus==bogus, bogus<empty).
 *
 * The checks are order-dependent: test3 is repeatedly reset to bogus, and
 * test1 is deliberately made bogus near the end and then reused by the
 * final comparison checks.
 */
void
UnicodeStringTest::TestBogus() {
    UnicodeString   test1("This is a test");
    UnicodeString   test2("This is a test");
    UnicodeString   test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned TRUE for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    test3.setTo(FALSE, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned FALSE for isBogus()!");
    }
    // equal strings must hash equally; the bogus string must hash differently from test1
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    // a bogus string must not hand out any buffer
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    // a bogus string is expected to compare less than a normal string
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // assigning a bogus string makes the target bogus;
    // test1 is expected to be bogus from here on and is reused as such below
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, NULL, 1).length()!=2) {
        errln("UniStr.insert(...NULL...) should not modify the string but does");
    }

    // constructors given NULL text must produce non-bogus strings
    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(TRUE, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    // the same for the corresponding setTo() overloads
    test4.setTo(NULL, 3);
    test5.setTo(TRUE, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    // (test1 and test3 are both expected to be bogus at this point)
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // test2 becomes empty; the bogus test1 must sort before it
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }
}
1638 
1639 // StringEnumeration ------------------------------------------------------- ***
1640 // most of StringEnumeration is tested elsewhere
1641 // this test improves code coverage
1642 
// Fixture strings that TestEnumeration hands out, in this order.
// TestStringEnumeration() compares the enumerated values against these entries.
static const char *const
testEnumStrings[]={
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1651 
1652 class TestEnumeration : public StringEnumeration {
1653 public:
TestEnumeration()1654     TestEnumeration() : i(0) {}
1655 
count(UErrorCode &) const1656     virtual int32_t count(UErrorCode& /*status*/) const {
1657         return UPRV_LENGTHOF(testEnumStrings);
1658     }
1659 
snext(UErrorCode & status)1660     virtual const UnicodeString *snext(UErrorCode &status) {
1661         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1662             unistr=UnicodeString(testEnumStrings[i++], "");
1663             return &unistr;
1664         }
1665 
1666         return NULL;
1667     }
1668 
reset(UErrorCode &)1669     virtual void reset(UErrorCode& /*status*/) {
1670         i=0;
1671     }
1672 
getStaticClassID()1673     static inline UClassID getStaticClassID() {
1674         return (UClassID)&fgClassID;
1675     }
getDynamicClassID() const1676     virtual UClassID getDynamicClassID() const {
1677         return getStaticClassID();
1678     }
1679 
1680 private:
1681     static const char fgClassID;
1682 
1683     int32_t i;
1684 };
1685 
1686 const char TestEnumeration::fgClassID=0;
1687 
1688 void
TestStringEnumeration()1689 UnicodeStringTest::TestStringEnumeration() {
1690     UnicodeString s;
1691     TestEnumeration ten;
1692     int32_t i, length;
1693     UErrorCode status;
1694 
1695     const UChar *pu;
1696     const char *pc;
1697 
1698     // test the next() default implementation and ensureCharsCapacity()
1699     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1700         status=U_ZERO_ERROR;
1701         pc=ten.next(&length, status);
1702         s=UnicodeString(testEnumStrings[i], "");
1703         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1704             errln("StringEnumeration.next(%d) failed", i);
1705         }
1706     }
1707     status=U_ZERO_ERROR;
1708     if(ten.next(&length, status)!=NULL) {
1709         errln("StringEnumeration.next(done)!=NULL");
1710     }
1711 
1712     // test the unext() default implementation
1713     ten.reset(status);
1714     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1715         status=U_ZERO_ERROR;
1716         pu=ten.unext(&length, status);
1717         s=UnicodeString(testEnumStrings[i], "");
1718         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1719             errln("StringEnumeration.unext(%d) failed", i);
1720         }
1721     }
1722     status=U_ZERO_ERROR;
1723     if(ten.unext(&length, status)!=NULL) {
1724         errln("StringEnumeration.unext(done)!=NULL");
1725     }
1726 
1727     // test that the default clone() implementation works, and returns NULL
1728     if(ten.clone()!=NULL) {
1729         errln("StringEnumeration.clone()!=NULL");
1730     }
1731 
1732     // test that uenum_openFromStringEnumeration() works
1733     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1734     StringEnumeration *newTen = new TestEnumeration;
1735     status=U_ZERO_ERROR;
1736     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1737     if (uten==NULL || U_FAILURE(status)) {
1738         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1739         return;
1740     }
1741 
1742     // test  uenum_next()
1743     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1744         status=U_ZERO_ERROR;
1745         pc=uenum_next(uten, &length, &status);
1746         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1747             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1748         }
1749     }
1750     status=U_ZERO_ERROR;
1751     if(uenum_next(uten, &length, &status)!=NULL) {
1752         errln("File %s, line %d, uenum_next(done)!=NULL");
1753     }
1754 
1755     // test the uenum_unext()
1756     uenum_reset(uten, &status);
1757     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1758         status=U_ZERO_ERROR;
1759         pu=uenum_unext(uten, &length, &status);
1760         s=UnicodeString(testEnumStrings[i], "");
1761         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1762             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1763         }
1764     }
1765     status=U_ZERO_ERROR;
1766     if(uenum_unext(uten, &length, &status)!=NULL) {
1767         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1768     }
1769 
1770     uenum_close(uten);
1771 }
1772 
1773 /*
1774  * Namespace test, to make sure that macros like UNICODE_STRING include the
1775  * namespace qualifier.
1776  *
1777  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1778  */
1779 namespace bogus {
1780     class UnicodeString {
1781     public:
1782         enum EInvariant { kInvariant };
UnicodeString()1783         UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1784         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1785         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1786 ) : i(length) {}
1787     private:
1788         int32_t i;
1789     };
1790 }
1791 
1792 void
TestNameSpace()1793 UnicodeStringTest::TestNameSpace() {
1794     // Provoke name collision unless the UnicodeString macros properly
1795     // qualify the icu::UnicodeString class.
1796     using namespace bogus;
1797 
1798     // Use all UnicodeString macros from unistr.h.
1799     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1800     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1801     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1802 
1803     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1804     icu::UnicodeString s4=s1+s2+s3;
1805     if(s4.length()!=9) {
1806         errln("Something wrong with UnicodeString::operator+().");
1807     }
1808 }
1809 
1810 void
TestUTF32()1811 UnicodeStringTest::TestUTF32() {
1812     // Input string length US_STACKBUF_SIZE to cause overflow of the
1813     // initially chosen fStackBuffer due to supplementary characters.
1814     static const UChar32 utf32[] = {
1815         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1816         0x10000, 0x20000, 0xe0000, 0x10ffff
1817     };
1818     static const UChar expected_utf16[] = {
1819         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1820         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1821     };
1822     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1823     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1824     if(from32 != expected) {
1825         errln("UnicodeString::fromUTF32() did not create the expected string.");
1826     }
1827 
1828     static const UChar utf16[] = {
1829         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1830     };
1831     static const UChar32 expected_utf32[] = {
1832         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1833     };
1834     UChar32 result32[16];
1835     UErrorCode errorCode = U_ZERO_ERROR;
1836     int32_t length32 =
1837         UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1838         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1839     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1840         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1841         result32[length32] != 0
1842     ) {
1843         errln("UnicodeString::toUTF32() did not create the expected string.");
1844     }
1845 }
1846 
1847 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1848 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1849     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1850             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1851     virtual void Flush() { calledFlush = TRUE; }
1852     UBool calledFlush;
1853 };
1854 
1855 void
TestUTF8()1856 UnicodeStringTest::TestUTF8() {
1857     static const uint8_t utf8[] = {
1858         // Code points:
1859         // 0x41, 0xd900,
1860         // 0x61, 0xdc00,
1861         // 0x110000, 0x5a,
1862         // 0x50000, 0x7a,
1863         // 0x10000, 0x20000,
1864         // 0xe0000, 0x10ffff
1865         0x41, 0xed, 0xa4, 0x80,
1866         0x61, 0xed, 0xb0, 0x80,
1867         0xf4, 0x90, 0x80, 0x80, 0x5a,
1868         0xf1, 0x90, 0x80, 0x80, 0x7a,
1869         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1870         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1871     };
1872     static const UChar expected_utf16[] = {
1873         0x41, 0xfffd,
1874         0x61, 0xfffd,
1875         0xfffd, 0x5a,
1876         0xd900, 0xdc00, 0x7a,
1877         0xd800, 0xdc00, 0xd840, 0xdc00,
1878         0xdb40, 0xdc00, 0xdbff, 0xdfff
1879     };
1880     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1881     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1882 
1883     if(from8 != expected) {
1884         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1885     }
1886 #if U_HAVE_STD_STRING
1887     std::string utf8_string((const char *)utf8, sizeof(utf8));
1888     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1889     if(from8b != expected) {
1890         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1891     }
1892 #endif
1893 
1894     static const UChar utf16[] = {
1895         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1896     };
1897     static const uint8_t expected_utf8[] = {
1898         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1899         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1900     };
1901     UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1902 
1903     char buffer[64];
1904     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1905     us.toUTF8(sink);
1906     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1907         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1908     ) {
1909         errln("UnicodeString::toUTF8() did not create the expected string.");
1910     }
1911     if(!sink.calledFlush) {
1912         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1913     }
1914 #if U_HAVE_STD_STRING
1915     // Initial contents for testing that toUTF8String() appends.
1916     std::string result8 = "-->";
1917     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1918     // Use the return value just for testing.
1919     std::string &result8r = us.toUTF8String(result8);
1920     if(result8r != expected8 || &result8r != &result8) {
1921         errln("UnicodeString::toUTF8String() did not create the expected string.");
1922     }
1923 #endif
1924 }
1925 
// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
// Returns a read-only alias of the NUL-terminated uchars (length -1) as an
// unnamed temporary; the caller compares the result's getBuffer() with uchars
// to see whether the alias survived the return.
// The return statement must keep constructing the temporary in place.
static UnicodeString wrapUChars(const UChar *uchars) {
    return UnicodeString(TRUE, uchars, -1);
}
1930 
/*
 * Tests read-only aliasing of UChar buffers by UnicodeString:
 *  - the aliasing constructor and setTo() keep pointing at the caller's buffer,
 *  - truncate()/remove()/retainBetween() are expected to preserve the alias
 *    (adjusting start/length) without writing to the original buffer,
 *  - getTerminatedBuffer() is expected to copy when the aliased text is not
 *    NUL-terminated at the string's length,
 *  - tempSubString()/tempSubStringBetween() return aliases into the source
 *    string (the pointer checks apply only if the compiler performs RVO).
 *
 * The order of the getBuffer()/getTerminatedBuffer() calls matters because
 * getTerminatedBuffer() may replace the aliased buffer with a copy.
 */
void
UnicodeStringTest::TestReadOnlyAlias() {
    UChar uchars[]={ 0x61, 0x62, 0 };
    UnicodeString alias(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
        return;
    }
    // Shortening keeps the alias, but the buffer is no longer terminated at length 1.
    alias.truncate(1);
    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "did not allocate and copy as expected.");
    }
    // The terminator must have gone into the copy, not into the caller's buffer.
    if(uchars[1]!=0x62) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(1!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "does not return a buffer terminated at the proper length.");
    }

    // Same sequence again, this time aliasing via setTo() and emptying via remove().
    alias.setTo(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
        return;
    }
    alias.remove();
    if(alias.length()!=0) {
        errln("UnicodeString(read-only-alias).remove() did not work.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "did not un-alias as expected.");
    }
    if(uchars[0]!=0x61) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(0!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
              "does not return a buffer terminated at length 0.");
    }

    // Removing a prefix, a suffix, or both from an alias is expected to
    // keep pointing into the aliased buffer.
    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(0, 10);
    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(27, 99);
    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.retainBetween(6, 30);
    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
    }

    // Probe for RVO: only if the returned temporary still aliases abc can
    // the buffer-pointer checks below be expected to hold.
    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;

    // tempSubString() / tempSubStringBetween(): contents are always checked,
    // buffer identity only when hasRVO.
    UnicodeString temp;
    temp.fastCopyFrom(longString.tempSubString());
    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString() failed");
    }
    temp.fastCopyFrom(longString.tempSubString(-3, 5));
    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString(-3, 5) failed");
    }
    temp.fastCopyFrom(longString.tempSubString(17));
    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
        errln("UnicodeString.tempSubString(17) failed");
    }
    temp.fastCopyFrom(longString.tempSubString(99));
    if(!temp.isEmpty()) {
        errln("UnicodeString.tempSubString(99) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(6));
    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
        errln("UnicodeString.tempSubStringBetween(6) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
    }
    // A substring of a bogus string must itself be bogus.
    UnicodeString bogusString;
    bogusString.setToBogus();
    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
    if(!temp.isBogus()) {
        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
    }
}
2030 
2031 void
doTestAppendable(UnicodeString & dest,Appendable & app)2032 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2033     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2034     static const UChar fg[3]={ 0x66, 0x67, 0 };
2035     if(!app.reserveAppendCapacity(12)) {
2036         errln("Appendable.reserve(12) failed");
2037     }
2038     app.appendCodeUnit(0x61);
2039     app.appendCodePoint(0x62);
2040     app.appendCodePoint(0x50000);
2041     app.appendString(cde, 3);
2042     app.appendString(fg, -1);
2043     UChar scratch[3];
2044     int32_t capacity=-1;
2045     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2046     if(capacity<3) {
2047         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2048         return;
2049     }
2050     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2051     u_memcpy(buffer, hij, 3);
2052     app.appendString(buffer, 3);
2053     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2054         errln("Appendable.append(...) failed");
2055     }
2056     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2057     if(buffer!=NULL || capacity!=0) {
2058         errln("Appendable.getAppendBuffer(min=0) failed");
2059     }
2060     capacity=1;
2061     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2062     if(buffer!=NULL || capacity!=0) {
2063         errln("Appendable.getAppendBuffer(scratch<min) failed");
2064     }
2065 }
2066 
2067 class SimpleAppendable : public Appendable {
2068 public:
SimpleAppendable(UnicodeString & dest)2069     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2070     virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
reset()2071     SimpleAppendable &reset() { str.remove(); return *this; }
2072 private:
2073     UnicodeString &str;
2074 };
2075 
2076 void
TestAppendable()2077 UnicodeStringTest::TestAppendable() {
2078     UnicodeString dest;
2079     SimpleAppendable app(dest);
2080     doTestAppendable(dest, app);
2081 }
2082 
2083 void
TestUnicodeStringImplementsAppendable()2084 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2085     UnicodeString dest;
2086     UnicodeStringAppendable app(dest);
2087     doTestAppendable(dest, app);
2088 }
2089 
2090 void
TestSizeofUnicodeString()2091 UnicodeStringTest::TestSizeofUnicodeString() {
2092     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2093     // See the API comments for UNISTR_OBJECT_SIZE.
2094     size_t sizeofUniStr=sizeof(UnicodeString);
2095     size_t expected=UNISTR_OBJECT_SIZE;
2096     if(expected!=sizeofUniStr) {
2097         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2098         // of the compiler might add more internal padding than expected.
2099         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2100               (int)sizeofUniStr, (int)expected);
2101     }
2102     if(sizeofUniStr<32) {
2103         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2104     }
2105     // We assume that the entire UnicodeString object,
2106     // minus the vtable pointer and 2 bytes for flags and short length,
2107     // is available for internal storage of UChars.
2108     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2109     UnicodeString s;
2110     const UChar *emptyBuffer=s.getBuffer();
2111     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2112         s.append((UChar)0x2e);
2113     }
2114     const UChar *fullBuffer=s.getBuffer();
2115     if(fullBuffer!=emptyBuffer) {
2116         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2117               expectedStackBufferLength);
2118     }
2119     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2120     if(terminatedBuffer==emptyBuffer) {
2121         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2122               expectedStackBufferLength);
2123     }
2124 }
2125 
/*
 * Tests UnicodeString::swap(), the non-member swap(), moveFrom(), and,
 * when U_HAVE_RVALUE_REFERENCES is set, the move constructor and move
 * assignment operator. The three source strings are set up as a read-only
 * alias, a heap-allocated string and a stack-buffer string (see the
 * per-line notes below). Buffer pointers are compared to verify that
 * contents were moved rather than copied.
 *
 * The statements are strictly order-dependent: each step consumes the
 * state left behind by the previous swap/move.
 */
void
UnicodeStringTest::TestMoveSwap() {
    static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
    UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
    UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
    UnicodeString s3("defg", 4, US_INV);  // in stack buffer
    const UChar *p = s2.getBuffer();
    // member swap: s1 takes the heap string, s2 takes the alias
    s1.swap(s2);
    if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
        errln("UnicodeString.swap() did not swap");
    }
    // non-member swap: s2 takes "defg", s3 takes the alias
    swap(s2, s3);
    if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
        errln("swap(UnicodeString) did not swap back");
    }
    // moveFrom() of the heap string: the buffer pointer must carry over and
    // the source must be left bogus
    UnicodeString s4;
    s4.moveFrom(s1);
    if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
        errln("UnicodeString.moveFrom(heap) did not move");
    }
    // moveFrom() of the stack-buffer string: only the contents are checked
    UnicodeString s5;
    s5.moveFrom(s2);
    if(s5 != UNICODE_STRING_SIMPLE("defg")) {
        errln("UnicodeString.moveFrom(stack) did not move");
    }
    // moveFrom() of the alias: the result must still point at abc
    UnicodeString s6;
    s6.moveFrom(s3);
    if(s6.getBuffer() != abc || s6.length() != 3) {
        errln("UnicodeString.moveFrom(alias) did not move");
    }
#if U_HAVE_RVALUE_REFERENCES
    infoln("TestMoveSwap() with rvalue references");
    // static_cast to an rvalue reference selects the move overloads
    s1 = static_cast<UnicodeString &&>(s6);
    if(s1.getBuffer() != abc || s1.length() != 3) {
        errln("UnicodeString move assignment operator did not move");
    }
    UnicodeString s7(static_cast<UnicodeString &&>(s4));
    if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
        errln("UnicodeString move constructor did not move");
    }
#else
    infoln("TestMoveSwap() without rvalue references");
    // s7 must exist in both configurations because it is used below
    UnicodeString s7;
#endif

    // Move self assignment leaves the object valid but in an undefined state.
    // Do it to make sure there is no crash,
    // but do not check for any particular resulting value.
    s1.moveFrom(s1);
    s2.moveFrom(s2);
    s3.moveFrom(s3);
    s4.moveFrom(s4);
    s5.moveFrom(s5);
    s6.moveFrom(s6);
    s7.moveFrom(s7);
    // Simple copy assignment must work.
    UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
    s1 = s6 = s4 = s7 = simple;
    if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
        errln("UnicodeString copy after self-move did not work");
    }
}
2188