1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include "ustrtest.h"
10 #include "unicode/appendable.h"
11 #include "unicode/std_string.h"
12 #include "unicode/unistr.h"
13 #include "unicode/uchar.h"
14 #include "unicode/ustring.h"
15 #include "unicode/locid.h"
16 #include "unicode/strenum.h"
17 #include "unicode/ucnv.h"
18 #include "unicode/uenum.h"
19 #include "unicode/utf16.h"
20 #include "cmemory.h"
21 #include "charstr.h"
22
23 #if 0
24 #include "unicode/ustream.h"
25
26 #include <iostream>
27 using namespace std;
28
29 #endif
30
~UnicodeStringTest()31 UnicodeStringTest::~UnicodeStringTest() {}
32
33 extern IntlTest *createStringCaseTest();
34
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)35 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
36 {
37 if (exec) logln("TestSuite UnicodeStringTest: ");
38 TESTCASE_AUTO_BEGIN;
39 TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
40 TESTCASE_AUTO(TestBasicManipulation);
41 TESTCASE_AUTO(TestCompare);
42 TESTCASE_AUTO(TestExtract);
43 TESTCASE_AUTO(TestRemoveReplace);
44 TESTCASE_AUTO(TestSearching);
45 TESTCASE_AUTO(TestSpacePadding);
46 TESTCASE_AUTO(TestPrefixAndSuffix);
47 TESTCASE_AUTO(TestFindAndReplace);
48 TESTCASE_AUTO(TestBogus);
49 TESTCASE_AUTO(TestReverse);
50 TESTCASE_AUTO(TestMiscellaneous);
51 TESTCASE_AUTO(TestStackAllocation);
52 TESTCASE_AUTO(TestUnescape);
53 TESTCASE_AUTO(TestCountChar32);
54 TESTCASE_AUTO(TestStringEnumeration);
55 TESTCASE_AUTO(TestNameSpace);
56 TESTCASE_AUTO(TestUTF32);
57 TESTCASE_AUTO(TestUTF8);
58 TESTCASE_AUTO(TestReadOnlyAlias);
59 TESTCASE_AUTO(TestAppendable);
60 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
61 TESTCASE_AUTO(TestSizeofUnicodeString);
62 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
63 TESTCASE_AUTO(TestMoveSwap);
64 TESTCASE_AUTO(TestUInt16Pointers);
65 TESTCASE_AUTO(TestWCharPointers);
66 TESTCASE_AUTO(TestNullPointers);
67 TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
68 TESTCASE_AUTO_END;
69 }
70
71 void
TestBasicManipulation()72 UnicodeStringTest::TestBasicManipulation()
73 {
74 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
75 UnicodeString expectedValue;
76 UnicodeString *c;
77
78 c=(UnicodeString *)test1.clone();
79 test1.insert(24, "good ");
80 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
81 if (test1 != expectedValue)
82 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
83
84 c->insert(24, "good ");
85 if(*c != expectedValue) {
86 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
87 }
88 delete c;
89
90 test1.remove(41, 8);
91 expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
92 if (test1 != expectedValue)
93 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
94
95 test1.replace(58, 6, "ir country");
96 expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
97 if (test1 != expectedValue)
98 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
99
100 UChar temp[80];
101 test1.extract(0, 15, temp);
102
103 UnicodeString test2(temp, 15);
104
105 expectedValue = "Now is the time";
106 if (test2 != expectedValue)
107 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
108
109 test2 += " for me to go!\n";
110 expectedValue = "Now is the time for me to go!\n";
111 if (test2 != expectedValue)
112 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
113
114 if (test1.length() != 70)
115 errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
116 if (test2.length() != 30)
117 errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
118
119 UnicodeString test3;
120 test3.append((UChar32)0x20402);
121 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
122 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
123 }
124 if(test3.length() != 2){
125 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
126 }
127 test3.append((UChar32)0x0074);
128 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
129 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
130 }
131 if(test3.length() != 3){
132 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
133 }
134
135 // test some UChar32 overloads
136 if( test3.setTo((UChar32)0x10330).length() != 2 ||
137 test3.insert(0, (UChar32)0x20100).length() != 4 ||
138 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
139 (test3 = (UChar32)0x14001).length() != 2
140 ) {
141 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
142 }
143
144 {
145 // test moveIndex32()
146 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
147
148 if(
149 s.moveIndex32(2, -1)!=0 ||
150 s.moveIndex32(2, 1)!=4 ||
151 s.moveIndex32(2, 2)!=5 ||
152 s.moveIndex32(5, -2)!=2 ||
153 s.moveIndex32(0, -1)!=0 ||
154 s.moveIndex32(6, 1)!=6
155 ) {
156 errln("UnicodeString::moveIndex32() failed");
157 }
158
159 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
160 errln("UnicodeString::getChar32Start() failed");
161 }
162
163 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
164 errln("UnicodeString::getChar32Limit() failed");
165 }
166 }
167
168 {
169 // test new 2.2 constructors and setTo function that parallel Java's substring function.
170 UnicodeString src("Hello folks how are you?");
171 UnicodeString target1("how are you?");
172 if (target1 != UnicodeString(src, 12)) {
173 errln("UnicodeString(const UnicodeString&, int32_t) failed");
174 }
175 UnicodeString target2("folks");
176 if (target2 != UnicodeString(src, 6, 5)) {
177 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
178 }
179 if (target1 != target2.setTo(src, 12)) {
180 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
181 }
182 }
183
184 {
185 // op+ is new in ICU 2.8
186 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
187 if(s!=UnicodeString("abcdefghi", "")) {
188 errln("operator+(UniStr, UniStr) failed");
189 }
190 }
191
192 {
193 // tests for Jitterbug 2360
194 // verify that APIs with source pointer + length accept length == -1
195 // mostly test only where modified, only few functions did not already do this
196 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
197 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
198 }
199
200 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
201 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
202
203 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
204 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
205 }
206 if(t.length()!=u_strlen(buffer)) {
207 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
208 }
209
210 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
211 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
212 }
213 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
214 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
215 }
216
217 buffer[u_strlen(buffer)]=0xe4;
218 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
219 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
220 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
221 }
222 if(u.length()!=UPRV_LENGTHOF(buffer)) {
223 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
224 }
225
226 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
227 UConverter *cnv;
228 UErrorCode errorCode=U_ZERO_ERROR;
229
230 cnv=ucnv_open("ISO-8859-1", &errorCode);
231 UnicodeString v(cs, -1, cnv, errorCode);
232 ucnv_close(cnv);
233 if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
234 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
235 }
236 }
237
238 #if U_CHARSET_IS_UTF8
239 {
240 // Test the hardcoded-UTF-8 UnicodeString optimizations.
241 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
242 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
243 UnicodeString from8a = UnicodeString((const char *)utf8);
244 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
245 UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
246 if(from8a != from16 || from8b != from16) {
247 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
248 }
249 char buffer[16];
250 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
251 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
252 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
253 }
254 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
255 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
256 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
257 }
258 }
259 #endif
260 }
261
262 void
TestCompare()263 UnicodeStringTest::TestCompare()
264 {
265 UnicodeString test1("this is a test");
266 UnicodeString test2("this is a test");
267 UnicodeString test3("this is a test of the emergency broadcast system");
268 UnicodeString test4("never say, \"this is a test\"!!");
269
270 UnicodeString test5((UChar)0x5000);
271 UnicodeString test6((UChar)0x5100);
272
273 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
274 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
275 char chars[] = "this is a test";
276
277 // test operator== and operator!=
278 if (test1 != test2 || test1 == test3 || test1 == test4)
279 errln("operator== or operator!= failed");
280
281 // test operator> and operator<
282 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
283 !(test5 < test6)
284 ) {
285 errln("operator> or operator< failed");
286 }
287
288 // test operator>= and operator<=
289 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
290 errln("operator>= or operator<= failed");
291
292 // test compare(UnicodeString)
293 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
294 errln("compare(UnicodeString) failed");
295
296 //test compare(offset, length, UnicodeString)
297 if(test1.compare(0, 14, test2) != 0 ||
298 test3.compare(0, 14, test2) != 0 ||
299 test4.compare(12, 14, test2) != 0 ||
300 test3.compare(0, 18, test1) <=0 )
301 errln("compare(offset, length, UnicodeString) failes");
302
303 // test compare(UChar*)
304 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
305 errln("compare(UChar*) failed");
306
307 // test compare(char*)
308 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
309 errln("compare(char*) failed");
310
311 // test compare(UChar*, length)
312 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
313 errln("compare(UChar*, length) failed");
314
315 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
316 if (test1.compare(0, 14, test2, 0, 14) != 0
317 || test1.compare(0, 14, test3, 0, 14) != 0
318 || test1.compare(0, 14, test4, 12, 14) != 0)
319 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
320
321 if (test1.compare(10, 4, test2, 0, 4) >= 0
322 || test1.compare(10, 4, test3, 22, 9) <= 0
323 || test1.compare(10, 4, test4, 22, 4) != 0)
324 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
325
326 // test compareBetween
327 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
328 || test1.compareBetween(0, 14, test4, 12, 26) != 0)
329 errln("compareBetween failed");
330
331 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
332 || test1.compareBetween(10, 14, test4, 22, 26) != 0)
333 errln("compareBetween failed");
334
335 // test compare() etc. with strings that share a buffer but are not equal
336 test2=test1; // share the buffer, length() too large for the stackBuffer
337 test2.truncate(1); // change only the length, not the buffer
338 if( test1==test2 || test1<=test2 ||
339 test1.compare(test2)<=0 ||
340 test1.compareCodePointOrder(test2)<=0 ||
341 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
342 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
343 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
344 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
345 ) {
346 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
347 }
348
349 /* test compareCodePointOrder() */
350 {
351 /* these strings are in ascending order */
352 static const UChar strings[][4]={
353 { 0x61, 0 }, /* U+0061 */
354 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
355 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
356 { 0xd800, 0 }, /* U+d800 */
357 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
358 { 0xdfff, 0 }, /* U+dfff */
359 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
360 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
361 { 0xd800, 0xdc02, 0 }, /* U+10002 */
362 { 0xd84d, 0xdc56, 0 } /* U+23456 */
363 };
364 UnicodeString u[20]; // must be at least as long as strings[]
365 int32_t i;
366
367 for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
368 u[i]=UnicodeString(TRUE, strings[i], -1);
369 }
370
371 for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
372 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
373 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
374 }
375 }
376 }
377
378 /* test caseCompare() */
379 {
380 static const UChar
381 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
382 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
383 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
384 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
385
386 UnicodeString
387 mixed(TRUE, _mixed, -1),
388 otherDefault(TRUE, _otherDefault, -1),
389 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
390 different(TRUE, _different, -1);
391
392 int8_t result;
393
394 /* test caseCompare() */
395 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
396 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
397 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
398 }
399 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
400 if(result!=0) {
401 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
402 }
403 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
404 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
405 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
406 }
407
408 /* test caseCompare() */
409 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
410 if(result<=0) {
411 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
412 }
413
414 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
415 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
416 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
417 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
418 }
419
420 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
421 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
422 if(result<=0) {
423 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
424 }
425 }
426
427 // test that srcLength=-1 is handled in functions that
428 // take input const UChar */int32_t srcLength (j785)
429 {
430 static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
431 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
432
433 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
434 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
435 }
436
437 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
438 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
439 }
440
441 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
442 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
443 }
444
445 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
446 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
447 }
448
449 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
450 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
451 }
452
453 UnicodeString s2, s3;
454 s2.replace(0, 0, u+1, -1);
455 s3.replace(0, 0, u, 1, -1);
456 if(s.compare(1, 999, s2)!=0 || s2!=s3) {
457 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
458 }
459 }
460 }
461
462 void
TestExtract()463 UnicodeStringTest::TestExtract()
464 {
465 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
466 UnicodeString test2;
467 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
468 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
469 UnicodeString test5;
470 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471
472 test1.extract(11, 12, test2);
473 test1.extract(11, 12, test3);
474 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
475 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
476 }
477
478 // test proper pinning in extractBetween()
479 test1.extractBetween(-3, 7, test5);
480 if(test5!=UNICODE_STRING("Now is ", 7)) {
481 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
482 }
483
484 test1.extractBetween(11, 23, test5);
485 if (test1.extract(60, 71, test6) != 9) {
486 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
487 }
488 if (test1.extract(11, 12, test6) != 12) {
489 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
490 }
491
492 // convert test4 back to Unicode for comparison
493 UnicodeString test4b(test4, 12);
494
495 if (test1.extract(11, 12, (char *)NULL) != 12) {
496 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
497 }
498 if (test1.extract(11, -1, test6) != 0) {
499 errln("UnicodeString.extract(-1) failed to stop reading the string.");
500 }
501
502 for (int32_t i = 0; i < 12; i++) {
503 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
504 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
505 break;
506 }
507 if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
508 errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
509 break;
510 }
511 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
512 errln(UnicodeString("extracting into an array of char failed at position ") + i);
513 break;
514 }
515 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
516 errln(UnicodeString("extracting with extractBetween failed at position ") + i);
517 break;
518 }
519 }
520
521 // test preflighting and overflows with invariant conversion
522 if (test1.extract(0, 10, (char *)NULL, "") != 10) {
523 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
524 }
525
526 test4[2] = (char)0xff;
527 if (test1.extract(0, 10, test4, 2, "") != 10) {
528 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
529 }
530 if (test4[2] != (char)0xff) {
531 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
532 }
533
534 {
535 // test new, NUL-terminating extract() function
536 UnicodeString s("terminate", "");
537 UChar dest[20]={
538 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
539 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
540 };
541 UErrorCode errorCode;
542 int32_t length;
543
544 errorCode=U_ZERO_ERROR;
545 length=s.extract((UChar *)NULL, 0, errorCode);
546 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
547 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
548 }
549
550 errorCode=U_ZERO_ERROR;
551 length=s.extract(dest, s.length()-1, errorCode);
552 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
553 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
554 length, u_errorName(errorCode), s.length());
555 }
556
557 errorCode=U_ZERO_ERROR;
558 length=s.extract(dest, s.length(), errorCode);
559 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
560 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
561 length, u_errorName(errorCode), s.length());
562 }
563 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
564 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
565 }
566
567 errorCode=U_ZERO_ERROR;
568 length=s.extract(dest, s.length()+1, errorCode);
569 if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
570 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
571 length, u_errorName(errorCode), s.length());
572 }
573 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
574 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
575 }
576 }
577
578 {
579 // test new UConverter extract() and constructor
580 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
581 char buffer[32];
582 static const char expect[]={
583 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
584 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
585 (char)0xc3, (char)0x84,
586 (char)0xe1, (char)0xbb, (char)0x90
587 };
588 UErrorCode errorCode=U_ZERO_ERROR;
589 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
590 int32_t length;
591
592 if(U_SUCCESS(errorCode)) {
593 // test preflighting
594 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
595 errorCode!=U_BUFFER_OVERFLOW_ERROR
596 ) {
597 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
598 length, u_errorName(errorCode));
599 }
600 errorCode=U_ZERO_ERROR;
601 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
602 errorCode!=U_BUFFER_OVERFLOW_ERROR
603 ) {
604 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
605 length, u_errorName(errorCode));
606 }
607
608 // try error cases
609 errorCode=U_ZERO_ERROR;
610 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
611 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
612 }
613 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
614 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
615 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
616 }
617 errorCode=U_ZERO_ERROR;
618
619 // extract for real
620 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
621 uprv_memcmp(buffer, expect, 13)!=0 ||
622 buffer[13]!=0 ||
623 U_FAILURE(errorCode)
624 ) {
625 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
626 length, u_errorName(errorCode));
627 }
628 // Test again with just the converter name.
629 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
630 uprv_memcmp(buffer, expect, 13)!=0 ||
631 buffer[13]!=0 ||
632 U_FAILURE(errorCode)
633 ) {
634 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
635 length, u_errorName(errorCode));
636 }
637
638 // try the constructor
639 UnicodeString t(expect, sizeof(expect), cnv, errorCode);
640 if(U_FAILURE(errorCode) || s!=t) {
641 errln("UnicodeString(UConverter) conversion failed (%s)",
642 u_errorName(errorCode));
643 }
644
645 ucnv_close(cnv);
646 }
647 }
648 }
649
650 void
TestRemoveReplace()651 UnicodeStringTest::TestRemoveReplace()
652 {
653 UnicodeString test1("The rain in Spain stays mainly on the plain");
654 UnicodeString test2("eat SPAMburgers!");
655 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
656 char test4[] = "SPAM";
657 UnicodeString& test5 = test1;
658
659 test1.replace(4, 4, test2, 4, 4);
660 test1.replace(12, 5, test3, 4);
661 test3[4] = 0;
662 test1.replace(17, 4, test3);
663 test1.replace(23, 4, test4);
664 test1.replaceBetween(37, 42, test2, 4, 8);
665
666 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
667 errln("One of the replace methods failed:\n"
668 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
669 " got \"" + test1 + "\"");
670
671 test1.remove(21, 1);
672 test1.removeBetween(26, 28);
673
674 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
675 errln("One of the remove methods failed:\n"
676 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
677 " got \"" + test1 + "\"");
678
679 for (int32_t i = 0; i < test1.length(); i++) {
680 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
681 test1.setCharAt(i, 0x78);
682 }
683 }
684
685 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
686 errln("One of the remove methods failed:\n"
687 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
688 " got \"" + test1 + "\"");
689
690 test1.remove();
691 if (test1.length() != 0)
692 errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
693 }
694
695 void
TestSearching()696 UnicodeStringTest::TestSearching()
697 {
698 UnicodeString test1("test test ttest tetest testesteststt");
699 UnicodeString test2("test");
700 UChar testChar = 0x74;
701
702 UChar32 testChar32 = 0x20402;
703 UChar testData[]={
704 // 0 1 2 3 4 5 6 7
705 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
706
707 // 8 9 10 11 12 13 14 15
708 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
709
710 // 16 17 18 19
711 0xdc02, 0xd841, 0x0073, 0x0000
712 };
713 UnicodeString test3(testData);
714 UnicodeString test4(testChar32);
715
716 uint16_t occurrences = 0;
717 int32_t startPos = 0;
718 for ( ;
719 startPos != -1 && startPos < test1.length();
720 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
721 ;
722 if (occurrences != 6)
723 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
724
725 for ( occurrences = 0, startPos = 10;
726 startPos != -1 && startPos < test1.length();
727 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
728 ;
729 if (occurrences != 4)
730 errln(UnicodeString("indexOf with starting offset failed: "
731 "expected to find 4 occurrences, found ") + occurrences);
732
733 int32_t endPos = 28;
734 for ( occurrences = 0, startPos = 5;
735 startPos != -1 && startPos < test1.length();
736 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
737 ;
738 if (occurrences != 4)
739 errln(UnicodeString("indexOf with starting and ending offsets failed: "
740 "expected to find 4 occurrences, found ") + occurrences);
741
742 //using UChar32 string
743 for ( startPos=0, occurrences=0;
744 startPos != -1 && startPos < test3.length();
745 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
746 ;
747 if (occurrences != 4)
748 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
749
750 for ( startPos=10, occurrences=0;
751 startPos != -1 && startPos < test3.length();
752 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
753 ;
754 if (occurrences != 2)
755 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
756 //---
757
758 for ( occurrences = 0, startPos = 0;
759 startPos != -1 && startPos < test1.length();
760 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
761 ;
762 if (occurrences != 16)
763 errln(UnicodeString("indexOf with character failed: "
764 "expected to find 16 occurrences, found ") + occurrences);
765
766 for ( occurrences = 0, startPos = 10;
767 startPos != -1 && startPos < test1.length();
768 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
769 ;
770 if (occurrences != 12)
771 errln(UnicodeString("indexOf with character & start offset failed: "
772 "expected to find 12 occurrences, found ") + occurrences);
773
774 for ( occurrences = 0, startPos = 5, endPos = 28;
775 startPos != -1 && startPos < test1.length();
776 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
777 ;
778 if (occurrences != 10)
779 errln(UnicodeString("indexOf with character & start & end offsets failed: "
780 "expected to find 10 occurrences, found ") + occurrences);
781
782 //testing for UChar32
783 UnicodeString subString;
784 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
785 subString.append(test3, startPos, test3.length());
786 if(subString.indexOf(testChar32) != -1 ){
787 ++occurrences;
788 }
789 subString.remove();
790 }
791 if (occurrences != 14)
792 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
793
794 for ( occurrences = 0, startPos = 0;
795 startPos != -1 && startPos < test3.length();
796 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
797 ;
798 if (occurrences != 4)
799 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
800
801 endPos=test3.length();
802 for ( occurrences = 0, startPos = 5;
803 startPos != -1 && startPos < test3.length();
804 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
805 ;
806 if (occurrences != 3)
807 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
808 //---
809
810 if(test1.lastIndexOf(test2)!=29) {
811 errln("test1.lastIndexOf(test2)!=29");
812 }
813
814 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
815 errln("test1.lastIndexOf(test2, start) failed");
816 }
817
818 for ( occurrences = 0, startPos = 32;
819 startPos != -1;
820 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
821 ;
822 if (occurrences != 4)
823 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
824 "expected to find 4 occurrences, found ") + occurrences);
825
826 for ( occurrences = 0, startPos = 32;
827 startPos != -1;
828 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
829 ;
830 if (occurrences != 11)
831 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
832 "expected to find 11 occurrences, found ") + occurrences);
833
834 //testing UChar32
835 startPos=test3.length();
836 for ( occurrences = 0;
837 startPos != -1;
838 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
839 ;
840 if (occurrences != 3)
841 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
842
843
844 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
845 subString.remove();
846 subString.append(test3, 0, endPos);
847 if(subString.lastIndexOf(testChar32) != -1 ){
848 ++occurrences;
849 }
850 }
851 if (occurrences != 18)
852 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
853 //---
854
855 // test that indexOf(UChar32) and lastIndexOf(UChar32)
856 // do not find surrogate code points when they are part of matched pairs
857 // (= part of supplementary code points)
858 // Jitterbug 1542
859 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
860 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
861 }
862 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
863 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
864 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
865 ) {
866 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
867 }
868 }
869
870 void
TestSpacePadding()871 UnicodeStringTest::TestSpacePadding()
872 {
873 UnicodeString test1("hello");
874 UnicodeString test2(" there");
875 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
876 UnicodeString test4;
877 UBool returnVal;
878 UnicodeString expectedValue;
879
880 returnVal = test1.padLeading(15);
881 expectedValue = " hello";
882 if (returnVal == FALSE || test1 != expectedValue)
883 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
884
885 returnVal = test2.padTrailing(15);
886 expectedValue = " there ";
887 if (returnVal == FALSE || test2 != expectedValue)
888 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
889
890 expectedValue = test3;
891 returnVal = test3.padTrailing(15);
892 if (returnVal == TRUE || test3 != expectedValue)
893 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
894
895 expectedValue = "hello";
896 test4.setTo(test1).trim();
897
898 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
899 errln("trim(UnicodeString&) failed");
900
901 test1.trim();
902 if (test1 != expectedValue)
903 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
904
905 test2.trim();
906 expectedValue = "there";
907 if (test2 != expectedValue)
908 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
909
910 test3.trim();
911 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
912 if (test3 != expectedValue)
913 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
914
915 returnVal = test1.truncate(15);
916 expectedValue = "hello";
917 if (returnVal == TRUE || test1 != expectedValue)
918 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
919
920 returnVal = test2.truncate(15);
921 expectedValue = "there";
922 if (returnVal == TRUE || test2 != expectedValue)
923 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
924
925 returnVal = test3.truncate(15);
926 expectedValue = "Hi! How ya doi";
927 if (returnVal == FALSE || test3 != expectedValue)
928 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
929 }
930
931 void
TestPrefixAndSuffix()932 UnicodeStringTest::TestPrefixAndSuffix()
933 {
934 UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
935 UnicodeString test2("Now");
936 UnicodeString test3("country.");
937 UnicodeString test4("count");
938
939 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
940 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
941 }
942
943 if (test1.startsWith(test3) ||
944 test1.startsWith(test3.getBuffer(), test3.length()) ||
945 test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
946 ) {
947 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
948 }
949
950 if (test1.endsWith(test2)) {
951 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
952 }
953
954 if (!test1.endsWith(test3)) {
955 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
956 }
957 if (!test1.endsWith(test3, 0, INT32_MAX)) {
958 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959 }
960
961 if(!test1.endsWith(test3.getBuffer(), test3.length())) {
962 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
963 }
964 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
965 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966 }
967
968 if (!test3.startsWith(test4)) {
969 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
970 }
971
972 if (test4.startsWith(test3)) {
973 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
974 }
975 }
976
977 void
TestStartsWithAndEndsWithNulTerminated()978 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
979 UnicodeString test("abcde");
980 const UChar ab[] = { 0x61, 0x62, 0 };
981 const UChar de[] = { 0x64, 0x65, 0 };
982 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
983 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
984 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
985 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
986 }
987
988 void
TestFindAndReplace()989 UnicodeStringTest::TestFindAndReplace()
990 {
991 UnicodeString test1("One potato, two potato, three potato, four\n");
992 UnicodeString test2("potato");
993 UnicodeString test3("MISSISSIPPI");
994
995 UnicodeString expectedValue;
996
997 test1.findAndReplace(test2, test3);
998 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
999 if (test1 != expectedValue)
1000 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1001 test1.findAndReplace(2, 32, test3, test2);
1002 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1003 if (test1 != expectedValue)
1004 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1005 }
1006
1007 void
TestReverse()1008 UnicodeStringTest::TestReverse()
1009 {
1010 UnicodeString test("backwards words say to used I");
1011
1012 test.reverse();
1013 test.reverse(2, 4);
1014 test.reverse(7, 2);
1015 test.reverse(10, 3);
1016 test.reverse(14, 5);
1017 test.reverse(20, 9);
1018
1019 if (test != "I used to say words backwards")
1020 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
1021 + test + "\"");
1022
1023 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1024 test.reverse();
1025 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1026 errln("reverse() failed with supplementary characters");
1027 }
1028
1029 // Test case for ticket #8091:
1030 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1031 // an odd-length string that contains no other lead surrogates.
1032 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1033 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1034 test.reverse();
1035 if(test!=expected) {
1036 errln("reverse() failed with only lead surrogate in the middle");
1037 }
1038 }
1039
/**
 * Tests assorted buffer-level UnicodeString APIs: getBuffer(minCapacity) /
 * releaseBuffer(), the const getBuffer(), getTerminatedBuffer() on owned,
 * aliased and shared strings, plus append/insert/replace overloads and
 * hasMetaData(). The steps build on each other, so their order matters.
 */
void
UnicodeStringTest::TestMiscellaneous()
{
    UnicodeString test1("This is a test");
    UnicodeString test2("This is a test");
    UnicodeString test3("Me too!");

    // test getBuffer(minCapacity) and releaseBuffer()
    test1=UnicodeString(); // make sure that it starts with its stackBuffer
    UChar *p=test1.getBuffer(20);
    if(test1.getCapacity()<20) {
        errln("UnicodeString::getBuffer(20).getCapacity()<20");
    }

    // While the writable buffer is checked out, the string is expected to look
    // empty and to ignore modifications; nested getBuffer() calls must return 0.
    test1.append((UChar)7); // must not be able to modify the string here
    test1.setCharAt(3, 7);
    test1.reverse();
    if( test1.length()!=0 ||
        test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
        test1.getBuffer(10)!=0 || test1.getBuffer()!=0
    ) {
        errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
    }

    // Write through the raw pointer, then hand the buffer back with its new length.
    p[0]=1;
    p[1]=2;
    p[2]=3;
    test1.releaseBuffer(3);
    test1.append((UChar)4);

    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
        errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
    }

    // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
    test1.releaseBuffer(1);
    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
        errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
    }

    // test getBuffer(const)
    // (two read-only pointers may be outstanding at the same time)
    const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
    if( test1.length()!=4 ||
        q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
        r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
    ) {
        errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
    }

    // test releaseBuffer() with a NUL-terminated buffer
    // (the NUL written at index 2 is expected to determine the new length)
    test1.getBuffer(20)[2]=0;
    test1.releaseBuffer(); // implicit -1
    if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
        errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
    }

    // test releaseBuffer() with a non-NUL-terminated buffer
    // (with no NUL anywhere, the length is expected to become the full capacity)
    p=test1.getBuffer(256);
    for(int32_t i=0; i<test1.getCapacity(); ++i) {
        p[i]=(UChar)1; // fill the buffer with all non-NUL code units
    }
    test1.releaseBuffer(); // implicit -1
    if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
        errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
    }

    // test getTerminatedBuffer()
    test1=UnicodeString("This is another test.", "");
    test2=UnicodeString("This is another test.", "");
    q=test1.getTerminatedBuffer();
    if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
        errln("getTerminatedBuffer()[length]!=0");
    }

    // setTo(FALSE, u, 3): u[3] is not NUL, so the terminated buffer must be a
    // different buffer from u, holding the first 3 units followed by a NUL
    const UChar u[]={ 5, 6, 7, 8, 0 };
    test1.setTo(FALSE, u, 3);
    q=test1.getTerminatedBuffer();
    if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
        errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
    }

    // setTo(TRUE, u, -1): u is NUL-terminated at the aliased length,
    // so getTerminatedBuffer() is expected to return u itself
    test1.setTo(TRUE, u, -1);
    q=test1.getTerminatedBuffer();
    if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
        errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
    }

    // NOTE: Some compilers will optimize u"la" to point to the same static memory
    // as u" lila", offset by 3 code units
    test1=UnicodeString(TRUE, u"la", 2);
    test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
    assertEquals("UnicodeString::append(const UChar *, start, length) failed",
        u"la lila", test1);

    test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
    assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
        u"la dudum lila", test1);

    // ucs is "hm " followed by a NUL; a length of -1 is passed with it
    static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
    test1.insert(9, ucs, -1);
    assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
        u"la dudum hm lila", test1);

    // replaces the two units "hm" with a single '+'
    test1.replace(9, 2, (UChar)0x2b);
    assertEquals("UnicodeString::replace(start, length, UChar) failed",
        u"la dudum + lila", test1);

    if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
        errln("UnicodeString::hasMetaData() returns TRUE");
    }

    // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
    test1.truncate(36);  // ensure length()<getCapacity()
    test2=test1;  // share the buffer
    test1.truncate(5);
    if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
        errln("UnicodeString(shared buffer).truncate() failed");
    }
    // terminating test1 must not have written a NUL into the copy held by test2
    if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
        errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
              "modified another copy of the string!");
    }
    // same scenario, but emptying test1 with remove() instead of truncate()
    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
    test1.truncate(36);  // ensure length()<getCapacity()
    test2=test1;  // share the buffer
    test1.remove();
    if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
        errln("UnicodeString(shared buffer).remove() failed");
    }
    if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
        errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
              "modified another copy of the string!");
    }

    // ticket #9740
    // trim() on a read-only alias shortens it to 2 units; the terminated
    // buffer must then be terminated at that new length
    test1.setTo(TRUE, ucs, 3);
    assertEquals("length of read-only alias", 3, test1.length());
    test1.trim();
    assertEquals("length of read-only alias after trim()", 2, test1.length());
    assertEquals("length of terminated buffer of read-only alias + trim()",
                 2, u_strlen(test1.getTerminatedBuffer()));
}
1183
1184 void
TestStackAllocation()1185 UnicodeStringTest::TestStackAllocation()
1186 {
1187 UChar testString[] ={
1188 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1189 UChar guardWord = 0x4DED;
1190 UnicodeString* test = 0;
1191
1192 test = new UnicodeString(testString);
1193 if (*test != "This is a crazy test.")
1194 errln("Test string failed to initialize properly.");
1195 if (guardWord != 0x04DED)
1196 errln("Test string initialization overwrote guard word!");
1197
1198 test->insert(8, "only ");
1199 test->remove(15, 6);
1200 if (*test != "This is only a test.")
1201 errln("Manipulation of test string failed to work right.");
1202 if (guardWord != 0x4DED)
1203 errln("Manipulation of test string overwrote guard word!");
1204
1205 // we have to deinitialize and release the backing store by calling the destructor
1206 // explicitly, since we can't overload operator delete
1207 delete test;
1208
1209 UChar workingBuffer[] = {
1210 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1211 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1212 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1213 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1215 UChar guardWord2 = 0x4DED;
1216
1217 test = new UnicodeString(workingBuffer, 35, 100);
1218 if (*test != "Now is the time for all men to come")
1219 errln("Stack-allocated backing store failed to initialize correctly.");
1220 if (guardWord2 != 0x4DED)
1221 errln("Stack-allocated backing store overwrote guard word!");
1222
1223 test->insert(24, "good ");
1224 if (*test != "Now is the time for all good men to come")
1225 errln("insert() on stack-allocated UnicodeString didn't work right");
1226 if (guardWord2 != 0x4DED)
1227 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1228
1229 if (workingBuffer[24] != 0x67)
1230 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1231
1232 *test += " to the aid of their country.";
1233 if (*test != "Now is the time for all good men to come to the aid of their country.")
1234 errln("Stack-allocated UnicodeString overflow didn't work");
1235 if (guardWord2 != 0x4DED)
1236 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1237
1238 *test = "ha!";
1239 if (*test != "ha!")
1240 errln("Assignment to stack-allocated UnicodeString didn't work");
1241 if (workingBuffer[0] != 0x4e)
1242 errln("Change to UnicodeString after overflow are still affecting original buffer");
1243 if (guardWord2 != 0x4DED)
1244 errln("Change to UnicodeString after overflow overwrote guard word!");
1245
1246 // test read-only aliasing with setTo()
1247 workingBuffer[0] = 0x20ac;
1248 workingBuffer[1] = 0x125;
1249 workingBuffer[2] = 0;
1250 test->setTo(TRUE, workingBuffer, 2);
1251 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1252 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1253 }
1254
1255 UnicodeString *c=(UnicodeString *)test->clone();
1256
1257 workingBuffer[1] = 0x109;
1258 if(test->charAt(1) != 0x109) {
1259 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1260 }
1261
1262 if(c->length() != 2 || c->charAt(1) != 0x125) {
1263 errln("clone(alias) did not copy the buffer");
1264 }
1265 delete c;
1266
1267 test->setTo(TRUE, workingBuffer, -1);
1268 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1269 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1270 }
1271
1272 test->setTo(FALSE, workingBuffer, -1);
1273 if(!test->isBogus()) {
1274 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1275 }
1276
1277 delete test;
1278
1279 test=new UnicodeString();
1280 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1281 test->setTo(buffer, 4, 10);
1282 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1283 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1284 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1285 }
1286 delete test;
1287
1288
1289 // test the UChar32 constructor
1290 UnicodeString c32Test((UChar32)0x10ff2a);
1291 if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1292 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1293 ) {
1294 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1295 }
1296
1297 // test the (new) capacity constructor
1298 UnicodeString capTest(5, (UChar32)0x2a, 5);
1299 if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1300 capTest.char32At(0) != 0x2a ||
1301 capTest.char32At(4) != 0x2a
1302 ) {
1303 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1304 }
1305
1306 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1307 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1308 capTest.char32At(0) != 0x10ff2a ||
1309 capTest.char32At(4) != 0x10ff2a
1310 ) {
1311 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1312 }
1313
1314 capTest = UnicodeString(5, (UChar32)0, 0);
1315 if(capTest.length() != 0) {
1316 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1317 }
1318 }
1319
1320 /**
1321 * Test the unescape() function.
1322 */
TestUnescape(void)1323 void UnicodeStringTest::TestUnescape(void) {
1324 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1325 UnicodeString OUT("abc");
1326 OUT.append((UChar)0x4567);
1327 OUT.append(" ");
1328 OUT.append((UChar)0xA);
1329 OUT.append((UChar)0xD);
1330 OUT.append(" ");
1331 OUT.append((UChar32)0x00101234);
1332 OUT.append("xyz");
1333 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1334 UnicodeString result = IN.unescape();
1335 if (result != OUT) {
1336 errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1337 prettify(result) + ", expected " +
1338 prettify(OUT));
1339 }
1340
1341 // test that an empty string is returned in case of an error
1342 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1343 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1344 }
1345 }
1346
1347 /* test code point counting functions --------------------------------------- */
1348
1349 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1350 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1351 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1352 int32_t count=s.countChar32(start, length);
1353 return count>number;
1354 }
1355
1356 /* compare the real function against the reference */
1357 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1358 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1359 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1360 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1361 start, length, number, s.hasMoreChar32Than(start, length, number));
1362 }
1363 }
1364
1365 void
TestCountChar32(void)1366 UnicodeStringTest::TestCountChar32(void) {
1367 {
1368 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1369
1370 // test countChar32()
1371 // note that this also calls and tests u_countChar32(length>=0)
1372 if(
1373 s.countChar32()!=4 ||
1374 s.countChar32(1)!=4 ||
1375 s.countChar32(2)!=3 ||
1376 s.countChar32(2, 3)!=2 ||
1377 s.countChar32(2, 0)!=0
1378 ) {
1379 errln("UnicodeString::countChar32() failed");
1380 }
1381
1382 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1383 const UChar *buffer=s.getTerminatedBuffer();
1384 if(
1385 u_countChar32(buffer, -1)!=4 ||
1386 u_countChar32(buffer+1, -1)!=4 ||
1387 u_countChar32(buffer+2, -1)!=3 ||
1388 u_countChar32(buffer+3, -1)!=3 ||
1389 u_countChar32(buffer+4, -1)!=2 ||
1390 u_countChar32(buffer+5, -1)!=1 ||
1391 u_countChar32(buffer+6, -1)!=0
1392 ) {
1393 errln("u_countChar32(length=-1) failed");
1394 }
1395
1396 // test u_countChar32() with bad input
1397 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1398 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1399 }
1400 }
1401
1402 /* test data and variables for hasMoreChar32Than() */
1403 static const UChar str[]={
1404 0x61, 0x62, 0xd800, 0xdc00,
1405 0xd801, 0xdc01, 0x63, 0xd802,
1406 0x64, 0xdc03, 0x65, 0x66,
1407 0xd804, 0xdc04, 0xd805, 0xdc05,
1408 0x67
1409 };
1410 UnicodeString string(str, UPRV_LENGTHOF(str));
1411 int32_t start, length, number;
1412
1413 /* test hasMoreChar32Than() */
1414 for(length=string.length(); length>=0; --length) {
1415 for(start=0; start<=length; ++start) {
1416 for(number=-1; number<=((length-start)+2); ++number) {
1417 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1418 }
1419 }
1420 }
1421
1422 /* test hasMoreChar32Than() with pinning */
1423 for(start=-1; start<=string.length()+1; ++start) {
1424 for(number=-1; number<=((string.length()-start)+2); ++number) {
1425 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1426 }
1427 }
1428
1429 /* test hasMoreChar32Than() with a bogus string */
1430 string.setToBogus();
1431 for(length=-1; length<=1; ++length) {
1432 for(start=-1; start<=length; ++start) {
1433 for(number=-1; number<=((length-start)+2); ++number) {
1434 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1435 }
1436 }
1437 }
1438 }
1439
/**
 * Tests the "bogus" state of UnicodeString: how a string becomes bogus,
 * which operations must leave it bogus, which assignments revive it,
 * the documented ways of turning it into an empty string, how NULL
 * pointer inputs are handled, and how bogus strings compare.
 *
 * The checks reuse and mutate test1..test3 throughout, so the order of
 * the sections matters.
 */
void
UnicodeStringTest::TestBogus() {
    UnicodeString   test1("This is a test");
    UnicodeString   test2("This is a test");
    UnicodeString   test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned TRUE for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    test3.setTo(FALSE, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned FALSE for isBogus()!");
    }
    // equal strings must hash equally; the bogus string must hash differently from test1
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    // no buffer accessor may hand out a pointer for a bogus string
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    // a bogus string is expected to sort before a normal string
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    // (each check first confirms the bogus state, then assigns, then compares)
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() ||
            test3.setTo(const_cast<UChar *>(test1.getBuffer()),
                        test1.length(), test1.getCapacity()).isBogus() ||
            test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    // unlike remove(1) above, remove() without arguments must revive the string
    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    // unlike truncate(1) above, truncate(0) must revive the string (and still return FALSE)
    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // assigning a bogus string makes the target bogus too; test1 stays bogus from here on
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, nullptr, 1).length()!=2) {
        errln("UniStr.insert(...nullptr...) should not modify the string but does");
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(TRUE, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    test4.setTo(NULL, 3);
    test5.setTo(TRUE, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    // (test1 and test3 are both bogus at this point)
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // a bogus string must compare less than even an empty string
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }
}
1648
1649 // StringEnumeration ------------------------------------------------------- ***
1650 // most of StringEnumeration is tested elsewhere
1651 // this test improves code coverage
1652
// Fixture strings handed out, in this order, by TestEnumeration below.
// The last two entries are long on purpose (see the text of the fourth one).
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1661
1662 class TestEnumeration : public StringEnumeration {
1663 public:
TestEnumeration()1664 TestEnumeration() : i(0) {}
1665
count(UErrorCode &) const1666 virtual int32_t count(UErrorCode& /*status*/) const {
1667 return UPRV_LENGTHOF(testEnumStrings);
1668 }
1669
snext(UErrorCode & status)1670 virtual const UnicodeString *snext(UErrorCode &status) {
1671 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1672 unistr=UnicodeString(testEnumStrings[i++], "");
1673 return &unistr;
1674 }
1675
1676 return NULL;
1677 }
1678
reset(UErrorCode &)1679 virtual void reset(UErrorCode& /*status*/) {
1680 i=0;
1681 }
1682
getStaticClassID()1683 static inline UClassID getStaticClassID() {
1684 return (UClassID)&fgClassID;
1685 }
getDynamicClassID() const1686 virtual UClassID getDynamicClassID() const {
1687 return getStaticClassID();
1688 }
1689
1690 private:
1691 static const char fgClassID;
1692
1693 int32_t i;
1694 };
1695
1696 const char TestEnumeration::fgClassID=0;
1697
1698 void
TestStringEnumeration()1699 UnicodeStringTest::TestStringEnumeration() {
1700 UnicodeString s;
1701 TestEnumeration ten;
1702 int32_t i, length;
1703 UErrorCode status;
1704
1705 const UChar *pu;
1706 const char *pc;
1707
1708 // test the next() default implementation and ensureCharsCapacity()
1709 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1710 status=U_ZERO_ERROR;
1711 pc=ten.next(&length, status);
1712 s=UnicodeString(testEnumStrings[i], "");
1713 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1714 errln("StringEnumeration.next(%d) failed", i);
1715 }
1716 }
1717 status=U_ZERO_ERROR;
1718 if(ten.next(&length, status)!=NULL) {
1719 errln("StringEnumeration.next(done)!=NULL");
1720 }
1721
1722 // test the unext() default implementation
1723 ten.reset(status);
1724 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1725 status=U_ZERO_ERROR;
1726 pu=ten.unext(&length, status);
1727 s=UnicodeString(testEnumStrings[i], "");
1728 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1729 errln("StringEnumeration.unext(%d) failed", i);
1730 }
1731 }
1732 status=U_ZERO_ERROR;
1733 if(ten.unext(&length, status)!=NULL) {
1734 errln("StringEnumeration.unext(done)!=NULL");
1735 }
1736
1737 // test that the default clone() implementation works, and returns NULL
1738 if(ten.clone()!=NULL) {
1739 errln("StringEnumeration.clone()!=NULL");
1740 }
1741
1742 // test that uenum_openFromStringEnumeration() works
1743 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1744 StringEnumeration *newTen = new TestEnumeration;
1745 status=U_ZERO_ERROR;
1746 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1747 if (uten==NULL || U_FAILURE(status)) {
1748 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1749 return;
1750 }
1751
1752 // test uenum_next()
1753 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1754 status=U_ZERO_ERROR;
1755 pc=uenum_next(uten, &length, &status);
1756 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1757 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1758 }
1759 }
1760 status=U_ZERO_ERROR;
1761 if(uenum_next(uten, &length, &status)!=NULL) {
1762 errln("File %s, line %d, uenum_next(done)!=NULL");
1763 }
1764
1765 // test the uenum_unext()
1766 uenum_reset(uten, &status);
1767 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1768 status=U_ZERO_ERROR;
1769 pu=uenum_unext(uten, &length, &status);
1770 s=UnicodeString(testEnumStrings[i], "");
1771 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1772 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1773 }
1774 }
1775 status=U_ZERO_ERROR;
1776 if(uenum_unext(uten, &length, &status)!=NULL) {
1777 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1778 }
1779
1780 uenum_close(uten);
1781 }
1782
1783 /*
1784 * Namespace test, to make sure that macros like UNICODE_STRING include the
1785 * namespace qualifier.
1786 *
1787 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1788 */
1789 namespace bogus {
1790 class UnicodeString {
1791 public:
1792 enum EInvariant { kInvariant };
UnicodeString()1793 UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1794 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1795 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1796 ) : i(length) {}
1797 private:
1798 int32_t i;
1799 };
1800 }
1801
1802 void
TestNameSpace()1803 UnicodeStringTest::TestNameSpace() {
1804 // Provoke name collision unless the UnicodeString macros properly
1805 // qualify the icu::UnicodeString class.
1806 using namespace bogus;
1807
1808 // Use all UnicodeString macros from unistr.h.
1809 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1810 icu::UnicodeString s2=UNICODE_STRING("def", 3);
1811 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1812
1813 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1814 icu::UnicodeString s4=s1+s2+s3;
1815 if(s4.length()!=9) {
1816 errln("Something wrong with UnicodeString::operator+().");
1817 }
1818 }
1819
1820 void
TestUTF32()1821 UnicodeStringTest::TestUTF32() {
1822 // Input string length US_STACKBUF_SIZE to cause overflow of the
1823 // initially chosen fStackBuffer due to supplementary characters.
1824 static const UChar32 utf32[] = {
1825 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1826 0x10000, 0x20000, 0xe0000, 0x10ffff
1827 };
1828 static const UChar expected_utf16[] = {
1829 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1830 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1831 };
1832 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1833 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1834 if(from32 != expected) {
1835 errln("UnicodeString::fromUTF32() did not create the expected string.");
1836 }
1837
1838 static const UChar utf16[] = {
1839 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1840 };
1841 static const UChar32 expected_utf32[] = {
1842 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1843 };
1844 UChar32 result32[16];
1845 UErrorCode errorCode = U_ZERO_ERROR;
1846 int32_t length32 =
1847 UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1848 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1849 if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1850 0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1851 result32[length32] != 0
1852 ) {
1853 errln("UnicodeString::toUTF32() did not create the expected string.");
1854 }
1855 }
1856
1857 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1858 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1859 TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1860 : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1861 virtual void Flush() { calledFlush = TRUE; }
1862 UBool calledFlush;
1863 };
1864
1865 void
TestUTF8()1866 UnicodeStringTest::TestUTF8() {
1867 static const uint8_t utf8[] = {
1868 // Code points:
1869 // 0x41, 0xd900,
1870 // 0x61, 0xdc00,
1871 // 0x110000, 0x5a,
1872 // 0x50000, 0x7a,
1873 // 0x10000, 0x20000,
1874 // 0xe0000, 0x10ffff
1875 0x41, 0xed, 0xa4, 0x80,
1876 0x61, 0xed, 0xb0, 0x80,
1877 0xf4, 0x90, 0x80, 0x80, 0x5a,
1878 0xf1, 0x90, 0x80, 0x80, 0x7a,
1879 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1880 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1881 };
1882 static const UChar expected_utf16[] = {
1883 0x41, 0xfffd, 0xfffd, 0xfffd,
1884 0x61, 0xfffd, 0xfffd, 0xfffd,
1885 0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a,
1886 0xd900, 0xdc00, 0x7a,
1887 0xd800, 0xdc00, 0xd840, 0xdc00,
1888 0xdb40, 0xdc00, 0xdbff, 0xdfff
1889 };
1890 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1891 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1892
1893 if(from8 != expected) {
1894 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1895 }
1896 std::string utf8_string((const char *)utf8, sizeof(utf8));
1897 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1898 if(from8b != expected) {
1899 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1900 }
1901
1902 static const UChar utf16[] = {
1903 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1904 };
1905 static const uint8_t expected_utf8[] = {
1906 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1907 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1908 };
1909 UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1910
1911 char buffer[64];
1912 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1913 us.toUTF8(sink);
1914 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1915 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1916 ) {
1917 errln("UnicodeString::toUTF8() did not create the expected string.");
1918 }
1919 if(!sink.calledFlush) {
1920 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1921 }
1922 // Initial contents for testing that toUTF8String() appends.
1923 std::string result8 = "-->";
1924 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1925 // Use the return value just for testing.
1926 std::string &result8r = us.toUTF8String(result8);
1927 if(result8r != expected8 || &result8r != &result8) {
1928 errln("UnicodeString::toUTF8String() did not create the expected string.");
1929 }
1930 }
1931
1932 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1933 static UnicodeString wrapUChars(const UChar *uchars) {
1934 return UnicodeString(TRUE, uchars, -1);
1935 }
1936
1937 void
TestReadOnlyAlias()1938 UnicodeStringTest::TestReadOnlyAlias() {
1939 UChar uchars[]={ 0x61, 0x62, 0 };
1940 UnicodeString alias(TRUE, uchars, 2);
1941 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1942 errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1943 return;
1944 }
1945 alias.truncate(1);
1946 if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1947 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1948 }
1949 if(alias.getTerminatedBuffer()==uchars) {
1950 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1951 "did not allocate and copy as expected.");
1952 }
1953 if(uchars[1]!=0x62) {
1954 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1955 "modified the original buffer.");
1956 }
1957 if(1!=u_strlen(alias.getTerminatedBuffer())) {
1958 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1959 "does not return a buffer terminated at the proper length.");
1960 }
1961
1962 alias.setTo(TRUE, uchars, 2);
1963 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1964 errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1965 return;
1966 }
1967 alias.remove();
1968 if(alias.length()!=0) {
1969 errln("UnicodeString(read-only-alias).remove() did not work.");
1970 }
1971 if(alias.getTerminatedBuffer()==uchars) {
1972 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1973 "did not un-alias as expected.");
1974 }
1975 if(uchars[0]!=0x61) {
1976 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1977 "modified the original buffer.");
1978 }
1979 if(0!=u_strlen(alias.getTerminatedBuffer())) {
1980 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1981 "does not return a buffer terminated at length 0.");
1982 }
1983
1984 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1985 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1986 alias.remove(0, 10);
1987 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1988 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1989 }
1990 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1991 alias.remove(27, 99);
1992 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1993 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1994 }
1995 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1996 alias.retainBetween(6, 30);
1997 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
1998 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
1999 }
2000
2001 UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2002 UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2003
2004 UnicodeString temp;
2005 temp.fastCopyFrom(longString.tempSubString());
2006 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2007 errln("UnicodeString.tempSubString() failed");
2008 }
2009 temp.fastCopyFrom(longString.tempSubString(-3, 5));
2010 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2011 errln("UnicodeString.tempSubString(-3, 5) failed");
2012 }
2013 temp.fastCopyFrom(longString.tempSubString(17));
2014 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2015 errln("UnicodeString.tempSubString(17) failed");
2016 }
2017 temp.fastCopyFrom(longString.tempSubString(99));
2018 if(!temp.isEmpty()) {
2019 errln("UnicodeString.tempSubString(99) failed");
2020 }
2021 temp.fastCopyFrom(longString.tempSubStringBetween(6));
2022 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2023 errln("UnicodeString.tempSubStringBetween(6) failed");
2024 }
2025 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2026 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2027 errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2028 }
2029 UnicodeString bogusString;
2030 bogusString.setToBogus();
2031 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2032 if(!temp.isBogus()) {
2033 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2034 }
2035 }
2036
2037 void
doTestAppendable(UnicodeString & dest,Appendable & app)2038 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2039 static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2040 static const UChar fg[3]={ 0x66, 0x67, 0 };
2041 if(!app.reserveAppendCapacity(12)) {
2042 errln("Appendable.reserve(12) failed");
2043 }
2044 app.appendCodeUnit(0x61);
2045 app.appendCodePoint(0x62);
2046 app.appendCodePoint(0x50000);
2047 app.appendString(cde, 3);
2048 app.appendString(fg, -1);
2049 UChar scratch[3];
2050 int32_t capacity=-1;
2051 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2052 if(capacity<3) {
2053 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2054 return;
2055 }
2056 static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2057 u_memcpy(buffer, hij, 3);
2058 app.appendString(buffer, 3);
2059 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2060 errln("Appendable.append(...) failed");
2061 }
2062 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2063 if(buffer!=NULL || capacity!=0) {
2064 errln("Appendable.getAppendBuffer(min=0) failed");
2065 }
2066 capacity=1;
2067 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2068 if(buffer!=NULL || capacity!=0) {
2069 errln("Appendable.getAppendBuffer(scratch<min) failed");
2070 }
2071 }
2072
2073 class SimpleAppendable : public Appendable {
2074 public:
SimpleAppendable(UnicodeString & dest)2075 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2076 virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
reset()2077 SimpleAppendable &reset() { str.remove(); return *this; }
2078 private:
2079 UnicodeString &str;
2080 };
2081
2082 void
TestAppendable()2083 UnicodeStringTest::TestAppendable() {
2084 UnicodeString dest;
2085 SimpleAppendable app(dest);
2086 doTestAppendable(dest, app);
2087 }
2088
2089 void
TestUnicodeStringImplementsAppendable()2090 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2091 UnicodeString dest;
2092 UnicodeStringAppendable app(dest);
2093 doTestAppendable(dest, app);
2094 }
2095
2096 void
TestSizeofUnicodeString()2097 UnicodeStringTest::TestSizeofUnicodeString() {
2098 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2099 // See the API comments for UNISTR_OBJECT_SIZE.
2100 size_t sizeofUniStr=sizeof(UnicodeString);
2101 size_t expected=UNISTR_OBJECT_SIZE;
2102 if(expected!=sizeofUniStr) {
2103 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2104 // of the compiler might add more internal padding than expected.
2105 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2106 (int)sizeofUniStr, (int)expected);
2107 }
2108 if(sizeofUniStr<32) {
2109 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2110 }
2111 // We assume that the entire UnicodeString object,
2112 // minus the vtable pointer and 2 bytes for flags and short length,
2113 // is available for internal storage of UChars.
2114 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2115 UnicodeString s;
2116 const UChar *emptyBuffer=s.getBuffer();
2117 for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2118 s.append((UChar)0x2e);
2119 }
2120 const UChar *fullBuffer=s.getBuffer();
2121 if(fullBuffer!=emptyBuffer) {
2122 errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2123 expectedStackBufferLength);
2124 }
2125 const UChar *terminatedBuffer=s.getTerminatedBuffer();
2126 if(terminatedBuffer==emptyBuffer) {
2127 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2128 expectedStackBufferLength);
2129 }
2130 }
2131
2132 void
TestMoveSwap()2133 UnicodeStringTest::TestMoveSwap() {
2134 static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
2135 UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc)); // read-only alias
2136 UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
2137 UnicodeString s3("defg", 4, US_INV); // in stack buffer
2138 const UChar *p = s2.getBuffer();
2139 s1.swap(s2);
2140 if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2141 errln("UnicodeString.swap() did not swap");
2142 }
2143 swap(s2, s3);
2144 if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2145 errln("swap(UnicodeString) did not swap back");
2146 }
2147 UnicodeString s4;
2148 s4.moveFrom(s1);
2149 if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2150 errln("UnicodeString.moveFrom(heap) did not move");
2151 }
2152 UnicodeString s5;
2153 s5.moveFrom(s2);
2154 if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2155 errln("UnicodeString.moveFrom(stack) did not move");
2156 }
2157 UnicodeString s6;
2158 s6.moveFrom(s3);
2159 if(s6.getBuffer() != abc || s6.length() != 3) {
2160 errln("UnicodeString.moveFrom(alias) did not move");
2161 }
2162 infoln("TestMoveSwap() with rvalue references");
2163 s1 = static_cast<UnicodeString &&>(s6);
2164 if(s1.getBuffer() != abc || s1.length() != 3) {
2165 errln("UnicodeString move assignment operator did not move");
2166 }
2167 UnicodeString s7(static_cast<UnicodeString &&>(s4));
2168 if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2169 errln("UnicodeString move constructor did not move");
2170 }
2171
2172 // Move self assignment leaves the object valid but in an undefined state.
2173 // Do it to make sure there is no crash,
2174 // but do not check for any particular resulting value.
2175 s1.moveFrom(s1);
2176 s2.moveFrom(s2);
2177 s3.moveFrom(s3);
2178 s4.moveFrom(s4);
2179 s5.moveFrom(s5);
2180 s6.moveFrom(s6);
2181 s7.moveFrom(s7);
2182 // Simple copy assignment must work.
2183 UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2184 s1 = s6 = s4 = s7 = simple;
2185 if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2186 errln("UnicodeString copy after self-move did not work");
2187 }
2188 }
2189
2190 void
TestUInt16Pointers()2191 UnicodeStringTest::TestUInt16Pointers() {
2192 static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2193 uint16_t arr[4];
2194
2195 UnicodeString expected(u"abc");
2196 assertEquals("abc from pointer", expected, UnicodeString(carr));
2197 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2198 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2199
2200 UnicodeString alias(arr, 0, 4);
2201 alias.append(u'a').append(u'b').append(u'c');
2202 assertEquals("abc from writable alias", expected, alias);
2203 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2204
2205 UErrorCode errorCode = U_ZERO_ERROR;
2206 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2207 TEST_ASSERT_STATUS(errorCode);
2208 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2209 }
2210
2211 void
TestWCharPointers()2212 UnicodeStringTest::TestWCharPointers() {
2213 #if U_SIZEOF_WCHAR_T==2
2214 static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2215 wchar_t arr[4];
2216
2217 UnicodeString expected(u"abc");
2218 assertEquals("abc from pointer", expected, UnicodeString(carr));
2219 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2220 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2221
2222 UnicodeString alias(arr, 0, 4);
2223 alias.append(u'a').append(u'b').append(u'c');
2224 assertEquals("abc from writable alias", expected, alias);
2225 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2226
2227 UErrorCode errorCode = U_ZERO_ERROR;
2228 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2229 TEST_ASSERT_STATUS(errorCode);
2230 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2231 #endif
2232 }
2233
2234 void
TestNullPointers()2235 UnicodeStringTest::TestNullPointers() {
2236 assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2237 assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2238 assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2239
2240 UnicodeString alias(nullptr, 4, 4); // empty, no alias
2241 assertTrue("empty from writable alias", alias.isEmpty());
2242 alias.append(u'a').append(u'b').append(u'c');
2243 UnicodeString expected(u"abc");
2244 assertEquals("abc from writable alias", expected, alias);
2245
2246 UErrorCode errorCode = U_ZERO_ERROR;
2247 UnicodeString(u"def").extract(nullptr, 0, errorCode);
2248 assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2249 }
2250
TestUnicodeStringInsertAppendToSelf()2251 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2252 IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2253
2254 // Test append operation
2255 UnicodeString str(u"foo ");
2256 str.append(str);
2257 str.append(str);
2258 str.append(str);
2259 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2260
2261 // Test append operation with readonly alias to start
2262 str = UnicodeString(TRUE, u"foo ", 4);
2263 str.append(str);
2264 str.append(str);
2265 str.append(str);
2266 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2267
2268 // Test append operation with aliased substring
2269 str = u"abcde";
2270 UnicodeString sub = str.tempSubString(1, 2);
2271 str.append(sub);
2272 assertEquals("", u"abcdebc", str);
2273
2274 // Test append operation with double-aliased substring
2275 str = UnicodeString(TRUE, u"abcde", 5);
2276 sub = str.tempSubString(1, 2);
2277 str.append(sub);
2278 assertEquals("", u"abcdebc", str);
2279
2280 // Test insert operation
2281 str = u"a-*b";
2282 str.insert(2, str);
2283 str.insert(4, str);
2284 str.insert(8, str);
2285 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2286
2287 // Test insert operation with readonly alias to start
2288 str = UnicodeString(TRUE, u"a-*b", 4);
2289 str.insert(2, str);
2290 str.insert(4, str);
2291 str.insert(8, str);
2292 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2293
2294 // Test insert operation with aliased substring
2295 str = u"abcde";
2296 sub = str.tempSubString(1, 3);
2297 str.insert(2, sub);
2298 assertEquals("", u"abbcdcde", str);
2299
2300 // Test insert operation with double-aliased substring
2301 str = UnicodeString(TRUE, u"abcde", 5);
2302 sub = str.tempSubString(1, 3);
2303 str.insert(2, sub);
2304 assertEquals("", u"abbcdcde", str);
2305 }
2306