1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /* file name: strtest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov22
14 * created by: Markus W. Scherer
15 */
16
17 #ifdef U_HAVE_STRING_VIEW
18 #include <string_view>
19 #endif
20
21 #include <cstddef>
22 #include <string.h>
23
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/std_string.h"
27 #include "unicode/stringpiece.h"
28 #include "unicode/unistr.h"
29 #include "unicode/ustring.h"
30 #include "unicode/utf_old.h" // for UTF8_COUNT_TRAIL_BYTES
31 #include "unicode/utf8.h"
32 #include "charstr.h"
33 #include "cstr.h"
34 #include "intltest.h"
35 #include "strtest.h"
36 #include "uinvchar.h"
37
~StringTest()38 StringTest::~StringTest() {}
39
TestEndian(void)40 void StringTest::TestEndian(void) {
41 union {
42 uint8_t byte;
43 uint16_t word;
44 } u;
45 u.word=0x0100;
46 if(U_IS_BIG_ENDIAN!=u.byte) {
47 errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
48 }
49 }
50
TestSizeofTypes(void)51 void StringTest::TestSizeofTypes(void) {
52 if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
53 errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
54 }
55 #ifdef U_INT64_T_UNAVAILABLE
56 errln("int64_t and uint64_t are undefined.");
57 #else
58 if(8!=sizeof(int64_t)) {
59 errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
60 }
61 if(8!=sizeof(uint64_t)) {
62 errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
63 }
64 #endif
65 if(8!=sizeof(double)) {
66 errln("8!=sizeof(double) - putil.c code may not work");
67 }
68 if(4!=sizeof(int32_t)) {
69 errln("4!=sizeof(int32_t)");
70 }
71 if(4!=sizeof(uint32_t)) {
72 errln("4!=sizeof(uint32_t)");
73 }
74 if(2!=sizeof(int16_t)) {
75 errln("2!=sizeof(int16_t)");
76 }
77 if(2!=sizeof(uint16_t)) {
78 errln("2!=sizeof(uint16_t)");
79 }
80 if(2!=sizeof(UChar)) {
81 errln("2!=sizeof(UChar)");
82 }
83 if(1!=sizeof(int8_t)) {
84 errln("1!=sizeof(int8_t)");
85 }
86 if(1!=sizeof(uint8_t)) {
87 errln("1!=sizeof(uint8_t)");
88 }
89 if(1!=sizeof(UBool)) {
90 errln("1!=sizeof(UBool)");
91 }
92 }
93
TestCharsetFamily(void)94 void StringTest::TestCharsetFamily(void) {
95 unsigned char c='A';
96 if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
97 (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
98 ) {
99 errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
100 }
101 }
102
103 U_STRING_DECL(ustringVar, "aZ0 -", 5);
104
105 void
Test_U_STRING()106 StringTest::Test_U_STRING() {
107 U_STRING_INIT(ustringVar, "aZ0 -", 5);
108 if( u_strlen(ustringVar)!=5 ||
109 ustringVar[0]!=0x61 ||
110 ustringVar[1]!=0x5a ||
111 ustringVar[2]!=0x30 ||
112 ustringVar[3]!=0x20 ||
113 ustringVar[4]!=0x2d ||
114 ustringVar[5]!=0
115 ) {
116 errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
117 "See putil.h and utypes.h with platform.h.");
118 }
119 }
120
121 void
Test_UNICODE_STRING()122 StringTest::Test_UNICODE_STRING() {
123 UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
124 if( ustringVar.length()!=5 ||
125 ustringVar[0]!=0x61 ||
126 ustringVar[1]!=0x5a ||
127 ustringVar[2]!=0x30 ||
128 ustringVar[3]!=0x20 ||
129 ustringVar[4]!=0x2d
130 ) {
131 errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
132 "See unistr.h and utypes.h with platform.h.");
133 }
134 }
135
136 void
Test_UNICODE_STRING_SIMPLE()137 StringTest::Test_UNICODE_STRING_SIMPLE() {
138 UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
139 if( ustringVar.length()!=5 ||
140 ustringVar[0]!=0x61 ||
141 ustringVar[1]!=0x5a ||
142 ustringVar[2]!=0x30 ||
143 ustringVar[3]!=0x20 ||
144 ustringVar[4]!=0x2d
145 ) {
146 errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
147 "See unistr.h and utypes.h with platform.h.");
148 }
149 }
150
namespace {

// The same sequence of characters written twice, index for index:
// once as a narrow string in the compiler's native charset and once as a
// char16_t string. Used by the ordinal tests below.
// See U_CHARSET_FAMILY in unicode/platform.h.
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    " \"%&'()*+,-./:;<=>?_";
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ" u"abcdefghijklmnopqrstuvwxyz"
    u"0123456789"
    u" \"%&'()*+,-./:;<=>?_";

}  // namespace
164
165 void
TestUpperOrdinal()166 StringTest::TestUpperOrdinal() {
167 for (int32_t i = 0;; ++i) {
168 char ic = nativeInvChars[i];
169 uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
170 int32_t expected = ac - 'A';
171 int32_t actual = uprv_upperOrdinal(ic);
172 if (0 <= expected && expected <= 25) {
173 if (actual != expected) {
174 errln("uprv_upperOrdinal('%c')=%d != expected %d",
175 ic, (int)actual, (int)expected);
176 }
177 } else {
178 if (0 <= actual && actual <= 25) {
179 errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
180 ic, (int)actual);
181 }
182 }
183 if (ic == 0) { break; }
184 }
185 }
186
187 void
TestLowerOrdinal()188 StringTest::TestLowerOrdinal() {
189 for (int32_t i = 0;; ++i) {
190 char ic = nativeInvChars[i];
191 uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
192 int32_t expected = ac - 'a';
193 int32_t actual = uprv_lowerOrdinal(ic);
194 if (0 <= expected && expected <= 25) {
195 if (actual != expected) {
196 errln("uprv_lowerOrdinal('%c')=%d != expected %d",
197 ic, (int)actual, (int)expected);
198 }
199 } else {
200 if (0 <= actual && actual <= 25) {
201 errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
202 ic, (int)actual);
203 }
204 }
205 if (ic == 0) { break; }
206 }
207 }
208
209 void
Test_UTF8_COUNT_TRAIL_BYTES()210 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
211 #if !U_HIDE_OBSOLETE_UTF_OLD_H
212 if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
213 || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
214 || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
215 || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
216 errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
217 }
218 #endif
219 // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
220 // have completely different implementations.
221 if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
222 || U8_COUNT_TRAIL_BYTES(0xC2) != 1
223 || U8_COUNT_TRAIL_BYTES(0xE0) != 2
224 || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
225 errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
226 }
227 }
228
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)229 void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
230 if(exec) {
231 logln("TestSuite Character and String Test: ");
232 }
233 TESTCASE_AUTO_BEGIN;
234 TESTCASE_AUTO(TestEndian);
235 TESTCASE_AUTO(TestSizeofTypes);
236 TESTCASE_AUTO(TestCharsetFamily);
237 TESTCASE_AUTO(Test_U_STRING);
238 TESTCASE_AUTO(Test_UNICODE_STRING);
239 TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
240 TESTCASE_AUTO(TestUpperOrdinal);
241 TESTCASE_AUTO(TestLowerOrdinal);
242 TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
243 TESTCASE_AUTO(TestSTLCompatibility);
244 TESTCASE_AUTO(TestStringPiece);
245 TESTCASE_AUTO(TestStringPieceComparisons);
246 TESTCASE_AUTO(TestStringPieceFind);
247 TESTCASE_AUTO(TestStringPieceOther);
248 #ifdef U_HAVE_STRING_VIEW
249 TESTCASE_AUTO(TestStringPieceStringView);
250 #endif
251 TESTCASE_AUTO(TestStringPieceU8);
252 TESTCASE_AUTO(TestByteSink);
253 TESTCASE_AUTO(TestCheckedArrayByteSink);
254 TESTCASE_AUTO(TestStringByteSink);
255 TESTCASE_AUTO(TestStringByteSinkAppendU8);
256 TESTCASE_AUTO(TestCharString);
257 TESTCASE_AUTO(TestCStr);
258 TESTCASE_AUTO(Testctou);
259 TESTCASE_AUTO_END;
260 }
261
262 void
TestStringPiece()263 StringTest::TestStringPiece() {
264 // Default constructor.
265 StringPiece empty;
266 if(!empty.empty() || empty.data()!=NULL || empty.length()!=0 || empty.size()!=0) {
267 errln("StringPiece() failed");
268 }
269 // Construct from NULL const char * pointer.
270 StringPiece null((const char *)nullptr);
271 if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) {
272 errln("StringPiece(NULL) failed");
273 }
274 // Construct from const char * pointer.
275 static const char *abc_chars="abc";
276 StringPiece abc(abc_chars);
277 if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
278 errln("StringPiece(abc_chars) failed");
279 }
280 // Construct from const char * pointer and length.
281 static const char *abcdefg_chars="abcdefg";
282 StringPiece abcd(abcdefg_chars, 4);
283 if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
284 errln("StringPiece(abcdefg_chars, 4) failed");
285 }
286 // Construct from std::string.
287 std::string uvwxyz_string("uvwxyz");
288 StringPiece uvwxyz(uvwxyz_string);
289 if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
290 errln("StringPiece(uvwxyz_string) failed");
291 }
292 // Substring constructor with pos.
293 StringPiece sp(abcd, -1);
294 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
295 errln("StringPiece(abcd, -1) failed");
296 }
297 sp=StringPiece(abcd, 5);
298 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
299 errln("StringPiece(abcd, 5) failed");
300 }
301 sp=StringPiece(abcd, 2);
302 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
303 errln("StringPiece(abcd, -1) failed");
304 }
305 // Substring constructor with pos and len.
306 sp=StringPiece(abcd, -1, 8);
307 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
308 errln("StringPiece(abcd, -1, 8) failed");
309 }
310 sp=StringPiece(abcd, 5, 8);
311 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
312 errln("StringPiece(abcd, 5, 8) failed");
313 }
314 sp=StringPiece(abcd, 2, 8);
315 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
316 errln("StringPiece(abcd, -1) failed");
317 }
318 sp=StringPiece(abcd, 2, -1);
319 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
320 errln("StringPiece(abcd, 5, -1) failed");
321 }
322 // static const npos
323 const int32_t *ptr_npos=&StringPiece::npos;
324 if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
325 errln("StringPiece::npos!=0x7fffffff");
326 }
327 // substr() method with pos, using len=npos.
328 sp=abcd.substr(-1);
329 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
330 errln("abcd.substr(-1) failed");
331 }
332 sp=abcd.substr(5);
333 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
334 errln("abcd.substr(5) failed");
335 }
336 sp=abcd.substr(2);
337 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
338 errln("abcd.substr(-1) failed");
339 }
340 // substr() method with pos and len.
341 sp=abcd.substr(-1, 8);
342 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
343 errln("abcd.substr(-1, 8) failed");
344 }
345 sp=abcd.substr(5, 8);
346 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
347 errln("abcd.substr(5, 8) failed");
348 }
349 sp=abcd.substr(2, 8);
350 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
351 errln("abcd.substr(-1) failed");
352 }
353 sp=abcd.substr(2, -1);
354 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
355 errln("abcd.substr(5, -1) failed");
356 }
357 // clear()
358 sp=abcd;
359 sp.clear();
360 if(!sp.empty() || sp.data()!=NULL || sp.length()!=0 || sp.size()!=0) {
361 errln("abcd.clear() failed");
362 }
363 // remove_prefix()
364 sp=abcd;
365 sp.remove_prefix(-1);
366 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
367 errln("abcd.remove_prefix(-1) failed");
368 }
369 sp=abcd;
370 sp.remove_prefix(2);
371 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
372 errln("abcd.remove_prefix(2) failed");
373 }
374 sp=abcd;
375 sp.remove_prefix(5);
376 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
377 errln("abcd.remove_prefix(5) failed");
378 }
379 // remove_suffix()
380 sp=abcd;
381 sp.remove_suffix(-1);
382 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
383 errln("abcd.remove_suffix(-1) failed");
384 }
385 sp=abcd;
386 sp.remove_suffix(2);
387 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
388 errln("abcd.remove_suffix(2) failed");
389 }
390 sp=abcd;
391 sp.remove_suffix(5);
392 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
393 errln("abcd.remove_suffix(5) failed");
394 }
395 }
396
397 void
TestStringPieceComparisons()398 StringTest::TestStringPieceComparisons() {
399 StringPiece empty;
400 StringPiece null(nullptr);
401 StringPiece abc("abc");
402 StringPiece abcd("abcdefg", 4);
403 StringPiece abx("abx");
404 if(empty!=null) {
405 errln("empty!=null");
406 }
407 if(empty==abc) {
408 errln("empty==abc");
409 }
410 if(abc==abcd) {
411 errln("abc==abcd");
412 }
413
414 assertTrue("null<abc", null.compare(abc) < 0);
415 assertTrue("abc>null", abc.compare(null) > 0);
416 assertTrue("abc<abcd", abc.compare(abcd) < 0);
417 assertTrue("abcd>abc", abcd.compare(abc) > 0);
418 assertTrue("abc<abx", abc.compare(abx) < 0);
419 assertTrue("abx>abc", abx.compare(abc) > 0);
420 assertTrue("abx>abcd", abx.compare(abcd) > 0);
421 assertTrue("abcd<abx", abcd.compare(abx) < 0);
422 assertTrue("abx==abx", abx.compare(abx) == 0);
423
424 // Behavior should be the same as std::string::compare
425 {
426 std::string null("");
427 std::string abc("abc");
428 std::string abcd("abcdefg", 4);
429 std::string abx("abx");
430
431 assertTrue("std: null<abc", null.compare(abc) < 0);
432 assertTrue("std: abc>null", abc.compare(null) > 0);
433 assertTrue("std: abc<abcd", abc.compare(abcd) < 0);
434 assertTrue("std: abcd>abc", abcd.compare(abc) > 0);
435 assertTrue("std: abc<abx", abc.compare(abx) < 0);
436 assertTrue("std: abx>abc", abx.compare(abc) > 0);
437 assertTrue("std: abx>abcd", abx.compare(abcd) > 0);
438 assertTrue("std: abcd<abx", abcd.compare(abx) < 0);
439 assertTrue("std: abx==abx", abx.compare(abx) == 0);
440 }
441
442 abcd.remove_suffix(1);
443 if(abc!=abcd) {
444 errln("abc!=abcd.remove_suffix(1)");
445 }
446 if(abc==abx) {
447 errln("abc==abx");
448 }
449 }
450
451 void
TestStringPieceFind()452 StringTest::TestStringPieceFind() {
453 struct TestCase {
454 const char* haystack;
455 const char* needle;
456 int32_t expected;
457 } cases[] = {
458 { "", "", 0 },
459 { "", "x", -1 },
460 { "x", "", 0 },
461 { "x", "x", 0 },
462 { "xy", "x", 0 },
463 { "xy", "y", 1 },
464 { "xy", "xy", 0 },
465 { "xy", "xyz", -1 },
466 { "qwerty", "qqw", -1 },
467 { "qwerty", "qw", 0 },
468 { "qwerty", "er", 2 },
469 { "qwerty", "err", -1 },
470 { "qwerty", "ert", 2 },
471 { "qwerty", "ty", 4 },
472 { "qwerty", "tyy", -1 },
473 { "qwerty", "a", -1 },
474 { "qwerty", "abc", -1 }
475 };
476 int32_t caseNumber = 0;
477 for (auto& cas : cases) {
478 StringPiece haystack(cas.haystack);
479 StringPiece needle(cas.needle);
480 assertEquals(Int64ToUnicodeString(caseNumber),
481 cas.expected, haystack.find(needle, 0));
482 // Should be same as std::string::find
483 std::string stdhaystack(cas.haystack);
484 std::string stdneedle(cas.needle);
485 assertEquals(Int64ToUnicodeString(caseNumber) + u" (std)",
486 cas.expected, static_cast<int32_t>(stdhaystack.find(stdneedle, 0)));
487 // Test offsets against std::string::find
488 for (int32_t offset = 0; offset < haystack.length(); offset++) {
489 assertEquals(Int64ToUnicodeString(caseNumber) + "u @ " + Int64ToUnicodeString(offset),
490 static_cast<int32_t>(stdhaystack.find(stdneedle, offset)), haystack.find(needle, offset));
491 }
492 caseNumber++;
493 }
494 }
495
496 void
TestStringPieceOther()497 StringTest::TestStringPieceOther() {
498 static constexpr char msg[] = "Kapow!";
499
500 // Another string piece implementation.
501 struct Other {
502 const char* data() { return msg; }
503 size_t size() { return sizeof msg - 1; }
504 };
505
506 Other other;
507 StringPiece piece(other);
508
509 assertEquals("size()", piece.size(), static_cast<int32_t>(other.size()));
510 assertEquals("data()", piece.data(), other.data());
511 }
512
#ifdef U_HAVE_STRING_VIEW
/**
 * Checks that a StringPiece constructed from a std::string_view reports the
 * same size and data as the view.
 */
void
StringTest::TestStringPieceStringView() {
    static constexpr char msg[] = "Kapow!";

    std::string_view view(msg);  // C++17
    StringPiece piece(view);

    // view.size() is a size_t; cast explicitly so that both arguments are
    // int32_t, as in TestStringPieceOther(), instead of relying on an
    // implicit narrowing conversion.
    assertEquals("size()", piece.size(), static_cast<int32_t>(view.size()));
    assertEquals("data()", piece.data(), view.data());
}
#endif
525
526 void
TestStringPieceU8()527 StringTest::TestStringPieceU8() {
528 // ICU-20984 "mitigate some C++20 char8_t breakages"
529 // For the following APIs there are overloads for both
530 // const char * and const char8_t *.
531 // A u8"string literal" has one type or the other
532 // depending on C++ version and compiler settings.
533 StringPiece abc(u8"abc");
534 assertEquals("abc.length", 3, abc.length());
535 assertEquals("abc", "\x61\x62\x63", abc.data());
536
537 StringPiece abc3(u8"abcdef", 3);
538 assertEquals("abc3.length", 3, abc3.length());
539 assertEquals("abc3[0]", 0x61, abc3.data()[0]);
540 assertEquals("abc3[1]", 0x62, abc3.data()[1]);
541 assertEquals("abc3[2]", 0x63, abc3.data()[2]);
542
543 StringPiece uvw("q");
544 uvw.set(u8"uvw");
545 assertEquals("uvw.length", 3, uvw.length());
546 assertEquals("uvw", "\x75\x76\x77", uvw.data());
547
548 StringPiece xyz("r");
549 xyz.set(u8"xyzXYZ", 3);
550 assertEquals("xyz.length", 3, xyz.length());
551 assertEquals("xyz[0]", 0x78, xyz.data()[0]);
552 assertEquals("xyz[1]", 0x79, xyz.data()[1]);
553 assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
554
555 StringPiece null(nullptr);
556 assertTrue("null is empty", null.empty());
557 assertTrue("null is null", null.data() == nullptr);
558
559 #ifdef __cpp_lib_char8_t
560 std::u8string_view u8sv(u8"sv"); // C++20
561 StringPiece u8svsp(u8sv);
562 assertEquals("u8svsp.length", 2, u8svsp.length());
563 assertEquals("u8svsp", "\x73\x76", u8svsp.data());
564
565 std::u8string u8str(u8"str"); // C++20
566 StringPiece u8strsp(u8str);
567 assertEquals("u8strsp.length", 3, u8strsp.length());
568 assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
569 #endif // __cpp_lib_char8_t
570 }
571
572 // Verify that ByteSink is subclassable and Flush() overridable.
573 class SimpleByteSink : public ByteSink {
574 public:
SimpleByteSink(char * outbuf)575 SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)576 virtual void Append(const char *bytes, int32_t n) {
577 if(fOutbuf != bytes) {
578 memcpy(fOutbuf, bytes, n);
579 }
580 fOutbuf += n;
581 fLength += n;
582 }
Flush()583 virtual void Flush() { Append("z", 1); }
length()584 int32_t length() { return fLength; }
585 private:
586 char *fOutbuf;
587 int32_t fLength;
588 };
589
590 // Test the ByteSink base class.
591 void
TestByteSink()592 StringTest::TestByteSink() {
593 char buffer[20];
594 buffer[4] = '!';
595 SimpleByteSink sink(buffer);
596 sink.Append("abc", 3);
597 sink.Flush();
598 if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
599 errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
600 return;
601 }
602 char scratch[20];
603 int32_t capacity = -1;
604 char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
605 if(dest != NULL || capacity != 0) {
606 errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
607 return;
608 }
609 dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
610 if(dest != NULL || capacity != 0) {
611 errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
612 return;
613 }
614 dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
615 if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
616 errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
617 }
618 }
619
620 void
TestCheckedArrayByteSink()621 StringTest::TestCheckedArrayByteSink() {
622 char buffer[20]; // < 26 for the test code to work
623 buffer[3] = '!';
624 CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
625 sink.Append("abc", 3);
626 if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
627 0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
628 !sink.Overflowed()
629 ) {
630 errln("CheckedArrayByteSink did not Append() as expected");
631 return;
632 }
633 char scratch[10];
634 int32_t capacity = -1;
635 char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
636 if(dest != NULL || capacity != 0) {
637 errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
638 return;
639 }
640 dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
641 if(dest != NULL || capacity != 0) {
642 errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
643 return;
644 }
645 dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
646 if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
647 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
648 return;
649 }
650 memcpy(dest, "defghijklm", 10);
651 sink.Append(dest, 10);
652 if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
653 0 == memcmp("abcdefghijklm", buffer, 13) &&
654 !sink.Overflowed())
655 ) {
656 errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
657 return;
658 }
659 dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
660 if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
661 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
662 }
663 memcpy(dest, "nopqrstuvw", 10);
664 sink.Append(dest, 10);
665 if(!(sink.NumberOfBytesAppended() == 23 &&
666 sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
667 0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
668 sink.Overflowed())
669 ) {
670 errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
671 return;
672 }
673 sink.Reset().Append("123", 3);
674 if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
675 0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
676 !sink.Overflowed())
677 ) {
678 errln("CheckedArrayByteSink did not Reset().Append() as expected");
679 return;
680 }
681 }
682
683 void
TestStringByteSink()684 StringTest::TestStringByteSink() {
685 // Not much to test because only the constructors and Append()
686 // are implemented, and trivially so.
687 std::string result("abc"); // std::string
688 StringByteSink<std::string> sink(&result);
689 sink.Append("def", 3);
690 if(result != "abcdef") {
691 errln("StringByteSink did not Append() as expected");
692 }
693 StringByteSink<std::string> sink2(&result, 20);
694 if(result.capacity() < (result.length() + 20)) {
695 errln("StringByteSink should have 20 append capacity, has only %d",
696 (int)(result.capacity() - result.length()));
697 }
698 sink.Append("ghi", 3);
699 if(result != "abcdefghi") {
700 errln("StringByteSink did not Append() as expected");
701 }
702 }
703
704 void
TestStringByteSinkAppendU8()705 StringTest::TestStringByteSinkAppendU8() {
706 // ICU-20984 "mitigate some C++20 char8_t breakages"
707 // For the following APIs there are overloads for both
708 // const char * and const char8_t *.
709 // A u8"string literal" has one type or the other
710 // depending on C++ version and compiler settings.
711 std::string result("abc");
712 StringByteSink<std::string> sink(&result);
713 sink.AppendU8("def", 3);
714 sink.AppendU8(u8"ghijkl", 4);
715 assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
716 }
717
718 #if defined(_MSC_VER)
719 #include <vector>
720 #endif
721
722 void
TestSTLCompatibility()723 StringTest::TestSTLCompatibility() {
724 #if defined(_MSC_VER)
725 /* Just make sure that it compiles with STL's placement new usage. */
726 std::vector<UnicodeString> myvect;
727 myvect.push_back(UnicodeString("blah"));
728 #endif
729 }
730
731 void
TestCharString()732 StringTest::TestCharString() {
733 IcuTestErrorCode errorCode(*this, "TestCharString()");
734 char expected[400];
735 static const char longStr[] =
736 "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
737 CharString chStr(longStr, errorCode);
738 if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
739 errln("CharString(longStr) failed.");
740 }
741 CharString test("Test", errorCode);
742 CharString copy(test,errorCode);
743 copy.copyFrom(chStr, errorCode);
744 if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
745 errln("CharString.copyFrom() failed.");
746 }
747 StringPiece sp(chStr.toStringPiece());
748 sp.remove_prefix(4);
749 chStr.append(sp, errorCode).append(chStr, errorCode);
750 strcpy(expected, longStr);
751 strcat(expected, longStr+4);
752 strcat(expected, longStr);
753 strcat(expected, longStr+4);
754 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
755 errln("CharString(longStr).append(substring of self).append(self) failed.");
756 }
757 chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
758 if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
759 errln("CharString.clear().append(abc).append(defghij, 3) failed.");
760 }
761 chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
762 "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
763 errorCode);
764 strcpy(expected, "abcdef");
765 strcat(expected, longStr);
766 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
767 errln("CharString.appendInvariantChars(longStr) failed.");
768 }
769 int32_t appendCapacity = 0;
770 char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
771 if (errorCode.isFailure()) {
772 return;
773 }
774 memcpy(buffer, "*****", 5);
775 chStr.append(buffer, 5, errorCode);
776 chStr.truncate(chStr.length()-3);
777 strcat(expected, "**");
778 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
779 errln("CharString.getAppendBuffer().append(**) failed.");
780 }
781
782 UErrorCode ec = U_ZERO_ERROR;
783 chStr.clear();
784 chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
785 if (ec != U_INVARIANT_CONVERSION_ERROR) {
786 errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
787 }
788 if (chStr.length() != 0) {
789 errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
790 }
791
792 {
793 CharString s1("Short string", errorCode);
794 CharString s2(std::move(s1));
795 assertEquals("s2 should have content of s1", "Short string", s2.data());
796 CharString s3("Dummy", errorCode);
797 s3 = std::move(s2);
798 assertEquals("s3 should have content of s2", "Short string", s3.data());
799 }
800
801 {
802 CharString s1("Long string over 40 characters to trigger heap allocation", errorCode);
803 CharString s2(std::move(s1));
804 assertEquals("s2 should have content of s1",
805 "Long string over 40 characters to trigger heap allocation",
806 s2.data());
807 CharString s3("Dummy string with over 40 characters to trigger heap allocation", errorCode);
808 s3 = std::move(s2);
809 assertEquals("s3 should have content of s2",
810 "Long string over 40 characters to trigger heap allocation",
811 s3.data());
812 }
813
814 {
815 // extract()
816 errorCode.reset();
817 CharString s("abc", errorCode);
818 char buffer[10];
819
820 s.extract(buffer, 10, errorCode);
821 assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
822 assertEquals("abc.extract(10) output", "abc", buffer);
823
824 strcpy(buffer, "012345");
825 s.extract(buffer, 3, errorCode);
826 assertEquals("abc.extract(3) not terminated",
827 U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
828 assertEquals("abc.extract(3) output", "abc345", buffer);
829
830 strcpy(buffer, "012345");
831 s.extract(buffer, 2, errorCode);
832 assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
833 }
834 }
835
836 void
TestCStr()837 StringTest::TestCStr() {
838 const char *cs = "This is a test string.";
839 UnicodeString us(cs);
840 if (0 != strcmp(CStr(us)(), cs)) {
841 errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
842 }
843 }
844
845 void
Testctou()846 StringTest::Testctou() {
847 const char *cs = "Fa\\u0127mu";
848 UnicodeString u = ctou(cs);
849 assertEquals("Testing unescape@0", (int32_t)0x0046, u.charAt(0));
850 assertEquals("Testing unescape@2", (int32_t)295, u.charAt(2));
851 }
852