/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
/*   file name:  strtest.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999nov22
*   created by: Markus W. Scherer
*/
14 
15 #include <string.h>
16 
17 #include "unicode/utypes.h"
18 #include "unicode/putil.h"
19 #include "unicode/std_string.h"
20 #include "unicode/stringpiece.h"
21 #include "unicode/unistr.h"
22 #include "unicode/ustring.h"
23 #include "charstr.h"
24 #include "cstr.h"
25 #include "intltest.h"
26 #include "strtest.h"
27 
~StringTest()28 StringTest::~StringTest() {}
29 
TestEndian(void)30 void StringTest::TestEndian(void) {
31     union {
32         uint8_t byte;
33         uint16_t word;
34     } u;
35     u.word=0x0100;
36     if(U_IS_BIG_ENDIAN!=u.byte) {
37         errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
38     }
39 }
40 
TestSizeofTypes(void)41 void StringTest::TestSizeofTypes(void) {
42     if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
43         errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
44     }
45 #ifdef U_INT64_T_UNAVAILABLE
46     errln("int64_t and uint64_t are undefined.");
47 #else
48     if(8!=sizeof(int64_t)) {
49         errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
50     }
51     if(8!=sizeof(uint64_t)) {
52         errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
53     }
54 #endif
55     if(8!=sizeof(double)) {
56         errln("8!=sizeof(double) - putil.c code may not work");
57     }
58     if(4!=sizeof(int32_t)) {
59         errln("4!=sizeof(int32_t)");
60     }
61     if(4!=sizeof(uint32_t)) {
62         errln("4!=sizeof(uint32_t)");
63     }
64     if(2!=sizeof(int16_t)) {
65         errln("2!=sizeof(int16_t)");
66     }
67     if(2!=sizeof(uint16_t)) {
68         errln("2!=sizeof(uint16_t)");
69     }
70     if(2!=sizeof(UChar)) {
71         errln("2!=sizeof(UChar)");
72     }
73     if(1!=sizeof(int8_t)) {
74         errln("1!=sizeof(int8_t)");
75     }
76     if(1!=sizeof(uint8_t)) {
77         errln("1!=sizeof(uint8_t)");
78     }
79     if(1!=sizeof(UBool)) {
80         errln("1!=sizeof(UBool)");
81     }
82 }
83 
TestCharsetFamily(void)84 void StringTest::TestCharsetFamily(void) {
85     unsigned char c='A';
86     if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
87         (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
88     ) {
89         errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
90     }
91 }
92 
93 U_STRING_DECL(ustringVar, "aZ0 -", 5);
94 
95 void
Test_U_STRING()96 StringTest::Test_U_STRING() {
97     U_STRING_INIT(ustringVar, "aZ0 -", 5);
98     if( u_strlen(ustringVar)!=5 ||
99         ustringVar[0]!=0x61 ||
100         ustringVar[1]!=0x5a ||
101         ustringVar[2]!=0x30 ||
102         ustringVar[3]!=0x20 ||
103         ustringVar[4]!=0x2d ||
104         ustringVar[5]!=0
105     ) {
106         errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
107               "See putil.h and utypes.h with platform.h.");
108     }
109 }
110 
111 void
Test_UNICODE_STRING()112 StringTest::Test_UNICODE_STRING() {
113     UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
114     if( ustringVar.length()!=5 ||
115         ustringVar[0]!=0x61 ||
116         ustringVar[1]!=0x5a ||
117         ustringVar[2]!=0x30 ||
118         ustringVar[3]!=0x20 ||
119         ustringVar[4]!=0x2d
120     ) {
121         errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
122               "See unistr.h and utypes.h with platform.h.");
123     }
124 }
125 
126 void
Test_UNICODE_STRING_SIMPLE()127 StringTest::Test_UNICODE_STRING_SIMPLE() {
128     UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
129     if( ustringVar.length()!=5 ||
130         ustringVar[0]!=0x61 ||
131         ustringVar[1]!=0x5a ||
132         ustringVar[2]!=0x30 ||
133         ustringVar[3]!=0x20 ||
134         ustringVar[4]!=0x2d
135     ) {
136         errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
137               "See unistr.h and utypes.h with platform.h.");
138     }
139 }
140 
141 void
Test_UTF8_COUNT_TRAIL_BYTES()142 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
143     if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
144         || UTF8_COUNT_TRAIL_BYTES(0xC0) != 1
145         || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
146         || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3)
147     {
148         errln("Test_UTF8_COUNT_TRAIL_BYTES: UTF8_COUNT_TRAIL_BYTES does not work right! "
149               "See utf8.h.");
150     }
151 }
152 
/**
 * Test-framework entry point for this suite.
 *
 * Logs the suite banner when exec is set, then hands control to the
 * TESTCASE_AUTO macros, which list the test methods of this class.
 * NOTE(review): the macros come from the test framework headers and are
 * presumed to use index, exec and name to select/run the listed tests in the
 * order given -- confirm against intltest.h. Do not reorder the entries
 * without checking that.
 *
 * @param index  test case index supplied by the framework
 * @param exec   when true, the suite banner is logged
 * @param name   reference parameter; not touched directly here (presumably
 *               set by the macros -- confirm against intltest.h)
 * @param par    unused (the parameter name is commented out)
 */
void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite Character and String Test: ");
    }
    // One TESTCASE_AUTO entry per test method, between the BEGIN/END markers.
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestEndian);
    TESTCASE_AUTO(TestSizeofTypes);
    TESTCASE_AUTO(TestCharsetFamily);
    TESTCASE_AUTO(Test_U_STRING);
    TESTCASE_AUTO(Test_UNICODE_STRING);
    TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
    TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
    TESTCASE_AUTO(TestSTLCompatibility);
    TESTCASE_AUTO(TestStringPiece);
    TESTCASE_AUTO(TestStringPieceComparisons);
    TESTCASE_AUTO(TestByteSink);
    TESTCASE_AUTO(TestCheckedArrayByteSink);
    TESTCASE_AUTO(TestStringByteSink);
    TESTCASE_AUTO(TestCharString);
    TESTCASE_AUTO(TestCStr);
    TESTCASE_AUTO_END;
}
175 
176 void
TestStringPiece()177 StringTest::TestStringPiece() {
178     // Default constructor.
179     StringPiece empty;
180     if(!empty.empty() || empty.data()!=NULL || empty.length()!=0 || empty.size()!=0) {
181         errln("StringPiece() failed");
182     }
183     // Construct from NULL const char * pointer.
184     StringPiece null(NULL);
185     if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) {
186         errln("StringPiece(NULL) failed");
187     }
188     // Construct from const char * pointer.
189     static const char *abc_chars="abc";
190     StringPiece abc(abc_chars);
191     if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
192         errln("StringPiece(abc_chars) failed");
193     }
194     // Construct from const char * pointer and length.
195     static const char *abcdefg_chars="abcdefg";
196     StringPiece abcd(abcdefg_chars, 4);
197     if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
198         errln("StringPiece(abcdefg_chars, 4) failed");
199     }
200 #if U_HAVE_STD_STRING
201     // Construct from std::string.
202     std::string uvwxyz_string("uvwxyz");
203     StringPiece uvwxyz(uvwxyz_string);
204     if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
205         errln("StringPiece(uvwxyz_string) failed");
206     }
207 #endif
208     // Substring constructor with pos.
209     StringPiece sp(abcd, -1);
210     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
211         errln("StringPiece(abcd, -1) failed");
212     }
213     sp=StringPiece(abcd, 5);
214     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
215         errln("StringPiece(abcd, 5) failed");
216     }
217     sp=StringPiece(abcd, 2);
218     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
219         errln("StringPiece(abcd, -1) failed");
220     }
221     // Substring constructor with pos and len.
222     sp=StringPiece(abcd, -1, 8);
223     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
224         errln("StringPiece(abcd, -1, 8) failed");
225     }
226     sp=StringPiece(abcd, 5, 8);
227     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
228         errln("StringPiece(abcd, 5, 8) failed");
229     }
230     sp=StringPiece(abcd, 2, 8);
231     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
232         errln("StringPiece(abcd, -1) failed");
233     }
234     sp=StringPiece(abcd, 2, -1);
235     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
236         errln("StringPiece(abcd, 5, -1) failed");
237     }
238     // static const npos
239     const int32_t *ptr_npos=&StringPiece::npos;
240     if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
241         errln("StringPiece::npos!=0x7fffffff");
242     }
243     // substr() method with pos, using len=npos.
244     sp=abcd.substr(-1);
245     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
246         errln("abcd.substr(-1) failed");
247     }
248     sp=abcd.substr(5);
249     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
250         errln("abcd.substr(5) failed");
251     }
252     sp=abcd.substr(2);
253     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
254         errln("abcd.substr(-1) failed");
255     }
256     // substr() method with pos and len.
257     sp=abcd.substr(-1, 8);
258     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
259         errln("abcd.substr(-1, 8) failed");
260     }
261     sp=abcd.substr(5, 8);
262     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
263         errln("abcd.substr(5, 8) failed");
264     }
265     sp=abcd.substr(2, 8);
266     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
267         errln("abcd.substr(-1) failed");
268     }
269     sp=abcd.substr(2, -1);
270     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
271         errln("abcd.substr(5, -1) failed");
272     }
273     // clear()
274     sp=abcd;
275     sp.clear();
276     if(!sp.empty() || sp.data()!=NULL || sp.length()!=0 || sp.size()!=0) {
277         errln("abcd.clear() failed");
278     }
279     // remove_prefix()
280     sp=abcd;
281     sp.remove_prefix(-1);
282     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
283         errln("abcd.remove_prefix(-1) failed");
284     }
285     sp=abcd;
286     sp.remove_prefix(2);
287     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
288         errln("abcd.remove_prefix(2) failed");
289     }
290     sp=abcd;
291     sp.remove_prefix(5);
292     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
293         errln("abcd.remove_prefix(5) failed");
294     }
295     // remove_suffix()
296     sp=abcd;
297     sp.remove_suffix(-1);
298     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
299         errln("abcd.remove_suffix(-1) failed");
300     }
301     sp=abcd;
302     sp.remove_suffix(2);
303     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
304         errln("abcd.remove_suffix(2) failed");
305     }
306     sp=abcd;
307     sp.remove_suffix(5);
308     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
309         errln("abcd.remove_suffix(5) failed");
310     }
311 }
312 
313 void
TestStringPieceComparisons()314 StringTest::TestStringPieceComparisons() {
315     StringPiece empty;
316     StringPiece null(NULL);
317     StringPiece abc("abc");
318     StringPiece abcd("abcdefg", 4);
319     StringPiece abx("abx");
320     if(empty!=null) {
321         errln("empty!=null");
322     }
323     if(empty==abc) {
324         errln("empty==abc");
325     }
326     if(abc==abcd) {
327         errln("abc==abcd");
328     }
329     abcd.remove_suffix(1);
330     if(abc!=abcd) {
331         errln("abc!=abcd.remove_suffix(1)");
332     }
333     if(abc==abx) {
334         errln("abc==abx");
335     }
336 }
337 
338 // Verify that ByteSink is subclassable and Flush() overridable.
339 class SimpleByteSink : public ByteSink {
340 public:
SimpleByteSink(char * outbuf)341     SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)342     virtual void Append(const char *bytes, int32_t n) {
343         if(fOutbuf != bytes) {
344             memcpy(fOutbuf, bytes, n);
345         }
346         fOutbuf += n;
347         fLength += n;
348     }
Flush()349     virtual void Flush() { Append("z", 1); }
length()350     int32_t length() { return fLength; }
351 private:
352     char *fOutbuf;
353     int32_t fLength;
354 };
355 
356 // Test the ByteSink base class.
357 void
TestByteSink()358 StringTest::TestByteSink() {
359     char buffer[20];
360     buffer[4] = '!';
361     SimpleByteSink sink(buffer);
362     sink.Append("abc", 3);
363     sink.Flush();
364     if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
365         errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
366         return;
367     }
368     char scratch[20];
369     int32_t capacity = -1;
370     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
371     if(dest != NULL || capacity != 0) {
372         errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
373         return;
374     }
375     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
376     if(dest != NULL || capacity != 0) {
377         errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
378         return;
379     }
380     dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
381     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
382         errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
383     }
384 }
385 
386 void
TestCheckedArrayByteSink()387 StringTest::TestCheckedArrayByteSink() {
388     char buffer[20];  // < 26 for the test code to work
389     buffer[3] = '!';
390     CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
391     sink.Append("abc", 3);
392     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
393          0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
394          !sink.Overflowed()
395     ) {
396         errln("CheckedArrayByteSink did not Append() as expected");
397         return;
398     }
399     char scratch[10];
400     int32_t capacity = -1;
401     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
402     if(dest != NULL || capacity != 0) {
403         errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
404         return;
405     }
406     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
407     if(dest != NULL || capacity != 0) {
408         errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
409         return;
410     }
411     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
412     if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
413         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
414         return;
415     }
416     memcpy(dest, "defghijklm", 10);
417     sink.Append(dest, 10);
418     if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
419          0 == memcmp("abcdefghijklm", buffer, 13) &&
420          !sink.Overflowed())
421     ) {
422         errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
423         return;
424     }
425     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
426     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
427         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
428     }
429     memcpy(dest, "nopqrstuvw", 10);
430     sink.Append(dest, 10);
431     if(!(sink.NumberOfBytesAppended() == 23 &&
432          sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
433          0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
434          sink.Overflowed())
435     ) {
436         errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
437         return;
438     }
439     sink.Reset().Append("123", 3);
440     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
441          0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
442          !sink.Overflowed())
443     ) {
444         errln("CheckedArrayByteSink did not Reset().Append() as expected");
445         return;
446     }
447 }
448 
449 void
TestStringByteSink()450 StringTest::TestStringByteSink() {
451 #if U_HAVE_STD_STRING
452     // Not much to test because only the constructor and Append()
453     // are implemented, and trivially so.
454     std::string result("abc");  // std::string
455     StringByteSink<std::string> sink(&result);
456     sink.Append("def", 3);
457     if(result != "abcdef") {
458         errln("StringByteSink did not Append() as expected");
459     }
460 #endif
461 }
462 
463 #if defined(_MSC_VER)
464 #include <vector>
465 #endif
466 
467 void
TestSTLCompatibility()468 StringTest::TestSTLCompatibility() {
469 #if defined(_MSC_VER)
470     /* Just make sure that it compiles with STL's placement new usage. */
471     std::vector<UnicodeString> myvect;
472     myvect.push_back(UnicodeString("blah"));
473 #endif
474 }
475 
476 void
TestCharString()477 StringTest::TestCharString() {
478     IcuTestErrorCode errorCode(*this, "TestCharString()");
479     char expected[400];
480     static const char longStr[] =
481         "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
482     CharString chStr(longStr, errorCode);
483     if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
484         errln("CharString(longStr) failed.");
485     }
486     CharString test("Test", errorCode);
487     CharString copy(test,errorCode);
488     copy.copyFrom(chStr, errorCode);
489     if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
490         errln("CharString.copyFrom() failed.");
491     }
492     StringPiece sp(chStr.toStringPiece());
493     sp.remove_prefix(4);
494     chStr.append(sp, errorCode).append(chStr, errorCode);
495     strcpy(expected, longStr);
496     strcat(expected, longStr+4);
497     strcat(expected, longStr);
498     strcat(expected, longStr+4);
499     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
500         errln("CharString(longStr).append(substring of self).append(self) failed.");
501     }
502     chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
503     if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
504         errln("CharString.clear().append(abc).append(defghij, 3) failed.");
505     }
506     chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
507         "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
508         errorCode);
509     strcpy(expected, "abcdef");
510     strcat(expected, longStr);
511     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
512         errln("CharString.appendInvariantChars(longStr) failed.");
513     }
514     int32_t appendCapacity = 0;
515     char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
516     if (errorCode.isFailure()) {
517         return;
518     }
519     memcpy(buffer, "*****", 5);
520     chStr.append(buffer, 5, errorCode);
521     chStr.truncate(chStr.length()-3);
522     strcat(expected, "**");
523     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
524         errln("CharString.getAppendBuffer().append(**) failed.");
525     }
526 
527     UErrorCode ec = U_ZERO_ERROR;
528     chStr.clear();
529     chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
530     if (ec != U_INVARIANT_CONVERSION_ERROR) {
531         errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
532     }
533     if (chStr.length() != 0) {
534         errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
535     }
536 }
537 
538 void
TestCStr()539 StringTest::TestCStr() {
540     const char *cs = "This is a test string.";
541     UnicodeString us(cs);
542     if (0 != strcmp(CStr(us)(), cs)) {
543         errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
544     }
545 }
546