1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  ucharstrietest.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010nov16
12 *   created by: Markus W. Scherer
13 */
14 
15 #include <string.h>
16 
17 #include "unicode/utypes.h"
18 #include "unicode/appendable.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/ucharstrie.h"
21 #include "unicode/ucharstriebuilder.h"
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26 
27 struct StringAndValue {
28     const char *s;
29     int32_t value;
30 };
31 
32 class UCharsTrieTest : public IntlTest {
33 public:
34     UCharsTrieTest();
35     virtual ~UCharsTrieTest();
36 
37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38     void TestBuilder();
39     void TestEmpty();
40     void Test_a();
41     void Test_a_ab();
42     void TestShortestBranch();
43     void TestBranches();
44     void TestLongSequence();
45     void TestLongBranch();
46     void TestValuesForState();
47     void TestCompact();
48     void TestFirstForCodePoint();
49     void TestNextForCodePoint();
50 
51     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
52     void TestLargeTrie();
53 
54     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
55     void TestHasUniqueValue();
56     void TestGetNextUChars();
57     void TestIteratorFromBranch();
58     void TestIteratorFromLinearMatch();
59     void TestTruncatingIteratorFromRoot();
60     void TestTruncatingIteratorFromLinearMatchShort();
61     void TestTruncatingIteratorFromLinearMatchLong();
62     void TestIteratorFromUChars();
63 
64     void checkData(const StringAndValue data[], int32_t dataLength);
65     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
66     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
67                           UStringTrieBuildOption buildOption);
68     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
69     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
70     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
74 
75 private:
76     UCharsTrieBuilder *builder_;
77 };
78 
createUCharsTrieTest()79 extern IntlTest *createUCharsTrieTest() {
80     return new UCharsTrieTest();
81 }
82 
UCharsTrieTest()83 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
84     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
85     builder_=new UCharsTrieBuilder(errorCode);
86 }
87 
~UCharsTrieTest()88 UCharsTrieTest::~UCharsTrieTest() {
89     delete builder_;
90 }
91 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)92 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
93     if(exec) {
94         logln("TestSuite UCharsTrieTest: ");
95     }
96     TESTCASE_AUTO_BEGIN;
97     TESTCASE_AUTO(TestBuilder);
98     TESTCASE_AUTO(TestEmpty);
99     TESTCASE_AUTO(Test_a);
100     TESTCASE_AUTO(Test_a_ab);
101     TESTCASE_AUTO(TestShortestBranch);
102     TESTCASE_AUTO(TestBranches);
103     TESTCASE_AUTO(TestLongSequence);
104     TESTCASE_AUTO(TestLongBranch);
105     TESTCASE_AUTO(TestValuesForState);
106     TESTCASE_AUTO(TestCompact);
107     TESTCASE_AUTO(TestFirstForCodePoint);
108     TESTCASE_AUTO(TestNextForCodePoint);
109     TESTCASE_AUTO(TestLargeTrie);
110     TESTCASE_AUTO(TestHasUniqueValue);
111     TESTCASE_AUTO(TestGetNextUChars);
112     TESTCASE_AUTO(TestIteratorFromBranch);
113     TESTCASE_AUTO(TestIteratorFromLinearMatch);
114     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
115     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
116     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
117     TESTCASE_AUTO(TestIteratorFromUChars);
118     TESTCASE_AUTO_END;
119 }
120 
TestBuilder()121 void UCharsTrieTest::TestBuilder() {
122     IcuTestErrorCode errorCode(*this, "TestBuilder()");
123     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
124     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
125         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
126         return;
127     }
128     // TODO: remove .build(...) once add() checks for duplicates.
129     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
130     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
131         errln("UCharsTrieBuilder.add() did not detect duplicates");
132         return;
133     }
134 }
135 
TestEmpty()136 void UCharsTrieTest::TestEmpty() {
137     static const StringAndValue data[]={
138         { "", 0 }
139     };
140     checkData(data, UPRV_LENGTHOF(data));
141 }
142 
Test_a()143 void UCharsTrieTest::Test_a() {
144     static const StringAndValue data[]={
145         { "a", 1 }
146     };
147     checkData(data, UPRV_LENGTHOF(data));
148 }
149 
Test_a_ab()150 void UCharsTrieTest::Test_a_ab() {
151     static const StringAndValue data[]={
152         { "a", 1 },
153         { "ab", 100 }
154     };
155     checkData(data, UPRV_LENGTHOF(data));
156 }
157 
TestShortestBranch()158 void UCharsTrieTest::TestShortestBranch() {
159     static const StringAndValue data[]={
160         { "a", 1000 },
161         { "b", 2000 }
162     };
163     checkData(data, UPRV_LENGTHOF(data));
164 }
165 
TestBranches()166 void UCharsTrieTest::TestBranches() {
167     static const StringAndValue data[]={
168         { "a", 0x10 },
169         { "cc", 0x40 },
170         { "e", 0x100 },
171         { "ggg", 0x400 },
172         { "i", 0x1000 },
173         { "kkkk", 0x4000 },
174         { "n", 0x10000 },
175         { "ppppp", 0x40000 },
176         { "r", 0x100000 },
177         { "sss", 0x200000 },
178         { "t", 0x400000 },
179         { "uu", 0x800000 },
180         { "vv", 0x7fffffff },
181         { "zz", (int32_t)0x80000000 }
182     };
183     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
184         logln("TestBranches length=%d", (int)length);
185         checkData(data, length);
186     }
187 }
188 
TestLongSequence()189 void UCharsTrieTest::TestLongSequence() {
190     static const StringAndValue data[]={
191         { "a", -1 },
192         // sequence of linear-match nodes
193         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
194         // more than 256 units
195         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
201     };
202     checkData(data, UPRV_LENGTHOF(data));
203 }
204 
TestLongBranch()205 void UCharsTrieTest::TestLongBranch() {
206     // Split-branch and interesting compact-integer values.
207     static const StringAndValue data[]={
208         { "a", -2 },
209         { "b", -1 },
210         { "c", 0 },
211         { "d2", 1 },
212         { "f", 0x3f },
213         { "g", 0x40 },
214         { "h", 0x41 },
215         { "j23", 0x1900 },
216         { "j24", 0x19ff },
217         { "j25", 0x1a00 },
218         { "k2", 0x1a80 },
219         { "k3", 0x1aff },
220         { "l234567890", 0x1b00 },
221         { "l234567890123", 0x1b01 },
222         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
223         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
224         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
225         { "r", 0x333333 },
226         { "s2345", 0x4444444 },
227         { "t234567890", 0x77777777 },
228         { "z", (int32_t)0x80000001 }
229     };
230     checkData(data, UPRV_LENGTHOF(data));
231 }
232 
TestValuesForState()233 void UCharsTrieTest::TestValuesForState() {
234     // Check that saveState() and resetToState() interact properly
235     // with next() and current().
236     static const StringAndValue data[]={
237         { "a", -1 },
238         { "ab", -2 },
239         { "abc", -3 },
240         { "abcd", -4 },
241         { "abcde", -5 },
242         { "abcdef", -6 }
243     };
244     checkData(data, UPRV_LENGTHOF(data));
245 }
246 
TestCompact()247 void UCharsTrieTest::TestCompact() {
248     // Duplicate trailing strings and values provide opportunities for compacting.
249     static const StringAndValue data[]={
250         { "+", 0 },
251         { "+august", 8 },
252         { "+december", 12 },
253         { "+july", 7 },
254         { "+june", 6 },
255         { "+november", 11 },
256         { "+october", 10 },
257         { "+september", 9 },
258         { "-", 0 },
259         { "-august", 8 },
260         { "-december", 12 },
261         { "-july", 7 },
262         { "-june", 6 },
263         { "-november", 11 },
264         { "-october", 10 },
265         { "-september", 9 },
266         // The l+n branch (with its sub-nodes) is a duplicate but will be written
267         // both times because each time it follows a different linear-match node.
268         { "xjuly", 7 },
269         { "xjune", 6 }
270     };
271     checkData(data, UPRV_LENGTHOF(data));
272 }
273 
TestFirstForCodePoint()274 void UCharsTrieTest::TestFirstForCodePoint() {
275     static const StringAndValue data[]={
276         { "a", 1 },
277         { "a\\ud800", 2 },
278         { "a\\U00010000", 3 },
279         { "\\ud840", 4 },
280         { "\\U00020000\\udbff", 5 },
281         { "\\U00020000\\U0010ffff", 6 },
282         { "\\U00020000\\U0010ffffz", 7 },
283         { "\\U00050000xy", 8 },
284         { "\\U00050000xyz", 9 }
285     };
286     checkData(data, UPRV_LENGTHOF(data));
287 }
288 
TestNextForCodePoint()289 void UCharsTrieTest::TestNextForCodePoint() {
290     static const StringAndValue data[]={
291         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
292         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
293         { "\\u4dff\\U000103ff", 99999 }
294     };
295     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
296     if(trie.isNull()) {
297         return;  // buildTrie() reported an error
298     }
299     UStringTrieResult result;
300     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
301         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
302         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
306         trie->getValue()!=2000000000
307     ) {
308         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
309     }
310     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
311         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
312         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
314         trie->getValue()!=44444
315     ) {
316         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
317     }
318     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
319         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
320         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
322     ) {
323         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
324     }
325     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
326         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
327         trie->getValue()!=99999
328     ) {
329         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
330     }
331 }
332 
333 // Definitions in the anonymous namespace are invisible outside this file.
334 namespace {
335 
336 // Generate (string, value) pairs.
337 // The first string (before next()) will be empty.
338 class Generator {
339 public:
Generator()340     Generator() : value(4711), num(0) {}
next()341     void next() {
342         UChar c;
343         s.truncate(0);
344         s.append(c=(UChar)(value>>16));
345         s.append((UChar)(value>>4));
346         if(value&1) {
347             s.append((UChar)value);
348         }
349         set.add(c);
350         value+=((value>>5)&0x7ff)*3+1;
351         ++num;
352     }
getString() const353     const UnicodeString &getString() const { return s; }
getValue() const354     int32_t getValue() const { return value; }
countUniqueFirstChars() const355     int32_t countUniqueFirstChars() const { return set.size(); }
getIndex() const356     int32_t getIndex() const { return num; }
357 
358 private:
359     UnicodeString s;
360     UnicodeSet set;
361     int32_t value;
362     int32_t num;
363 };
364 
365 }  // end namespace
366 
buildLargeTrie(int32_t numUniqueFirst)367 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
368     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
369     Generator gen;
370     builder_->clear();
371     while(gen.countUniqueFirstChars()<numUniqueFirst) {
372         builder_->add(gen.getString(), gen.getValue(), errorCode);
373         gen.next();
374     }
375     logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
376     UnicodeString trieUChars;
377     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
378     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
379     return new UCharsTrie(trieUChars.getBuffer());
380 }
381 
382 // Exercise a large branch node.
TestLargeTrie()383 void UCharsTrieTest::TestLargeTrie() {
384     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
385     if(trie.isNull()) {
386         return;  // buildTrie() reported an error
387     }
388     Generator gen;
389     while(gen.countUniqueFirstChars()<1111) {
390         UnicodeString x(gen.getString());
391         int32_t value=gen.getValue();
392         if(!x.isEmpty()) {
393             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
394                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
395                       x[0], (long)gen.getIndex());
396                 break;
397             }
398             x.remove(0, 1);
399         }
400         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
401         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
402             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
403                   "next()!=current() or getValue() wrong "
404                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
405             break;
406         }
407         gen.next();
408     }
409 }
410 
411 enum {
412     u_a=0x61,
413     u_b=0x62,
414     u_c=0x63,
415     u_j=0x6a,
416     u_n=0x6e,
417     u_r=0x72,
418     u_u=0x75,
419     u_y=0x79
420 };
421 
buildMonthsTrie(UStringTrieBuildOption buildOption)422 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
423     // All types of nodes leading to the same value,
424     // for code coverage of recursive functions.
425     // In particular, we need a lot of branches on some single level
426     // to exercise a split-branch node.
427     static const StringAndValue data[]={
428         { "august", 8 },
429         { "jan", 1 },
430         { "jan.", 1 },
431         { "jana", 1 },
432         { "janbb", 1 },
433         { "janc", 1 },
434         { "janddd", 1 },
435         { "janee", 1 },
436         { "janef", 1 },
437         { "janf", 1 },
438         { "jangg", 1 },
439         { "janh", 1 },
440         { "janiiii", 1 },
441         { "janj", 1 },
442         { "jankk", 1 },
443         { "jankl", 1 },
444         { "jankmm", 1 },
445         { "janl", 1 },
446         { "janm", 1 },
447         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
448         { "jano", 1 },
449         { "janpp", 1 },
450         { "janqqq", 1 },
451         { "janr", 1 },
452         { "januar", 1 },
453         { "january", 1 },
454         { "july", 7 },
455         { "jun", 6 },
456         { "jun.", 6 },
457         { "june", 6 }
458     };
459     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
460 }
461 
TestHasUniqueValue()462 void UCharsTrieTest::TestHasUniqueValue() {
463     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
464     if(trie.isNull()) {
465         return;  // buildTrie() reported an error
466     }
467     int32_t uniqueValue;
468     if(trie->hasUniqueValue(uniqueValue)) {
469         errln("unique value at root");
470     }
471     trie->next(u_j);
472     trie->next(u_a);
473     trie->next(u_n);
474     // hasUniqueValue() directly after next()
475     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
476         errln("not unique value 1 after \"jan\"");
477     }
478     trie->first(u_j);
479     trie->next(u_u);
480     if(trie->hasUniqueValue(uniqueValue)) {
481         errln("unique value after \"ju\"");
482     }
483     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
484         errln("not normal value 6 after \"jun\"");
485     }
486     // hasUniqueValue() after getValue()
487     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
488         errln("not unique value 6 after \"jun\"");
489     }
490     // hasUniqueValue() from within a linear-match node
491     trie->first(u_a);
492     trie->next(u_u);
493     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
494         errln("not unique value 8 after \"au\"");
495     }
496 }
497 
TestGetNextUChars()498 void UCharsTrieTest::TestGetNextUChars() {
499     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
500     if(trie.isNull()) {
501         return;  // buildTrie() reported an error
502     }
503     UnicodeString buffer;
504     UnicodeStringAppendable app(buffer);
505     int32_t count=trie->getNextUChars(app);
506     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
507         errln("months getNextUChars()!=[aj] at root");
508     }
509     trie->next(u_j);
510     trie->next(u_a);
511     trie->next(u_n);
512     // getNextUChars() directly after next()
513     buffer.remove();
514     count=trie->getNextUChars(app);
515     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
516         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
517     }
518     // getNextUChars() after getValue()
519     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
520     buffer.remove();
521     count=trie->getNextUChars(app);
522     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
523         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
524     }
525     // getNextUChars() from a linear-match node
526     trie->next(u_u);
527     buffer.remove();
528     count=trie->getNextUChars(app);
529     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
530         errln("months getNextUChars()!=[a] after \"janu\"");
531     }
532     trie->next(u_a);
533     buffer.remove();
534     count=trie->getNextUChars(app);
535     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
536         errln("months getNextUChars()!=[r] after \"janua\"");
537     }
538     trie->next(u_r);
539     trie->next(u_y);
540     // getNextUChars() after a final match
541     buffer.remove();
542     count=trie->getNextUChars(app);
543     if(count!=0 || buffer.length()!=0) {
544         errln("months getNextUChars()!=[] after \"january\"");
545     }
546 }
547 
TestIteratorFromBranch()548 void UCharsTrieTest::TestIteratorFromBranch() {
549     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
550     if(trie.isNull()) {
551         return;  // buildTrie() reported an error
552     }
553     // Go to a branch node.
554     trie->next(u_j);
555     trie->next(u_a);
556     trie->next(u_n);
557     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
558     UCharsTrie::Iterator iter(*trie, 0, errorCode);
559     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
560         return;
561     }
562     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
563     // following "jan".
564     static const StringAndValue data[]={
565         { "", 1 },
566         { ".", 1 },
567         { "a", 1 },
568         { "bb", 1 },
569         { "c", 1 },
570         { "ddd", 1 },
571         { "ee", 1 },
572         { "ef", 1 },
573         { "f", 1 },
574         { "gg", 1 },
575         { "h", 1 },
576         { "iiii", 1 },
577         { "j", 1 },
578         { "kk", 1 },
579         { "kl", 1 },
580         { "kmm", 1 },
581         { "l", 1 },
582         { "m", 1 },
583         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
584         { "o", 1 },
585         { "pp", 1 },
586         { "qqq", 1 },
587         { "r", 1 },
588         { "uar", 1 },
589         { "uary", 1 }
590     };
591     checkIterator(iter, data, UPRV_LENGTHOF(data));
592     // Reset, and we should get the same result.
593     logln("after iter.reset()");
594     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
595 }
596 
TestIteratorFromLinearMatch()597 void UCharsTrieTest::TestIteratorFromLinearMatch() {
598     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
599     if(trie.isNull()) {
600         return;  // buildTrie() reported an error
601     }
602     // Go into a linear-match node.
603     trie->next(u_j);
604     trie->next(u_a);
605     trie->next(u_n);
606     trie->next(u_u);
607     trie->next(u_a);
608     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
609     UCharsTrie::Iterator iter(*trie, 0, errorCode);
610     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
611         return;
612     }
613     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
614     // following "janua".
615     static const StringAndValue data[]={
616         { "r", 1 },
617         { "ry", 1 }
618     };
619     checkIterator(iter, data, UPRV_LENGTHOF(data));
620     // Reset, and we should get the same result.
621     logln("after iter.reset()");
622     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
623 }
624 
TestTruncatingIteratorFromRoot()625 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
626     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
627     if(trie.isNull()) {
628         return;  // buildTrie() reported an error
629     }
630     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
631     UCharsTrie::Iterator iter(*trie, 4, errorCode);
632     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
633         return;
634     }
635     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
636     // of each string, and no string duplicates from the truncation.
637     static const StringAndValue data[]={
638         { "augu", -1 },
639         { "jan", 1 },
640         { "jan.", 1 },
641         { "jana", 1 },
642         { "janb", -1 },
643         { "janc", 1 },
644         { "jand", -1 },
645         { "jane", -1 },
646         { "janf", 1 },
647         { "jang", -1 },
648         { "janh", 1 },
649         { "jani", -1 },
650         { "janj", 1 },
651         { "jank", -1 },
652         { "janl", 1 },
653         { "janm", 1 },
654         { "jann", -1 },
655         { "jano", 1 },
656         { "janp", -1 },
657         { "janq", -1 },
658         { "janr", 1 },
659         { "janu", -1 },
660         { "july", 7 },
661         { "jun", 6 },
662         { "jun.", 6 },
663         { "june", 6 }
664     };
665     checkIterator(iter, data, UPRV_LENGTHOF(data));
666     // Reset, and we should get the same result.
667     logln("after iter.reset()");
668     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
669 }
670 
TestTruncatingIteratorFromLinearMatchShort()671 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
672     static const StringAndValue data[]={
673         { "abcdef", 10 },
674         { "abcdepq", 200 },
675         { "abcdeyz", 3000 }
676     };
677     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
678     if(trie.isNull()) {
679         return;  // buildTrie() reported an error
680     }
681     // Go into a linear-match node.
682     trie->next(u_a);
683     trie->next(u_b);
684     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
685     // Truncate within the linear-match node.
686     UCharsTrie::Iterator iter(*trie, 2, errorCode);
687     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
688         return;
689     }
690     static const StringAndValue expected[]={
691         { "cd", -1 }
692     };
693     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
694     // Reset, and we should get the same result.
695     logln("after iter.reset()");
696     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
697 }
698 
TestTruncatingIteratorFromLinearMatchLong()699 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
700     static const StringAndValue data[]={
701         { "abcdef", 10 },
702         { "abcdepq", 200 },
703         { "abcdeyz", 3000 }
704     };
705     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
706     if(trie.isNull()) {
707         return;  // buildTrie() reported an error
708     }
709     // Go into a linear-match node.
710     trie->next(u_a);
711     trie->next(u_b);
712     trie->next(u_c);
713     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
714     // Truncate after the linear-match node.
715     UCharsTrie::Iterator iter(*trie, 3, errorCode);
716     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
717         return;
718     }
719     static const StringAndValue expected[]={
720         { "def", 10 },
721         { "dep", -1 },
722         { "dey", -1 }
723     };
724     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
725     // Reset, and we should get the same result.
726     logln("after iter.reset()");
727     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
728 }
729 
TestIteratorFromUChars()730 void UCharsTrieTest::TestIteratorFromUChars() {
731     static const StringAndValue data[]={
732         { "mm", 3 },
733         { "mmm", 33 },
734         { "mmnop", 333 }
735     };
736     builder_->clear();
737     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
738     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
739         builder_->add(data[i].s, data[i].value, errorCode);
740     }
741     UnicodeString trieUChars;
742     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
743     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
744     checkIterator(iter, data, UPRV_LENGTHOF(data));
745 }
746 
checkData(const StringAndValue data[],int32_t dataLength)747 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
748     logln("checkData(dataLength=%d, fast)", (int)dataLength);
749     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
750     logln("checkData(dataLength=%d, small)", (int)dataLength);
751     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
752 }
753 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)754 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
755     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
756     if(trie.isNull()) {
757         return;  // buildTrie() reported an error
758     }
759     checkFirst(*trie, data, dataLength);
760     checkNext(*trie, data, dataLength);
761     checkNextWithState(*trie, data, dataLength);
762     checkNextString(*trie, data, dataLength);
763     checkIterator(*trie, data, dataLength);
764 }
765 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)766 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
767                                       UStringTrieBuildOption buildOption) {
768     IcuTestErrorCode errorCode(*this, "buildTrie()");
769     // Add the items to the trie builder in an interesting (not trivial, not random) order.
770     int32_t index, step;
771     if(dataLength&1) {
772         // Odd number of items.
773         index=dataLength/2;
774         step=2;
775     } else if((dataLength%3)!=0) {
776         // Not a multiple of 3.
777         index=dataLength/5;
778         step=3;
779     } else {
780         index=dataLength-1;
781         step=-1;
782     }
783     builder_->clear();
784     for(int32_t i=0; i<dataLength; ++i) {
785         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
786                       data[index].value, errorCode);
787         index=(index+step)%dataLength;
788     }
789     UnicodeString trieUChars;
790     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
791     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
792     if(!errorCode.logIfFailureAndReset("add()/build()")) {
793         builder_->add("zzz", 999, errorCode);
794         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
795             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
796         }
797     }
798     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
799     UnicodeString trieUChars2;
800     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
801     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
802         errln("builder.buildUnicodeString() before & after build() returned same array");
803     }
804     if(errorCode.isFailure()) {
805         return NULL;
806     }
807     // Tries from either build() method should be identical but
808     // UCharsTrie does not implement equals().
809     // We just return either one.
810     if((dataLength&1)!=0) {
811         return trie.orphan();
812     } else {
813         return new UCharsTrie(trieUChars2.getBuffer());
814     }
815 }
816 
checkFirst(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)817 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
818                                 const StringAndValue data[], int32_t dataLength) {
819     for(int32_t i=0; i<dataLength; ++i) {
820         if(*data[i].s==0) {
821             continue;  // skip empty string
822         }
823         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
824         UChar32 c=expectedString[0];
825         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
826         UStringTrieResult firstResult=trie.first(c);
827         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
828         UStringTrieResult nextResult=trie.next(nextCp);
829         if(firstResult!=trie.reset().next(c) ||
830            firstResult!=trie.current() ||
831            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
832            nextResult!=trie.next(nextCp)
833         ) {
834             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
835                   c, data[i].s);
836         }
837         c=expectedString.char32At(0);
838         int32_t cLength=U16_LENGTH(c);
839         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
840         firstResult=trie.firstForCodePoint(c);
841         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
842         nextResult=trie.nextForCodePoint(nextCp);
843         if(firstResult!=trie.reset().nextForCodePoint(c) ||
844            firstResult!=trie.current() ||
845            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
846            nextResult!=trie.nextForCodePoint(nextCp)
847         ) {
848             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
849                   c, data[i].s);
850         }
851     }
852     trie.reset();
853 }
854 
checkNext(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)855 void UCharsTrieTest::checkNext(UCharsTrie &trie,
856                                const StringAndValue data[], int32_t dataLength) {
857     UCharsTrie::State state;
858     for(int32_t i=0; i<dataLength; ++i) {
859         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
860         int32_t stringLength= (i&1) ? -1 : expectedString.length();
861         UStringTrieResult result;
862         if( !USTRINGTRIE_HAS_VALUE(
863                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
864             result!=trie.current()
865         ) {
866             errln("trie does not seem to contain %s", data[i].s);
867         } else if(trie.getValue()!=data[i].value) {
868             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
869                   data[i].s,
870                   (long)trie.getValue(), (long)trie.getValue(),
871                   (long)data[i].value, (long)data[i].value);
872         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
873             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
874         }
875         trie.reset();
876         stringLength=expectedString.length();
877         result=trie.current();
878         for(int32_t j=0; j<stringLength; ++j) {
879             if(!USTRINGTRIE_HAS_NEXT(result)) {
880                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
881                 break;
882             }
883             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
884                 trie.getValue();
885                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
886                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
887                     break;
888                 }
889             }
890             result=trie.next(expectedString[j]);
891             if(!USTRINGTRIE_MATCHES(result)) {
892                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
893                 break;
894             }
895             if(result!=trie.current()) {
896                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
897                 break;
898             }
899         }
900         if(!USTRINGTRIE_HAS_VALUE(result)) {
901             errln("trie.next()!=hasValue at the end of %s", data[i].s);
902             continue;
903         }
904         trie.getValue();
905         if(result!=trie.current()) {
906             errln("trie.current() != current()+getValue()+current() after end of %s",
907                   data[i].s);
908         }
909         // Compare the final current() with whether next() can actually continue.
910         trie.saveState(state);
911         UBool nextContinues=FALSE;
912         for(int32_t c=0x20; c<0xe000; ++c) {
913             if(c==0x80) {
914                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
915             }
916             if(trie.resetToState(state).next(c)) {
917                 nextContinues=TRUE;
918                 break;
919             }
920         }
921         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
922             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
923                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
924         }
925         trie.reset();
926     }
927 }
928 
checkNextWithState(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)929 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
930                                         const StringAndValue data[], int32_t dataLength) {
931     UCharsTrie::State noState, state;
932     for(int32_t i=0; i<dataLength; ++i) {
933         if((i&1)==0) {
934             // This should have no effect.
935             trie.resetToState(noState);
936         }
937         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
938         int32_t stringLength=expectedString.length();
939         int32_t partialLength=stringLength/3;
940         for(int32_t j=0; j<partialLength; ++j) {
941             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
942                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
943                 return;
944             }
945         }
946         trie.saveState(state);
947         UStringTrieResult resultAtState=trie.current();
948         UStringTrieResult result;
949         int32_t valueAtState=-99;
950         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
951             valueAtState=trie.getValue();
952         }
953         result=trie.next(0);  // mismatch
954         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
955             errln("trie.next(0) matched after part of %s", data[i].s);
956         }
957         if( resultAtState!=trie.resetToState(state).current() ||
958             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
959         ) {
960             errln("trie.next(part of %s) changes current()/getValue() after "
961                   "saveState/next(0)/resetToState",
962                   data[i].s);
963         } else if(!USTRINGTRIE_HAS_VALUE(
964                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
965                                        stringLength-partialLength)) ||
966                   result!=trie.current()) {
967             errln("trie.next(rest of %s) does not seem to contain %s after "
968                   "saveState/next(0)/resetToState",
969                   data[i].s, data[i].s);
970         } else if(!USTRINGTRIE_HAS_VALUE(
971                       result=trie.resetToState(state).
972                                   next(expectedString.getTerminatedBuffer()+partialLength,
973                                        stringLength-partialLength)) ||
974                   result!=trie.current()) {
975             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
976                   data[i].s);
977         } else if(trie.getValue()!=data[i].value) {
978             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
979                   data[i].s,
980                   (long)trie.getValue(), (long)trie.getValue(),
981                   (long)data[i].value, (long)data[i].value);
982         }
983         trie.reset();
984     }
985 }
986 
987 // next(string) is also tested in other functions,
988 // but here we try to go partway through the string, and then beyond it.
checkNextString(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)989 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
990                                      const StringAndValue data[], int32_t dataLength) {
991     for(int32_t i=0; i<dataLength; ++i) {
992         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
993         int32_t stringLength=expectedString.length();
994         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
995             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
996             continue;
997         }
998         // Test that we stop properly at the end of the string.
999         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1000                      stringLength+1-stringLength/2)) {
1001             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1002         }
1003         trie.reset();
1004     }
1005 }
1006 
checkIterator(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)1007 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1008                                    const StringAndValue data[], int32_t dataLength) {
1009     IcuTestErrorCode errorCode(*this, "checkIterator()");
1010     UCharsTrie::Iterator iter(trie, 0, errorCode);
1011     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1012         return;
1013     }
1014     checkIterator(iter, data, dataLength);
1015 }
1016 
checkIterator(UCharsTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)1017 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1018                                    const StringAndValue data[], int32_t dataLength) {
1019     IcuTestErrorCode errorCode(*this, "checkIterator()");
1020     for(int32_t i=0; i<dataLength; ++i) {
1021         if(!iter.hasNext()) {
1022             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1023             break;
1024         }
1025         UBool hasNext=iter.next(errorCode);
1026         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1027             break;
1028         }
1029         if(!hasNext) {
1030             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1031             break;
1032         }
1033         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1034         if(iter.getString()!=expectedString) {
1035             char buffer[1000];
1036             UnicodeString invString(prettify(iter.getString()));
1037             invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
1038             errln("trie iterator next().getString()=%s but expected %s for item %d",
1039                   buffer, data[i].s, (int)i);
1040         }
1041         if(iter.getValue()!=data[i].value) {
1042             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1043                   (long)iter.getValue(), (long)iter.getValue(),
1044                   (long)data[i].value, (long)data[i].value,
1045                   (int)i, data[i].s);
1046         }
1047     }
1048     if(iter.hasNext()) {
1049         errln("trie iterator hasNext()=TRUE after all items");
1050     }
1051     UBool hasNext=iter.next(errorCode);
1052     errorCode.logIfFailureAndReset("trie iterator next() after all items");
1053     if(hasNext) {
1054         errln("trie iterator next()=TRUE after all items");
1055     }
1056 }
1057