1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  ucharstrietest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010nov16
14 *   created by: Markus W. Scherer
15 */
16 
17 #include <string.h>
18 
19 #include "unicode/utypes.h"
20 #include "unicode/appendable.h"
21 #include "unicode/localpointer.h"
22 #include "unicode/ucharstrie.h"
23 #include "unicode/ucharstriebuilder.h"
24 #include "unicode/uniset.h"
25 #include "unicode/unistr.h"
26 #include "unicode/utf16.h"
27 #include "intltest.h"
28 #include "cmemory.h"
29 
// One test item: a key string and the value that the trie should map it to.
// buildTrie() converts the key with US_INV and then unescape(), so keys may
// contain backslash escapes such as \\uXXXX.
// Kept as a plain aggregate so that tables can be brace-initialized.
struct StringAndValue {
    const char *s;      // key string
    int32_t value;      // value expected for the key
};
34 
35 class UCharsTrieTest : public IntlTest {
36 public:
37     UCharsTrieTest();
38     virtual ~UCharsTrieTest();
39 
40     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
41     void TestBuilder();
42     void TestEmpty();
43     void Test_a();
44     void Test_a_ab();
45     void TestShortestBranch();
46     void TestBranches();
47     void TestLongSequence();
48     void TestLongBranch();
49     void TestValuesForState();
50     void TestCompact();
51     void TestFirstForCodePoint();
52     void TestNextForCodePoint();
53 
54     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
55     void TestLargeTrie();
56 
57     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
58     void TestHasUniqueValue();
59     void TestGetNextUChars();
60     void TestIteratorFromBranch();
61     void TestIteratorFromLinearMatch();
62     void TestTruncatingIteratorFromRoot();
63     void TestTruncatingIteratorFromLinearMatchShort();
64     void TestTruncatingIteratorFromLinearMatchLong();
65     void TestIteratorFromUChars();
66 
67     void checkData(const StringAndValue data[], int32_t dataLength);
68     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
69     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
70                           UStringTrieBuildOption buildOption);
71     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
75     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
76     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
77 
78 private:
79     UCharsTrieBuilder *builder_;
80 };
81 
createUCharsTrieTest()82 extern IntlTest *createUCharsTrieTest() {
83     return new UCharsTrieTest();
84 }
85 
UCharsTrieTest()86 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
87     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
88     builder_=new UCharsTrieBuilder(errorCode);
89 }
90 
~UCharsTrieTest()91 UCharsTrieTest::~UCharsTrieTest() {
92     delete builder_;
93 }
94 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)95 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
96     if(exec) {
97         logln("TestSuite UCharsTrieTest: ");
98     }
99     TESTCASE_AUTO_BEGIN;
100     TESTCASE_AUTO(TestBuilder);
101     TESTCASE_AUTO(TestEmpty);
102     TESTCASE_AUTO(Test_a);
103     TESTCASE_AUTO(Test_a_ab);
104     TESTCASE_AUTO(TestShortestBranch);
105     TESTCASE_AUTO(TestBranches);
106     TESTCASE_AUTO(TestLongSequence);
107     TESTCASE_AUTO(TestLongBranch);
108     TESTCASE_AUTO(TestValuesForState);
109     TESTCASE_AUTO(TestCompact);
110     TESTCASE_AUTO(TestFirstForCodePoint);
111     TESTCASE_AUTO(TestNextForCodePoint);
112     TESTCASE_AUTO(TestLargeTrie);
113     TESTCASE_AUTO(TestHasUniqueValue);
114     TESTCASE_AUTO(TestGetNextUChars);
115     TESTCASE_AUTO(TestIteratorFromBranch);
116     TESTCASE_AUTO(TestIteratorFromLinearMatch);
117     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
118     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
119     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
120     TESTCASE_AUTO(TestIteratorFromUChars);
121     TESTCASE_AUTO_END;
122 }
123 
TestBuilder()124 void UCharsTrieTest::TestBuilder() {
125     IcuTestErrorCode errorCode(*this, "TestBuilder()");
126     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
127     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
128         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
129         return;
130     }
131     // TODO: remove .build(...) once add() checks for duplicates.
132     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
133     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
134         errln("UCharsTrieBuilder.add() did not detect duplicates");
135         return;
136     }
137 }
138 
TestEmpty()139 void UCharsTrieTest::TestEmpty() {
140     static const StringAndValue data[]={
141         { "", 0 }
142     };
143     checkData(data, UPRV_LENGTHOF(data));
144 }
145 
Test_a()146 void UCharsTrieTest::Test_a() {
147     static const StringAndValue data[]={
148         { "a", 1 }
149     };
150     checkData(data, UPRV_LENGTHOF(data));
151 }
152 
Test_a_ab()153 void UCharsTrieTest::Test_a_ab() {
154     static const StringAndValue data[]={
155         { "a", 1 },
156         { "ab", 100 }
157     };
158     checkData(data, UPRV_LENGTHOF(data));
159 }
160 
TestShortestBranch()161 void UCharsTrieTest::TestShortestBranch() {
162     static const StringAndValue data[]={
163         { "a", 1000 },
164         { "b", 2000 }
165     };
166     checkData(data, UPRV_LENGTHOF(data));
167 }
168 
TestBranches()169 void UCharsTrieTest::TestBranches() {
170     static const StringAndValue data[]={
171         { "a", 0x10 },
172         { "cc", 0x40 },
173         { "e", 0x100 },
174         { "ggg", 0x400 },
175         { "i", 0x1000 },
176         { "kkkk", 0x4000 },
177         { "n", 0x10000 },
178         { "ppppp", 0x40000 },
179         { "r", 0x100000 },
180         { "sss", 0x200000 },
181         { "t", 0x400000 },
182         { "uu", 0x800000 },
183         { "vv", 0x7fffffff },
184         { "zz", (int32_t)0x80000000 }
185     };
186     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
187         logln("TestBranches length=%d", (int)length);
188         checkData(data, length);
189     }
190 }
191 
TestLongSequence()192 void UCharsTrieTest::TestLongSequence() {
193     static const StringAndValue data[]={
194         { "a", -1 },
195         // sequence of linear-match nodes
196         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
197         // more than 256 units
198         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
202           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
203           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
204     };
205     checkData(data, UPRV_LENGTHOF(data));
206 }
207 
TestLongBranch()208 void UCharsTrieTest::TestLongBranch() {
209     // Split-branch and interesting compact-integer values.
210     static const StringAndValue data[]={
211         { "a", -2 },
212         { "b", -1 },
213         { "c", 0 },
214         { "d2", 1 },
215         { "f", 0x3f },
216         { "g", 0x40 },
217         { "h", 0x41 },
218         { "j23", 0x1900 },
219         { "j24", 0x19ff },
220         { "j25", 0x1a00 },
221         { "k2", 0x1a80 },
222         { "k3", 0x1aff },
223         { "l234567890", 0x1b00 },
224         { "l234567890123", 0x1b01 },
225         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
226         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
227         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
228         { "r", 0x333333 },
229         { "s2345", 0x4444444 },
230         { "t234567890", 0x77777777 },
231         { "z", (int32_t)0x80000001 }
232     };
233     checkData(data, UPRV_LENGTHOF(data));
234 }
235 
TestValuesForState()236 void UCharsTrieTest::TestValuesForState() {
237     // Check that saveState() and resetToState() interact properly
238     // with next() and current().
239     static const StringAndValue data[]={
240         { "a", -1 },
241         { "ab", -2 },
242         { "abc", -3 },
243         { "abcd", -4 },
244         { "abcde", -5 },
245         { "abcdef", -6 }
246     };
247     checkData(data, UPRV_LENGTHOF(data));
248 }
249 
TestCompact()250 void UCharsTrieTest::TestCompact() {
251     // Duplicate trailing strings and values provide opportunities for compacting.
252     static const StringAndValue data[]={
253         { "+", 0 },
254         { "+august", 8 },
255         { "+december", 12 },
256         { "+july", 7 },
257         { "+june", 6 },
258         { "+november", 11 },
259         { "+october", 10 },
260         { "+september", 9 },
261         { "-", 0 },
262         { "-august", 8 },
263         { "-december", 12 },
264         { "-july", 7 },
265         { "-june", 6 },
266         { "-november", 11 },
267         { "-october", 10 },
268         { "-september", 9 },
269         // The l+n branch (with its sub-nodes) is a duplicate but will be written
270         // both times because each time it follows a different linear-match node.
271         { "xjuly", 7 },
272         { "xjune", 6 }
273     };
274     checkData(data, UPRV_LENGTHOF(data));
275 }
276 
TestFirstForCodePoint()277 void UCharsTrieTest::TestFirstForCodePoint() {
278     static const StringAndValue data[]={
279         { "a", 1 },
280         { "a\\ud800", 2 },
281         { "a\\U00010000", 3 },
282         { "\\ud840", 4 },
283         { "\\U00020000\\udbff", 5 },
284         { "\\U00020000\\U0010ffff", 6 },
285         { "\\U00020000\\U0010ffffz", 7 },
286         { "\\U00050000xy", 8 },
287         { "\\U00050000xyz", 9 }
288     };
289     checkData(data, UPRV_LENGTHOF(data));
290 }
291 
TestNextForCodePoint()292 void UCharsTrieTest::TestNextForCodePoint() {
293     static const StringAndValue data[]={
294         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
295         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
296         { "\\u4dff\\U000103ff", 99999 }
297     };
298     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
299     if(trie.isNull()) {
300         return;  // buildTrie() reported an error
301     }
302     UStringTrieResult result;
303     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
307         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
308         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
309         trie->getValue()!=2000000000
310     ) {
311         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
312     }
313     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
315         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
316         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
317         trie->getValue()!=44444
318     ) {
319         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
320     }
321     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
323         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
324         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
325     ) {
326         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
327     }
328     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
329         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
330         trie->getValue()!=99999
331     ) {
332         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
333     }
334 }
335 
336 // Definitions in the anonymous namespace are invisible outside this file.
337 namespace {
338 
339 // Generate (string, value) pairs.
340 // The first string (before next()) will be empty.
341 class Generator {
342 public:
Generator()343     Generator() : value(4711), num(0) {}
next()344     void next() {
345         UChar c;
346         s.truncate(0);
347         s.append(c=(UChar)(value>>16));
348         s.append((UChar)(value>>4));
349         if(value&1) {
350             s.append((UChar)value);
351         }
352         set.add(c);
353         value+=((value>>5)&0x7ff)*3+1;
354         ++num;
355     }
getString() const356     const UnicodeString &getString() const { return s; }
getValue() const357     int32_t getValue() const { return value; }
countUniqueFirstChars() const358     int32_t countUniqueFirstChars() const { return set.size(); }
getIndex() const359     int32_t getIndex() const { return num; }
360 
361 private:
362     UnicodeString s;
363     UnicodeSet set;
364     int32_t value;
365     int32_t num;
366 };
367 
368 }  // end namespace
369 
buildLargeTrie(int32_t numUniqueFirst)370 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
371     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
372     Generator gen;
373     builder_->clear();
374     while(gen.countUniqueFirstChars()<numUniqueFirst) {
375         builder_->add(gen.getString(), gen.getValue(), errorCode);
376         gen.next();
377     }
378     logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
379     UnicodeString trieUChars;
380     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
381     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
382     return new UCharsTrie(trieUChars.getBuffer());
383 }
384 
385 // Exercise a large branch node.
TestLargeTrie()386 void UCharsTrieTest::TestLargeTrie() {
387     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
388     if(trie.isNull()) {
389         return;  // buildTrie() reported an error
390     }
391     Generator gen;
392     while(gen.countUniqueFirstChars()<1111) {
393         UnicodeString x(gen.getString());
394         int32_t value=gen.getValue();
395         if(!x.isEmpty()) {
396             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
397                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
398                       x[0], (long)gen.getIndex());
399                 break;
400             }
401             x.remove(0, 1);
402         }
403         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
404         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
405             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
406                   "next()!=current() or getValue() wrong "
407                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
408             break;
409         }
410         gen.next();
411     }
412 }
413 
// Code units for the lowercase letters that the tests below pass to
// first() and next().
enum {
    u_a=0x61,  // 'a'
    u_b=0x62,  // 'b'
    u_c=0x63,  // 'c'
    u_j=0x6a,  // 'j'
    u_n=0x6e,  // 'n'
    u_r=0x72,  // 'r'
    u_u=0x75,  // 'u'
    u_y=0x79   // 'y'
};
424 
buildMonthsTrie(UStringTrieBuildOption buildOption)425 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
426     // All types of nodes leading to the same value,
427     // for code coverage of recursive functions.
428     // In particular, we need a lot of branches on some single level
429     // to exercise a split-branch node.
430     static const StringAndValue data[]={
431         { "august", 8 },
432         { "jan", 1 },
433         { "jan.", 1 },
434         { "jana", 1 },
435         { "janbb", 1 },
436         { "janc", 1 },
437         { "janddd", 1 },
438         { "janee", 1 },
439         { "janef", 1 },
440         { "janf", 1 },
441         { "jangg", 1 },
442         { "janh", 1 },
443         { "janiiii", 1 },
444         { "janj", 1 },
445         { "jankk", 1 },
446         { "jankl", 1 },
447         { "jankmm", 1 },
448         { "janl", 1 },
449         { "janm", 1 },
450         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
451         { "jano", 1 },
452         { "janpp", 1 },
453         { "janqqq", 1 },
454         { "janr", 1 },
455         { "januar", 1 },
456         { "january", 1 },
457         { "july", 7 },
458         { "jun", 6 },
459         { "jun.", 6 },
460         { "june", 6 }
461     };
462     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
463 }
464 
TestHasUniqueValue()465 void UCharsTrieTest::TestHasUniqueValue() {
466     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
467     if(trie.isNull()) {
468         return;  // buildTrie() reported an error
469     }
470     int32_t uniqueValue;
471     if(trie->hasUniqueValue(uniqueValue)) {
472         errln("unique value at root");
473     }
474     trie->next(u_j);
475     trie->next(u_a);
476     trie->next(u_n);
477     // hasUniqueValue() directly after next()
478     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
479         errln("not unique value 1 after \"jan\"");
480     }
481     trie->first(u_j);
482     trie->next(u_u);
483     if(trie->hasUniqueValue(uniqueValue)) {
484         errln("unique value after \"ju\"");
485     }
486     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
487         errln("not normal value 6 after \"jun\"");
488     }
489     // hasUniqueValue() after getValue()
490     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
491         errln("not unique value 6 after \"jun\"");
492     }
493     // hasUniqueValue() from within a linear-match node
494     trie->first(u_a);
495     trie->next(u_u);
496     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
497         errln("not unique value 8 after \"au\"");
498     }
499 }
500 
TestGetNextUChars()501 void UCharsTrieTest::TestGetNextUChars() {
502     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
503     if(trie.isNull()) {
504         return;  // buildTrie() reported an error
505     }
506     UnicodeString buffer;
507     UnicodeStringAppendable app(buffer);
508     int32_t count=trie->getNextUChars(app);
509     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
510         errln("months getNextUChars()!=[aj] at root");
511     }
512     trie->next(u_j);
513     trie->next(u_a);
514     trie->next(u_n);
515     // getNextUChars() directly after next()
516     buffer.remove();
517     count=trie->getNextUChars(app);
518     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
519         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
520     }
521     // getNextUChars() after getValue()
522     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
523     buffer.remove();
524     count=trie->getNextUChars(app);
525     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
526         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
527     }
528     // getNextUChars() from a linear-match node
529     trie->next(u_u);
530     buffer.remove();
531     count=trie->getNextUChars(app);
532     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
533         errln("months getNextUChars()!=[a] after \"janu\"");
534     }
535     trie->next(u_a);
536     buffer.remove();
537     count=trie->getNextUChars(app);
538     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
539         errln("months getNextUChars()!=[r] after \"janua\"");
540     }
541     trie->next(u_r);
542     trie->next(u_y);
543     // getNextUChars() after a final match
544     buffer.remove();
545     count=trie->getNextUChars(app);
546     if(count!=0 || buffer.length()!=0) {
547         errln("months getNextUChars()!=[] after \"january\"");
548     }
549 }
550 
TestIteratorFromBranch()551 void UCharsTrieTest::TestIteratorFromBranch() {
552     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
553     if(trie.isNull()) {
554         return;  // buildTrie() reported an error
555     }
556     // Go to a branch node.
557     trie->next(u_j);
558     trie->next(u_a);
559     trie->next(u_n);
560     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
561     UCharsTrie::Iterator iter(*trie, 0, errorCode);
562     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
563         return;
564     }
565     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
566     // following "jan".
567     static const StringAndValue data[]={
568         { "", 1 },
569         { ".", 1 },
570         { "a", 1 },
571         { "bb", 1 },
572         { "c", 1 },
573         { "ddd", 1 },
574         { "ee", 1 },
575         { "ef", 1 },
576         { "f", 1 },
577         { "gg", 1 },
578         { "h", 1 },
579         { "iiii", 1 },
580         { "j", 1 },
581         { "kk", 1 },
582         { "kl", 1 },
583         { "kmm", 1 },
584         { "l", 1 },
585         { "m", 1 },
586         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
587         { "o", 1 },
588         { "pp", 1 },
589         { "qqq", 1 },
590         { "r", 1 },
591         { "uar", 1 },
592         { "uary", 1 }
593     };
594     checkIterator(iter, data, UPRV_LENGTHOF(data));
595     // Reset, and we should get the same result.
596     logln("after iter.reset()");
597     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
598 }
599 
TestIteratorFromLinearMatch()600 void UCharsTrieTest::TestIteratorFromLinearMatch() {
601     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
602     if(trie.isNull()) {
603         return;  // buildTrie() reported an error
604     }
605     // Go into a linear-match node.
606     trie->next(u_j);
607     trie->next(u_a);
608     trie->next(u_n);
609     trie->next(u_u);
610     trie->next(u_a);
611     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
612     UCharsTrie::Iterator iter(*trie, 0, errorCode);
613     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
614         return;
615     }
616     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
617     // following "janua".
618     static const StringAndValue data[]={
619         { "r", 1 },
620         { "ry", 1 }
621     };
622     checkIterator(iter, data, UPRV_LENGTHOF(data));
623     // Reset, and we should get the same result.
624     logln("after iter.reset()");
625     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
626 }
627 
TestTruncatingIteratorFromRoot()628 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
629     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
630     if(trie.isNull()) {
631         return;  // buildTrie() reported an error
632     }
633     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
634     UCharsTrie::Iterator iter(*trie, 4, errorCode);
635     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
636         return;
637     }
638     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
639     // of each string, and no string duplicates from the truncation.
640     static const StringAndValue data[]={
641         { "augu", -1 },
642         { "jan", 1 },
643         { "jan.", 1 },
644         { "jana", 1 },
645         { "janb", -1 },
646         { "janc", 1 },
647         { "jand", -1 },
648         { "jane", -1 },
649         { "janf", 1 },
650         { "jang", -1 },
651         { "janh", 1 },
652         { "jani", -1 },
653         { "janj", 1 },
654         { "jank", -1 },
655         { "janl", 1 },
656         { "janm", 1 },
657         { "jann", -1 },
658         { "jano", 1 },
659         { "janp", -1 },
660         { "janq", -1 },
661         { "janr", 1 },
662         { "janu", -1 },
663         { "july", 7 },
664         { "jun", 6 },
665         { "jun.", 6 },
666         { "june", 6 }
667     };
668     checkIterator(iter, data, UPRV_LENGTHOF(data));
669     // Reset, and we should get the same result.
670     logln("after iter.reset()");
671     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
672 }
673 
TestTruncatingIteratorFromLinearMatchShort()674 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
675     static const StringAndValue data[]={
676         { "abcdef", 10 },
677         { "abcdepq", 200 },
678         { "abcdeyz", 3000 }
679     };
680     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
681     if(trie.isNull()) {
682         return;  // buildTrie() reported an error
683     }
684     // Go into a linear-match node.
685     trie->next(u_a);
686     trie->next(u_b);
687     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
688     // Truncate within the linear-match node.
689     UCharsTrie::Iterator iter(*trie, 2, errorCode);
690     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
691         return;
692     }
693     static const StringAndValue expected[]={
694         { "cd", -1 }
695     };
696     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
697     // Reset, and we should get the same result.
698     logln("after iter.reset()");
699     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
700 }
701 
TestTruncatingIteratorFromLinearMatchLong()702 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
703     static const StringAndValue data[]={
704         { "abcdef", 10 },
705         { "abcdepq", 200 },
706         { "abcdeyz", 3000 }
707     };
708     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
709     if(trie.isNull()) {
710         return;  // buildTrie() reported an error
711     }
712     // Go into a linear-match node.
713     trie->next(u_a);
714     trie->next(u_b);
715     trie->next(u_c);
716     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
717     // Truncate after the linear-match node.
718     UCharsTrie::Iterator iter(*trie, 3, errorCode);
719     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
720         return;
721     }
722     static const StringAndValue expected[]={
723         { "def", 10 },
724         { "dep", -1 },
725         { "dey", -1 }
726     };
727     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
728     // Reset, and we should get the same result.
729     logln("after iter.reset()");
730     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
731 }
732 
TestIteratorFromUChars()733 void UCharsTrieTest::TestIteratorFromUChars() {
734     static const StringAndValue data[]={
735         { "mm", 3 },
736         { "mmm", 33 },
737         { "mmnop", 333 }
738     };
739     builder_->clear();
740     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
741     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
742         builder_->add(data[i].s, data[i].value, errorCode);
743     }
744     UnicodeString trieUChars;
745     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
746     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
747     checkIterator(iter, data, UPRV_LENGTHOF(data));
748 }
749 
checkData(const StringAndValue data[],int32_t dataLength)750 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
751     logln("checkData(dataLength=%d, fast)", (int)dataLength);
752     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
753     logln("checkData(dataLength=%d, small)", (int)dataLength);
754     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
755 }
756 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)757 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
758     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
759     if(trie.isNull()) {
760         return;  // buildTrie() reported an error
761     }
762     checkFirst(*trie, data, dataLength);
763     checkNext(*trie, data, dataLength);
764     checkNextWithState(*trie, data, dataLength);
765     checkNextString(*trie, data, dataLength);
766     checkIterator(*trie, data, dataLength);
767 }
768 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)769 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
770                                       UStringTrieBuildOption buildOption) {
771     IcuTestErrorCode errorCode(*this, "buildTrie()");
772     // Add the items to the trie builder in an interesting (not trivial, not random) order.
773     int32_t index, step;
774     if(dataLength&1) {
775         // Odd number of items.
776         index=dataLength/2;
777         step=2;
778     } else if((dataLength%3)!=0) {
779         // Not a multiple of 3.
780         index=dataLength/5;
781         step=3;
782     } else {
783         index=dataLength-1;
784         step=-1;
785     }
786     builder_->clear();
787     for(int32_t i=0; i<dataLength; ++i) {
788         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
789                       data[index].value, errorCode);
790         index=(index+step)%dataLength;
791     }
792     UnicodeString trieUChars;
793     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
794     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
795     if(!errorCode.errIfFailureAndReset("add()/build()")) {
796         builder_->add("zzz", 999, errorCode);
797         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
798             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
799         }
800     }
801     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
802     UnicodeString trieUChars2;
803     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
804     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
805         errln("builder.buildUnicodeString() before & after build() returned same array");
806     }
807     if(errorCode.isFailure()) {
808         return NULL;
809     }
810     // Tries from either build() method should be identical but
811     // UCharsTrie does not implement equals().
812     // We just return either one.
813     if((dataLength&1)!=0) {
814         return trie.orphan();
815     } else {
816         return new UCharsTrie(trieUChars2.getBuffer());
817     }
818 }
819 
checkFirst(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)820 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
821                                 const StringAndValue data[], int32_t dataLength) {
822     for(int32_t i=0; i<dataLength; ++i) {
823         if(*data[i].s==0) {
824             continue;  // skip empty string
825         }
826         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
827         UChar32 c=expectedString[0];
828         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
829         UStringTrieResult firstResult=trie.first(c);
830         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
831         UStringTrieResult nextResult=trie.next(nextCp);
832         if(firstResult!=trie.reset().next(c) ||
833            firstResult!=trie.current() ||
834            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
835            nextResult!=trie.next(nextCp)
836         ) {
837             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
838                   c, data[i].s);
839         }
840         c=expectedString.char32At(0);
841         int32_t cLength=U16_LENGTH(c);
842         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
843         firstResult=trie.firstForCodePoint(c);
844         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
845         nextResult=trie.nextForCodePoint(nextCp);
846         if(firstResult!=trie.reset().nextForCodePoint(c) ||
847            firstResult!=trie.current() ||
848            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
849            nextResult!=trie.nextForCodePoint(nextCp)
850         ) {
851             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
852                   c, data[i].s);
853         }
854     }
855     trie.reset();
856 }
857 
checkNext(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)858 void UCharsTrieTest::checkNext(UCharsTrie &trie,
859                                const StringAndValue data[], int32_t dataLength) {
860     UCharsTrie::State state;
861     for(int32_t i=0; i<dataLength; ++i) {
862         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
863         int32_t stringLength= (i&1) ? -1 : expectedString.length();
864         UStringTrieResult result;
865         if( !USTRINGTRIE_HAS_VALUE(
866                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
867             result!=trie.current()
868         ) {
869             errln("trie does not seem to contain %s", data[i].s);
870         } else if(trie.getValue()!=data[i].value) {
871             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
872                   data[i].s,
873                   (long)trie.getValue(), (long)trie.getValue(),
874                   (long)data[i].value, (long)data[i].value);
875         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
876             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
877         }
878         trie.reset();
879         stringLength=expectedString.length();
880         result=trie.current();
881         for(int32_t j=0; j<stringLength; ++j) {
882             if(!USTRINGTRIE_HAS_NEXT(result)) {
883                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
884                 break;
885             }
886             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
887                 trie.getValue();
888                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
889                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
890                     break;
891                 }
892             }
893             result=trie.next(expectedString[j]);
894             if(!USTRINGTRIE_MATCHES(result)) {
895                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
896                 break;
897             }
898             if(result!=trie.current()) {
899                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
900                 break;
901             }
902         }
903         if(!USTRINGTRIE_HAS_VALUE(result)) {
904             errln("trie.next()!=hasValue at the end of %s", data[i].s);
905             continue;
906         }
907         trie.getValue();
908         if(result!=trie.current()) {
909             errln("trie.current() != current()+getValue()+current() after end of %s",
910                   data[i].s);
911         }
912         // Compare the final current() with whether next() can actually continue.
913         trie.saveState(state);
914         UBool nextContinues=FALSE;
915         for(int32_t c=0x20; c<0xe000; ++c) {
916             if(c==0x80) {
917                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
918             }
919             if(trie.resetToState(state).next(c)) {
920                 nextContinues=TRUE;
921                 break;
922             }
923         }
924         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
925             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
926                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
927         }
928         trie.reset();
929     }
930 }
931 
checkNextWithState(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)932 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
933                                         const StringAndValue data[], int32_t dataLength) {
934     UCharsTrie::State noState, state;
935     for(int32_t i=0; i<dataLength; ++i) {
936         if((i&1)==0) {
937             // This should have no effect.
938             trie.resetToState(noState);
939         }
940         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
941         int32_t stringLength=expectedString.length();
942         int32_t partialLength=stringLength/3;
943         for(int32_t j=0; j<partialLength; ++j) {
944             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
945                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
946                 return;
947             }
948         }
949         trie.saveState(state);
950         UStringTrieResult resultAtState=trie.current();
951         UStringTrieResult result;
952         int32_t valueAtState=-99;
953         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
954             valueAtState=trie.getValue();
955         }
956         result=trie.next(0);  // mismatch
957         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
958             errln("trie.next(0) matched after part of %s", data[i].s);
959         }
960         if( resultAtState!=trie.resetToState(state).current() ||
961             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
962         ) {
963             errln("trie.next(part of %s) changes current()/getValue() after "
964                   "saveState/next(0)/resetToState",
965                   data[i].s);
966         } else if(!USTRINGTRIE_HAS_VALUE(
967                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
968                                        stringLength-partialLength)) ||
969                   result!=trie.current()) {
970             errln("trie.next(rest of %s) does not seem to contain %s after "
971                   "saveState/next(0)/resetToState",
972                   data[i].s, data[i].s);
973         } else if(!USTRINGTRIE_HAS_VALUE(
974                       result=trie.resetToState(state).
975                                   next(expectedString.getTerminatedBuffer()+partialLength,
976                                        stringLength-partialLength)) ||
977                   result!=trie.current()) {
978             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
979                   data[i].s);
980         } else if(trie.getValue()!=data[i].value) {
981             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
982                   data[i].s,
983                   (long)trie.getValue(), (long)trie.getValue(),
984                   (long)data[i].value, (long)data[i].value);
985         }
986         trie.reset();
987     }
988 }
989 
990 // next(string) is also tested in other functions,
991 // but here we try to go partway through the string, and then beyond it.
checkNextString(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)992 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
993                                      const StringAndValue data[], int32_t dataLength) {
994     for(int32_t i=0; i<dataLength; ++i) {
995         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
996         int32_t stringLength=expectedString.length();
997         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
998             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
999             continue;
1000         }
1001         // Test that we stop properly at the end of the string.
1002         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1003                      stringLength+1-stringLength/2)) {
1004             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1005         }
1006         trie.reset();
1007     }
1008 }
1009 
checkIterator(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)1010 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1011                                    const StringAndValue data[], int32_t dataLength) {
1012     IcuTestErrorCode errorCode(*this, "checkIterator()");
1013     UCharsTrie::Iterator iter(trie, 0, errorCode);
1014     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1015         return;
1016     }
1017     checkIterator(iter, data, dataLength);
1018 }
1019 
checkIterator(UCharsTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)1020 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1021                                    const StringAndValue data[], int32_t dataLength) {
1022     IcuTestErrorCode errorCode(*this, "checkIterator()");
1023     for(int32_t i=0; i<dataLength; ++i) {
1024         if(!iter.hasNext()) {
1025             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1026             break;
1027         }
1028         UBool hasNext=iter.next(errorCode);
1029         if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1030             break;
1031         }
1032         if(!hasNext) {
1033             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1034             break;
1035         }
1036         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1037         if(iter.getString()!=expectedString) {
1038             char buffer[1000];
1039             UnicodeString invString(prettify(iter.getString()));
1040             invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
1041             errln("trie iterator next().getString()=%s but expected %s for item %d",
1042                   buffer, data[i].s, (int)i);
1043         }
1044         if(iter.getValue()!=data[i].value) {
1045             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1046                   (long)iter.getValue(), (long)iter.getValue(),
1047                   (long)data[i].value, (long)data[i].value,
1048                   (int)i, data[i].s);
1049         }
1050     }
1051     if(iter.hasNext()) {
1052         errln("trie iterator hasNext()=TRUE after all items");
1053     }
1054     UBool hasNext=iter.next(errorCode);
1055     errorCode.errIfFailureAndReset("trie iterator next() after all items");
1056     if(hasNext) {
1057         errln("trie iterator next()=TRUE after all items");
1058     }
1059 }
1060