1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  bytetrietest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010nov16
14 *   created by: Markus W. Scherer
15 */
16 
17 #include <string.h>
18 
19 #include "unicode/utypes.h"
20 #include "unicode/bytestrie.h"
21 #include "unicode/bytestriebuilder.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/stringpiece.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26 
27 struct StringAndValue {
28     const char *s;
29     int32_t value;
30 };
31 
32 class BytesTrieTest : public IntlTest {
33 public:
34     BytesTrieTest();
35     virtual ~BytesTrieTest();
36 
37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38     void TestBuilder();
39     void TestEmpty();
40     void Test_a();
41     void Test_a_ab();
42     void TestShortestBranch();
43     void TestBranches();
44     void TestLongSequence();
45     void TestLongBranch();
46     void TestValuesForState();
47     void TestCompact();
48 
49     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50     void TestHasUniqueValue();
51     void TestGetNextBytes();
52     void TestIteratorFromBranch();
53     void TestIteratorFromLinearMatch();
54     void TestTruncatingIteratorFromRoot();
55     void TestTruncatingIteratorFromLinearMatchShort();
56     void TestTruncatingIteratorFromLinearMatchLong();
57     void TestIteratorFromBytes();
58     void TestFailedIterator();
59 
60     void checkData(const StringAndValue data[], int32_t dataLength);
61     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
62     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
63                          UStringTrieBuildOption buildOption);
64     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
70 
71 private:
72     BytesTrieBuilder *builder_;
73 };
74 
createBytesTrieTest()75 extern IntlTest *createBytesTrieTest() {
76     return new BytesTrieTest();
77 }
78 
BytesTrieTest()79 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
80     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
81     builder_=new BytesTrieBuilder(errorCode);
82 }
83 
~BytesTrieTest()84 BytesTrieTest::~BytesTrieTest() {
85     delete builder_;
86 }
87 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)88 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
89     if(exec) {
90         logln("TestSuite BytesTrieTest: ");
91     }
92     TESTCASE_AUTO_BEGIN;
93     TESTCASE_AUTO(TestBuilder);
94     TESTCASE_AUTO(TestEmpty);
95     TESTCASE_AUTO(Test_a);
96     TESTCASE_AUTO(Test_a_ab);
97     TESTCASE_AUTO(TestShortestBranch);
98     TESTCASE_AUTO(TestBranches);
99     TESTCASE_AUTO(TestLongSequence);
100     TESTCASE_AUTO(TestLongBranch);
101     TESTCASE_AUTO(TestValuesForState);
102     TESTCASE_AUTO(TestCompact);
103     TESTCASE_AUTO(TestHasUniqueValue);
104     TESTCASE_AUTO(TestGetNextBytes);
105     TESTCASE_AUTO(TestIteratorFromBranch);
106     TESTCASE_AUTO(TestIteratorFromLinearMatch);
107     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
108     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
109     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
110     TESTCASE_AUTO(TestIteratorFromBytes);
111     TESTCASE_AUTO(TestFailedIterator);
112     TESTCASE_AUTO_END;
113 }
114 
TestBuilder()115 void BytesTrieTest::TestBuilder() {
116     IcuTestErrorCode errorCode(*this, "TestBuilder()");
117     builder_->clear();
118     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
119     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
120         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
121         return;
122     }
123     // TODO: remove .build(...) once add() checks for duplicates.
124     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
125     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
126         errln("BytesTrieBuilder.add() did not detect duplicates");
127         return;
128     }
129 }
130 
TestEmpty()131 void BytesTrieTest::TestEmpty() {
132     static const StringAndValue data[]={
133         { "", 0 }
134     };
135     checkData(data, UPRV_LENGTHOF(data));
136 }
137 
Test_a()138 void BytesTrieTest::Test_a() {
139     static const StringAndValue data[]={
140         { "a", 1 }
141     };
142     checkData(data, UPRV_LENGTHOF(data));
143 }
144 
Test_a_ab()145 void BytesTrieTest::Test_a_ab() {
146     static const StringAndValue data[]={
147         { "a", 1 },
148         { "ab", 100 }
149     };
150     checkData(data, UPRV_LENGTHOF(data));
151 }
152 
TestShortestBranch()153 void BytesTrieTest::TestShortestBranch() {
154     static const StringAndValue data[]={
155         { "a", 1000 },
156         { "b", 2000 }
157     };
158     checkData(data, UPRV_LENGTHOF(data));
159 }
160 
TestBranches()161 void BytesTrieTest::TestBranches() {
162     static const StringAndValue data[]={
163         { "a", 0x10 },
164         { "cc", 0x40 },
165         { "e", 0x100 },
166         { "ggg", 0x400 },
167         { "i", 0x1000 },
168         { "kkkk", 0x4000 },
169         { "n", 0x10000 },
170         { "ppppp", 0x40000 },
171         { "r", 0x100000 },
172         { "sss", 0x200000 },
173         { "t", 0x400000 },
174         { "uu", 0x800000 },
175         { "vv", 0x7fffffff },
176         { "zz", (int32_t)0x80000000 }
177     };
178     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
179         logln("TestBranches length=%d", (int)length);
180         checkData(data, length);
181     }
182 }
183 
TestLongSequence()184 void BytesTrieTest::TestLongSequence() {
185     static const StringAndValue data[]={
186         { "a", -1 },
187         // sequence of linear-match nodes
188         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
189         // more than 256 bytes
190         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
193           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
196     };
197     checkData(data, UPRV_LENGTHOF(data));
198 }
199 
TestLongBranch()200 void BytesTrieTest::TestLongBranch() {
201     // Split-branch and interesting compact-integer values.
202     static const StringAndValue data[]={
203         { "a", -2 },
204         { "b", -1 },
205         { "c", 0 },
206         { "d2", 1 },
207         { "f", 0x3f },
208         { "g", 0x40 },
209         { "h", 0x41 },
210         { "j23", 0x1900 },
211         { "j24", 0x19ff },
212         { "j25", 0x1a00 },
213         { "k2", 0x1a80 },
214         { "k3", 0x1aff },
215         { "l234567890", 0x1b00 },
216         { "l234567890123", 0x1b01 },
217         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
218         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
219         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
220         { "r", 0x333333 },
221         { "s2345", 0x4444444 },
222         { "t234567890", 0x77777777 },
223         { "z", (int32_t)0x80000001 }
224     };
225     checkData(data, UPRV_LENGTHOF(data));
226 }
227 
TestValuesForState()228 void BytesTrieTest::TestValuesForState() {
229     // Check that saveState() and resetToState() interact properly
230     // with next() and current().
231     static const StringAndValue data[]={
232         { "a", -1 },
233         { "ab", -2 },
234         { "abc", -3 },
235         { "abcd", -4 },
236         { "abcde", -5 },
237         { "abcdef", -6 }
238     };
239     checkData(data, UPRV_LENGTHOF(data));
240 }
241 
TestCompact()242 void BytesTrieTest::TestCompact() {
243     // Duplicate trailing strings and values provide opportunities for compacting.
244     static const StringAndValue data[]={
245         { "+", 0 },
246         { "+august", 8 },
247         { "+december", 12 },
248         { "+july", 7 },
249         { "+june", 6 },
250         { "+november", 11 },
251         { "+october", 10 },
252         { "+september", 9 },
253         { "-", 0 },
254         { "-august", 8 },
255         { "-december", 12 },
256         { "-july", 7 },
257         { "-june", 6 },
258         { "-november", 11 },
259         { "-october", 10 },
260         { "-september", 9 },
261         // The l+n branch (with its sub-nodes) is a duplicate but will be written
262         // both times because each time it follows a different linear-match node.
263         { "xjuly", 7 },
264         { "xjune", 6 }
265     };
266     checkData(data, UPRV_LENGTHOF(data));
267 }
268 
buildMonthsTrie(UStringTrieBuildOption buildOption)269 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
270     // All types of nodes leading to the same value,
271     // for code coverage of recursive functions.
272     // In particular, we need a lot of branches on some single level
273     // to exercise a split-branch node.
274     static const StringAndValue data[]={
275         { "august", 8 },
276         { "jan", 1 },
277         { "jan.", 1 },
278         { "jana", 1 },
279         { "janbb", 1 },
280         { "janc", 1 },
281         { "janddd", 1 },
282         { "janee", 1 },
283         { "janef", 1 },
284         { "janf", 1 },
285         { "jangg", 1 },
286         { "janh", 1 },
287         { "janiiii", 1 },
288         { "janj", 1 },
289         { "jankk", 1 },
290         { "jankl", 1 },
291         { "jankmm", 1 },
292         { "janl", 1 },
293         { "janm", 1 },
294         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
295         { "jano", 1 },
296         { "janpp", 1 },
297         { "janqqq", 1 },
298         { "janr", 1 },
299         { "januar", 1 },
300         { "january", 1 },
301         { "july", 7 },
302         { "jun", 6 },
303         { "jun.", 6 },
304         { "june", 6 }
305     };
306     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
307 }
308 
TestHasUniqueValue()309 void BytesTrieTest::TestHasUniqueValue() {
310     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
311     if(trie.isNull()) {
312         return;  // buildTrie() reported an error
313     }
314     int32_t uniqueValue;
315     if(trie->hasUniqueValue(uniqueValue)) {
316         errln("unique value at root");
317     }
318     trie->next('j');
319     trie->next('a');
320     trie->next('n');
321     // hasUniqueValue() directly after next()
322     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
323         errln("not unique value 1 after \"jan\"");
324     }
325     trie->first('j');
326     trie->next('u');
327     if(trie->hasUniqueValue(uniqueValue)) {
328         errln("unique value after \"ju\"");
329     }
330     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
331         errln("not normal value 6 after \"jun\"");
332     }
333     // hasUniqueValue() after getValue()
334     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
335         errln("not unique value 6 after \"jun\"");
336     }
337     // hasUniqueValue() from within a linear-match node
338     trie->first('a');
339     trie->next('u');
340     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
341         errln("not unique value 8 after \"au\"");
342     }
343 }
344 
TestGetNextBytes()345 void BytesTrieTest::TestGetNextBytes() {
346     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
347     if(trie.isNull()) {
348         return;  // buildTrie() reported an error
349     }
350     char buffer[40];
351     CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
352     int32_t count=trie->getNextBytes(sink);
353     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
354         errln("months getNextBytes()!=[aj] at root");
355     }
356     trie->next('j');
357     trie->next('a');
358     trie->next('n');
359     // getNextBytes() directly after next()
360     count=trie->getNextBytes(sink.Reset());
361     buffer[count]=0;
362     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
363         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
364     }
365     // getNextBytes() after getValue()
366     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
367     memset(buffer, 0, sizeof(buffer));
368     count=trie->getNextBytes(sink.Reset());
369     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
370         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
371     }
372     // getNextBytes() from a linear-match node
373     trie->next('u');
374     memset(buffer, 0, sizeof(buffer));
375     count=trie->getNextBytes(sink.Reset());
376     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
377         errln("months getNextBytes()!=[a] after \"janu\"");
378     }
379     trie->next('a');
380     memset(buffer, 0, sizeof(buffer));
381     count=trie->getNextBytes(sink.Reset());
382     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
383         errln("months getNextBytes()!=[r] after \"janua\"");
384     }
385     trie->next('r');
386     trie->next('y');
387     // getNextBytes() after a final match
388     count=trie->getNextBytes(sink.Reset());
389     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
390         errln("months getNextBytes()!=[] after \"january\"");
391     }
392 }
393 
TestIteratorFromBranch()394 void BytesTrieTest::TestIteratorFromBranch() {
395     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
396     if(trie.isNull()) {
397         return;  // buildTrie() reported an error
398     }
399     // Go to a branch node.
400     trie->next('j');
401     trie->next('a');
402     trie->next('n');
403     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
404     BytesTrie::Iterator iter(*trie, 0, errorCode);
405     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
406         return;
407     }
408     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
409     // following "jan".
410     static const StringAndValue data[]={
411         { "", 1 },
412         { ".", 1 },
413         { "a", 1 },
414         { "bb", 1 },
415         { "c", 1 },
416         { "ddd", 1 },
417         { "ee", 1 },
418         { "ef", 1 },
419         { "f", 1 },
420         { "gg", 1 },
421         { "h", 1 },
422         { "iiii", 1 },
423         { "j", 1 },
424         { "kk", 1 },
425         { "kl", 1 },
426         { "kmm", 1 },
427         { "l", 1 },
428         { "m", 1 },
429         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
430         { "o", 1 },
431         { "pp", 1 },
432         { "qqq", 1 },
433         { "r", 1 },
434         { "uar", 1 },
435         { "uary", 1 }
436     };
437     checkIterator(iter, data, UPRV_LENGTHOF(data));
438     // Reset, and we should get the same result.
439     logln("after iter.reset()");
440     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
441 }
442 
TestIteratorFromLinearMatch()443 void BytesTrieTest::TestIteratorFromLinearMatch() {
444     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
445     if(trie.isNull()) {
446         return;  // buildTrie() reported an error
447     }
448     // Go into a linear-match node.
449     trie->next('j');
450     trie->next('a');
451     trie->next('n');
452     trie->next('u');
453     trie->next('a');
454     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
455     BytesTrie::Iterator iter(*trie, 0, errorCode);
456     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
457         return;
458     }
459     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
460     // following "janua".
461     static const StringAndValue data[]={
462         { "r", 1 },
463         { "ry", 1 }
464     };
465     checkIterator(iter, data, UPRV_LENGTHOF(data));
466     // Reset, and we should get the same result.
467     logln("after iter.reset()");
468     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
469 }
470 
TestTruncatingIteratorFromRoot()471 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
472     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
473     if(trie.isNull()) {
474         return;  // buildTrie() reported an error
475     }
476     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
477     BytesTrie::Iterator iter(*trie, 4, errorCode);
478     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
479         return;
480     }
481     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
482     // of each string, and no string duplicates from the truncation.
483     static const StringAndValue data[]={
484         { "augu", -1 },
485         { "jan", 1 },
486         { "jan.", 1 },
487         { "jana", 1 },
488         { "janb", -1 },
489         { "janc", 1 },
490         { "jand", -1 },
491         { "jane", -1 },
492         { "janf", 1 },
493         { "jang", -1 },
494         { "janh", 1 },
495         { "jani", -1 },
496         { "janj", 1 },
497         { "jank", -1 },
498         { "janl", 1 },
499         { "janm", 1 },
500         { "jann", -1 },
501         { "jano", 1 },
502         { "janp", -1 },
503         { "janq", -1 },
504         { "janr", 1 },
505         { "janu", -1 },
506         { "july", 7 },
507         { "jun", 6 },
508         { "jun.", 6 },
509         { "june", 6 }
510     };
511     checkIterator(iter, data, UPRV_LENGTHOF(data));
512     // Reset, and we should get the same result.
513     logln("after iter.reset()");
514     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
515 }
516 
TestTruncatingIteratorFromLinearMatchShort()517 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
518     static const StringAndValue data[]={
519         { "abcdef", 10 },
520         { "abcdepq", 200 },
521         { "abcdeyz", 3000 }
522     };
523     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
524     if(trie.isNull()) {
525         return;  // buildTrie() reported an error
526     }
527     // Go into a linear-match node.
528     trie->next('a');
529     trie->next('b');
530     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
531     // Truncate within the linear-match node.
532     BytesTrie::Iterator iter(*trie, 2, errorCode);
533     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
534         return;
535     }
536     static const StringAndValue expected[]={
537         { "cd", -1 }
538     };
539     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
540     // Reset, and we should get the same result.
541     logln("after iter.reset()");
542     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
543 }
544 
TestTruncatingIteratorFromLinearMatchLong()545 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
546     static const StringAndValue data[]={
547         { "abcdef", 10 },
548         { "abcdepq", 200 },
549         { "abcdeyz", 3000 }
550     };
551     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
552     if(trie.isNull()) {
553         return;  // buildTrie() reported an error
554     }
555     // Go into a linear-match node.
556     trie->next('a');
557     trie->next('b');
558     trie->next('c');
559     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
560     // Truncate after the linear-match node.
561     BytesTrie::Iterator iter(*trie, 3, errorCode);
562     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
563         return;
564     }
565     static const StringAndValue expected[]={
566         { "def", 10 },
567         { "dep", -1 },
568         { "dey", -1 }
569     };
570     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
571     // Reset, and we should get the same result.
572     logln("after iter.reset()");
573     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
574 }
575 
TestIteratorFromBytes()576 void BytesTrieTest::TestIteratorFromBytes() {
577     static const StringAndValue data[]={
578         { "mm", 3 },
579         { "mmm", 33 },
580         { "mmnop", 333 }
581     };
582     builder_->clear();
583     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
584     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
585         builder_->add(data[i].s, data[i].value, errorCode);
586     }
587     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
588     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
589     checkIterator(iter, data, UPRV_LENGTHOF(data));
590 }
591 
TestFailedIterator()592 void BytesTrieTest::TestFailedIterator() {
593     UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
594     BytesTrie::Iterator iter(NULL, 0, failure);
595     StringPiece sp = iter.getString();
596     if (!sp.empty()) {
597         errln("failed iterator returned garbage data");
598     }
599 }
600 
checkData(const StringAndValue data[],int32_t dataLength)601 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
602     logln("checkData(dataLength=%d, fast)", (int)dataLength);
603     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
604     logln("checkData(dataLength=%d, small)", (int)dataLength);
605     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
606 }
607 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)608 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
609     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
610     if(trie.isNull()) {
611         return;  // buildTrie() reported an error
612     }
613     checkFirst(*trie, data, dataLength);
614     checkNext(*trie, data, dataLength);
615     checkNextWithState(*trie, data, dataLength);
616     checkNextString(*trie, data, dataLength);
617     checkIterator(*trie, data, dataLength);
618 }
619 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)620 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
621                                     UStringTrieBuildOption buildOption) {
622     IcuTestErrorCode errorCode(*this, "buildTrie()");
623     // Add the items to the trie builder in an interesting (not trivial, not random) order.
624     int32_t index, step;
625     if(dataLength&1) {
626         // Odd number of items.
627         index=dataLength/2;
628         step=2;
629     } else if((dataLength%3)!=0) {
630         // Not a multiple of 3.
631         index=dataLength/5;
632         step=3;
633     } else {
634         index=dataLength-1;
635         step=-1;
636     }
637     builder_->clear();
638     for(int32_t i=0; i<dataLength; ++i) {
639         builder_->add(data[index].s, data[index].value, errorCode);
640         index=(index+step)%dataLength;
641     }
642     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
643     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
644     if(!errorCode.errIfFailureAndReset("add()/build()")) {
645         builder_->add("zzz", 999, errorCode);
646         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
647             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
648         }
649     }
650     logln("serialized trie size: %ld bytes\n", (long)sp.length());
651     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
652     if(sp.data()==sp2.data()) {
653         errln("builder.buildStringPiece() before & after build() returned same array");
654     }
655     if(errorCode.isFailure()) {
656         return NULL;
657     }
658     // Tries from either build() method should be identical but
659     // BytesTrie does not implement equals().
660     // We just return either one.
661     if((dataLength&1)!=0) {
662         return trie.orphan();
663     } else {
664         return new BytesTrie(sp2.data());
665     }
666 }
667 
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)668 void BytesTrieTest::checkFirst(BytesTrie &trie,
669                                const StringAndValue data[], int32_t dataLength) {
670     for(int32_t i=0; i<dataLength; ++i) {
671         int c=*data[i].s;
672         if(c==0) {
673             continue;  // skip empty string
674         }
675         UStringTrieResult firstResult=trie.first(c);
676         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
677         UStringTrieResult nextResult=trie.next(data[i].s[1]);
678         if(firstResult!=trie.reset().next(c) ||
679            firstResult!=trie.current() ||
680            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
681            nextResult!=trie.next(data[i].s[1])
682         ) {
683             errln("trie.first(%c)!=trie.reset().next(same) for %s",
684                   c, data[i].s);
685         }
686     }
687     trie.reset();
688 }
689 
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)690 void BytesTrieTest::checkNext(BytesTrie &trie,
691                               const StringAndValue data[], int32_t dataLength) {
692     BytesTrie::State state;
693     for(int32_t i=0; i<dataLength; ++i) {
694         int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
695         UStringTrieResult result;
696         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
697             result!=trie.current()
698         ) {
699             errln("trie does not seem to contain %s", data[i].s);
700         } else if(trie.getValue()!=data[i].value) {
701             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
702                   data[i].s,
703                   (long)trie.getValue(), (long)trie.getValue(),
704                   (long)data[i].value, (long)data[i].value);
705         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
706             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
707         }
708         trie.reset();
709         stringLength=strlen(data[i].s);
710         result=trie.current();
711         for(int32_t j=0; j<stringLength; ++j) {
712             if(!USTRINGTRIE_HAS_NEXT(result)) {
713                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
714                 break;
715             }
716             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
717                 trie.getValue();
718                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
719                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
720                     break;
721                 }
722             }
723             result=trie.next(data[i].s[j]);
724             if(!USTRINGTRIE_MATCHES(result)) {
725                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
726                 break;
727             }
728             if(result!=trie.current()) {
729                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
730                 break;
731             }
732         }
733         if(!USTRINGTRIE_HAS_VALUE(result)) {
734             errln("trie.next()!=hasValue at the end of %s", data[i].s);
735             continue;
736         }
737         trie.getValue();
738         if(result!=trie.current()) {
739             errln("trie.current() != current()+getValue()+current() after end of %s",
740                   data[i].s);
741         }
742         // Compare the final current() with whether next() can actually continue.
743         trie.saveState(state);
744         UBool nextContinues=FALSE;
745         // Try all graphic characters; we only use those in test strings in this file.
746 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
747         const int32_t minChar=0x20;
748         const int32_t maxChar=0x7e;
749 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
750         const int32_t minChar=0x40;
751         const int32_t maxChar=0xfe;
752 #else
753         const int32_t minChar=0;
754         const int32_t maxChar=0xff;
755 #endif
756         for(int32_t c=minChar; c<=maxChar; ++c) {
757             if(trie.resetToState(state).next(c)) {
758                 nextContinues=TRUE;
759                 break;
760             }
761         }
762         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
763             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
764                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
765         }
766         trie.reset();
767     }
768 }
769 
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)770 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
771                                        const StringAndValue data[], int32_t dataLength) {
772     BytesTrie::State noState, state;
773     for(int32_t i=0; i<dataLength; ++i) {
774         if((i&1)==0) {
775             // This should have no effect.
776             trie.resetToState(noState);
777         }
778         const char *expectedString=data[i].s;
779         int32_t stringLength=strlen(expectedString);
780         int32_t partialLength=stringLength/3;
781         for(int32_t j=0; j<partialLength; ++j) {
782             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
783                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
784                 return;
785             }
786         }
787         trie.saveState(state);
788         UStringTrieResult resultAtState=trie.current();
789         UStringTrieResult result;
790         int32_t valueAtState=-99;
791         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
792             valueAtState=trie.getValue();
793         }
794         result=trie.next(0);  // mismatch
795         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
796             errln("trie.next(0) matched after part of %s", data[i].s);
797         }
798         if( resultAtState!=trie.resetToState(state).current() ||
799             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
800         ) {
801             errln("trie.next(part of %s) changes current()/getValue() after "
802                   "saveState/next(0)/resetToState",
803                   data[i].s);
804         } else if(!USTRINGTRIE_HAS_VALUE(
805                       result=trie.next(expectedString+partialLength,
806                                        stringLength-partialLength)) ||
807                   result!=trie.current()) {
808             errln("trie.next(rest of %s) does not seem to contain %s after "
809                   "saveState/next(0)/resetToState",
810                   data[i].s, data[i].s);
811         } else if(!USTRINGTRIE_HAS_VALUE(
812                       result=trie.resetToState(state).
813                                   next(expectedString+partialLength,
814                                        stringLength-partialLength)) ||
815                   result!=trie.current()) {
816             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
817                   data[i].s);
818         } else if(trie.getValue()!=data[i].value) {
819             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
820                   data[i].s,
821                   (long)trie.getValue(), (long)trie.getValue(),
822                   (long)data[i].value, (long)data[i].value);
823         }
824         trie.reset();
825     }
826 }
827 
828 // next(string) is also tested in other functions,
829 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)830 void BytesTrieTest::checkNextString(BytesTrie &trie,
831                                     const StringAndValue data[], int32_t dataLength) {
832     for(int32_t i=0; i<dataLength; ++i) {
833         const char *expectedString=data[i].s;
834         int32_t stringLength=strlen(expectedString);
835         if(!trie.next(expectedString, stringLength/2)) {
836             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
837             continue;
838         }
839         // Test that we stop properly at the end of the string.
840         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
841             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
842         }
843         trie.reset();
844     }
845 }
846 
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)847 void BytesTrieTest::checkIterator(const BytesTrie &trie,
848                                   const StringAndValue data[], int32_t dataLength) {
849     IcuTestErrorCode errorCode(*this, "checkIterator()");
850     BytesTrie::Iterator iter(trie, 0, errorCode);
851     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
852         return;
853     }
854     checkIterator(iter, data, dataLength);
855 }
856 
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)857 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
858                                   const StringAndValue data[], int32_t dataLength) {
859     IcuTestErrorCode errorCode(*this, "checkIterator()");
860     for(int32_t i=0; i<dataLength; ++i) {
861         if(!iter.hasNext()) {
862             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
863             break;
864         }
865         UBool hasNext=iter.next(errorCode);
866         if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
867             break;
868         }
869         if(!hasNext) {
870             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
871             break;
872         }
873         if(iter.getString()!=StringPiece(data[i].s)) {
874             errln("trie iterator next().getString()=%s but expected %s for item %d",
875                   iter.getString().data(), data[i].s, (int)i);
876         }
877         if(iter.getValue()!=data[i].value) {
878             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
879                   (long)iter.getValue(), (long)iter.getValue(),
880                   (long)data[i].value, (long)data[i].value,
881                   (int)i, data[i].s);
882         }
883     }
884     if(iter.hasNext()) {
885         errln("trie iterator hasNext()=TRUE after all items");
886     }
887     UBool hasNext=iter.next(errorCode);
888     errorCode.errIfFailureAndReset("trie iterator next() after all items");
889     if(hasNext) {
890         errln("trie iterator next()=TRUE after all items");
891     }
892 }
893