1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  bytetrietest.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010nov16
12 *   created by: Markus W. Scherer
13 */
14 
15 #include <string.h>
16 
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/bytestriebuilder.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/stringpiece.h"
22 #include "intltest.h"
23 #include "cmemory.h"
24 
/**
 * One test item: a byte string (used as a NUL-terminated C string
 * throughout this file) paired with the 32-bit value expected for it.
 */
struct StringAndValue {
    const char *s;   // the key bytes
    int32_t value;   // the value expected for s
};
29 
30 class BytesTrieTest : public IntlTest {
31 public:
32     BytesTrieTest();
33     virtual ~BytesTrieTest();
34 
35     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
36     void TestBuilder();
37     void TestEmpty();
38     void Test_a();
39     void Test_a_ab();
40     void TestShortestBranch();
41     void TestBranches();
42     void TestLongSequence();
43     void TestLongBranch();
44     void TestValuesForState();
45     void TestCompact();
46 
47     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
48     void TestHasUniqueValue();
49     void TestGetNextBytes();
50     void TestIteratorFromBranch();
51     void TestIteratorFromLinearMatch();
52     void TestTruncatingIteratorFromRoot();
53     void TestTruncatingIteratorFromLinearMatchShort();
54     void TestTruncatingIteratorFromLinearMatchLong();
55     void TestIteratorFromBytes();
56 
57     void checkData(const StringAndValue data[], int32_t dataLength);
58     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
59     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
60                          UStringTrieBuildOption buildOption);
61     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
62     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
63     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
64     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
67 
68 private:
69     BytesTrieBuilder *builder_;
70 };
71 
createBytesTrieTest()72 extern IntlTest *createBytesTrieTest() {
73     return new BytesTrieTest();
74 }
75 
BytesTrieTest()76 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
77     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
78     builder_=new BytesTrieBuilder(errorCode);
79 }
80 
~BytesTrieTest()81 BytesTrieTest::~BytesTrieTest() {
82     delete builder_;
83 }
84 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)85 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
86     if(exec) {
87         logln("TestSuite BytesTrieTest: ");
88     }
89     TESTCASE_AUTO_BEGIN;
90     TESTCASE_AUTO(TestBuilder);
91     TESTCASE_AUTO(TestEmpty);
92     TESTCASE_AUTO(Test_a);
93     TESTCASE_AUTO(Test_a_ab);
94     TESTCASE_AUTO(TestShortestBranch);
95     TESTCASE_AUTO(TestBranches);
96     TESTCASE_AUTO(TestLongSequence);
97     TESTCASE_AUTO(TestLongBranch);
98     TESTCASE_AUTO(TestValuesForState);
99     TESTCASE_AUTO(TestCompact);
100     TESTCASE_AUTO(TestHasUniqueValue);
101     TESTCASE_AUTO(TestGetNextBytes);
102     TESTCASE_AUTO(TestIteratorFromBranch);
103     TESTCASE_AUTO(TestIteratorFromLinearMatch);
104     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
105     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
106     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
107     TESTCASE_AUTO(TestIteratorFromBytes);
108     TESTCASE_AUTO_END;
109 }
110 
TestBuilder()111 void BytesTrieTest::TestBuilder() {
112     IcuTestErrorCode errorCode(*this, "TestBuilder()");
113     builder_->clear();
114     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
115     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
116         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
117         return;
118     }
119     // TODO: remove .build(...) once add() checks for duplicates.
120     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
121     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
122         errln("BytesTrieBuilder.add() did not detect duplicates");
123         return;
124     }
125 }
126 
TestEmpty()127 void BytesTrieTest::TestEmpty() {
128     static const StringAndValue data[]={
129         { "", 0 }
130     };
131     checkData(data, UPRV_LENGTHOF(data));
132 }
133 
Test_a()134 void BytesTrieTest::Test_a() {
135     static const StringAndValue data[]={
136         { "a", 1 }
137     };
138     checkData(data, UPRV_LENGTHOF(data));
139 }
140 
Test_a_ab()141 void BytesTrieTest::Test_a_ab() {
142     static const StringAndValue data[]={
143         { "a", 1 },
144         { "ab", 100 }
145     };
146     checkData(data, UPRV_LENGTHOF(data));
147 }
148 
TestShortestBranch()149 void BytesTrieTest::TestShortestBranch() {
150     static const StringAndValue data[]={
151         { "a", 1000 },
152         { "b", 2000 }
153     };
154     checkData(data, UPRV_LENGTHOF(data));
155 }
156 
TestBranches()157 void BytesTrieTest::TestBranches() {
158     static const StringAndValue data[]={
159         { "a", 0x10 },
160         { "cc", 0x40 },
161         { "e", 0x100 },
162         { "ggg", 0x400 },
163         { "i", 0x1000 },
164         { "kkkk", 0x4000 },
165         { "n", 0x10000 },
166         { "ppppp", 0x40000 },
167         { "r", 0x100000 },
168         { "sss", 0x200000 },
169         { "t", 0x400000 },
170         { "uu", 0x800000 },
171         { "vv", 0x7fffffff },
172         { "zz", (int32_t)0x80000000 }
173     };
174     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
175         logln("TestBranches length=%d", (int)length);
176         checkData(data, length);
177     }
178 }
179 
TestLongSequence()180 void BytesTrieTest::TestLongSequence() {
181     static const StringAndValue data[]={
182         { "a", -1 },
183         // sequence of linear-match nodes
184         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
185         // more than 256 bytes
186         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
187           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
188           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
189           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
190           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
192     };
193     checkData(data, UPRV_LENGTHOF(data));
194 }
195 
TestLongBranch()196 void BytesTrieTest::TestLongBranch() {
197     // Split-branch and interesting compact-integer values.
198     static const StringAndValue data[]={
199         { "a", -2 },
200         { "b", -1 },
201         { "c", 0 },
202         { "d2", 1 },
203         { "f", 0x3f },
204         { "g", 0x40 },
205         { "h", 0x41 },
206         { "j23", 0x1900 },
207         { "j24", 0x19ff },
208         { "j25", 0x1a00 },
209         { "k2", 0x1a80 },
210         { "k3", 0x1aff },
211         { "l234567890", 0x1b00 },
212         { "l234567890123", 0x1b01 },
213         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
214         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
215         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
216         { "r", 0x333333 },
217         { "s2345", 0x4444444 },
218         { "t234567890", 0x77777777 },
219         { "z", (int32_t)0x80000001 }
220     };
221     checkData(data, UPRV_LENGTHOF(data));
222 }
223 
TestValuesForState()224 void BytesTrieTest::TestValuesForState() {
225     // Check that saveState() and resetToState() interact properly
226     // with next() and current().
227     static const StringAndValue data[]={
228         { "a", -1 },
229         { "ab", -2 },
230         { "abc", -3 },
231         { "abcd", -4 },
232         { "abcde", -5 },
233         { "abcdef", -6 }
234     };
235     checkData(data, UPRV_LENGTHOF(data));
236 }
237 
TestCompact()238 void BytesTrieTest::TestCompact() {
239     // Duplicate trailing strings and values provide opportunities for compacting.
240     static const StringAndValue data[]={
241         { "+", 0 },
242         { "+august", 8 },
243         { "+december", 12 },
244         { "+july", 7 },
245         { "+june", 6 },
246         { "+november", 11 },
247         { "+october", 10 },
248         { "+september", 9 },
249         { "-", 0 },
250         { "-august", 8 },
251         { "-december", 12 },
252         { "-july", 7 },
253         { "-june", 6 },
254         { "-november", 11 },
255         { "-october", 10 },
256         { "-september", 9 },
257         // The l+n branch (with its sub-nodes) is a duplicate but will be written
258         // both times because each time it follows a different linear-match node.
259         { "xjuly", 7 },
260         { "xjune", 6 }
261     };
262     checkData(data, UPRV_LENGTHOF(data));
263 }
264 
buildMonthsTrie(UStringTrieBuildOption buildOption)265 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
266     // All types of nodes leading to the same value,
267     // for code coverage of recursive functions.
268     // In particular, we need a lot of branches on some single level
269     // to exercise a split-branch node.
270     static const StringAndValue data[]={
271         { "august", 8 },
272         { "jan", 1 },
273         { "jan.", 1 },
274         { "jana", 1 },
275         { "janbb", 1 },
276         { "janc", 1 },
277         { "janddd", 1 },
278         { "janee", 1 },
279         { "janef", 1 },
280         { "janf", 1 },
281         { "jangg", 1 },
282         { "janh", 1 },
283         { "janiiii", 1 },
284         { "janj", 1 },
285         { "jankk", 1 },
286         { "jankl", 1 },
287         { "jankmm", 1 },
288         { "janl", 1 },
289         { "janm", 1 },
290         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
291         { "jano", 1 },
292         { "janpp", 1 },
293         { "janqqq", 1 },
294         { "janr", 1 },
295         { "januar", 1 },
296         { "january", 1 },
297         { "july", 7 },
298         { "jun", 6 },
299         { "jun.", 6 },
300         { "june", 6 }
301     };
302     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
303 }
304 
TestHasUniqueValue()305 void BytesTrieTest::TestHasUniqueValue() {
306     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
307     if(trie.isNull()) {
308         return;  // buildTrie() reported an error
309     }
310     int32_t uniqueValue;
311     if(trie->hasUniqueValue(uniqueValue)) {
312         errln("unique value at root");
313     }
314     trie->next('j');
315     trie->next('a');
316     trie->next('n');
317     // hasUniqueValue() directly after next()
318     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
319         errln("not unique value 1 after \"jan\"");
320     }
321     trie->first('j');
322     trie->next('u');
323     if(trie->hasUniqueValue(uniqueValue)) {
324         errln("unique value after \"ju\"");
325     }
326     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
327         errln("not normal value 6 after \"jun\"");
328     }
329     // hasUniqueValue() after getValue()
330     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
331         errln("not unique value 6 after \"jun\"");
332     }
333     // hasUniqueValue() from within a linear-match node
334     trie->first('a');
335     trie->next('u');
336     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
337         errln("not unique value 8 after \"au\"");
338     }
339 }
340 
TestGetNextBytes()341 void BytesTrieTest::TestGetNextBytes() {
342     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
343     if(trie.isNull()) {
344         return;  // buildTrie() reported an error
345     }
346     char buffer[40];
347     CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
348     int32_t count=trie->getNextBytes(sink);
349     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
350         errln("months getNextBytes()!=[aj] at root");
351     }
352     trie->next('j');
353     trie->next('a');
354     trie->next('n');
355     // getNextBytes() directly after next()
356     count=trie->getNextBytes(sink.Reset());
357     buffer[count]=0;
358     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
359         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
360     }
361     // getNextBytes() after getValue()
362     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
363     memset(buffer, 0, sizeof(buffer));
364     count=trie->getNextBytes(sink.Reset());
365     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
366         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
367     }
368     // getNextBytes() from a linear-match node
369     trie->next('u');
370     memset(buffer, 0, sizeof(buffer));
371     count=trie->getNextBytes(sink.Reset());
372     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
373         errln("months getNextBytes()!=[a] after \"janu\"");
374     }
375     trie->next('a');
376     memset(buffer, 0, sizeof(buffer));
377     count=trie->getNextBytes(sink.Reset());
378     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
379         errln("months getNextBytes()!=[r] after \"janua\"");
380     }
381     trie->next('r');
382     trie->next('y');
383     // getNextBytes() after a final match
384     count=trie->getNextBytes(sink.Reset());
385     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
386         errln("months getNextBytes()!=[] after \"january\"");
387     }
388 }
389 
TestIteratorFromBranch()390 void BytesTrieTest::TestIteratorFromBranch() {
391     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
392     if(trie.isNull()) {
393         return;  // buildTrie() reported an error
394     }
395     // Go to a branch node.
396     trie->next('j');
397     trie->next('a');
398     trie->next('n');
399     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
400     BytesTrie::Iterator iter(*trie, 0, errorCode);
401     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
402         return;
403     }
404     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
405     // following "jan".
406     static const StringAndValue data[]={
407         { "", 1 },
408         { ".", 1 },
409         { "a", 1 },
410         { "bb", 1 },
411         { "c", 1 },
412         { "ddd", 1 },
413         { "ee", 1 },
414         { "ef", 1 },
415         { "f", 1 },
416         { "gg", 1 },
417         { "h", 1 },
418         { "iiii", 1 },
419         { "j", 1 },
420         { "kk", 1 },
421         { "kl", 1 },
422         { "kmm", 1 },
423         { "l", 1 },
424         { "m", 1 },
425         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
426         { "o", 1 },
427         { "pp", 1 },
428         { "qqq", 1 },
429         { "r", 1 },
430         { "uar", 1 },
431         { "uary", 1 }
432     };
433     checkIterator(iter, data, UPRV_LENGTHOF(data));
434     // Reset, and we should get the same result.
435     logln("after iter.reset()");
436     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
437 }
438 
TestIteratorFromLinearMatch()439 void BytesTrieTest::TestIteratorFromLinearMatch() {
440     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
441     if(trie.isNull()) {
442         return;  // buildTrie() reported an error
443     }
444     // Go into a linear-match node.
445     trie->next('j');
446     trie->next('a');
447     trie->next('n');
448     trie->next('u');
449     trie->next('a');
450     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
451     BytesTrie::Iterator iter(*trie, 0, errorCode);
452     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
453         return;
454     }
455     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
456     // following "janua".
457     static const StringAndValue data[]={
458         { "r", 1 },
459         { "ry", 1 }
460     };
461     checkIterator(iter, data, UPRV_LENGTHOF(data));
462     // Reset, and we should get the same result.
463     logln("after iter.reset()");
464     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
465 }
466 
TestTruncatingIteratorFromRoot()467 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
468     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
469     if(trie.isNull()) {
470         return;  // buildTrie() reported an error
471     }
472     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
473     BytesTrie::Iterator iter(*trie, 4, errorCode);
474     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
475         return;
476     }
477     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
478     // of each string, and no string duplicates from the truncation.
479     static const StringAndValue data[]={
480         { "augu", -1 },
481         { "jan", 1 },
482         { "jan.", 1 },
483         { "jana", 1 },
484         { "janb", -1 },
485         { "janc", 1 },
486         { "jand", -1 },
487         { "jane", -1 },
488         { "janf", 1 },
489         { "jang", -1 },
490         { "janh", 1 },
491         { "jani", -1 },
492         { "janj", 1 },
493         { "jank", -1 },
494         { "janl", 1 },
495         { "janm", 1 },
496         { "jann", -1 },
497         { "jano", 1 },
498         { "janp", -1 },
499         { "janq", -1 },
500         { "janr", 1 },
501         { "janu", -1 },
502         { "july", 7 },
503         { "jun", 6 },
504         { "jun.", 6 },
505         { "june", 6 }
506     };
507     checkIterator(iter, data, UPRV_LENGTHOF(data));
508     // Reset, and we should get the same result.
509     logln("after iter.reset()");
510     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
511 }
512 
TestTruncatingIteratorFromLinearMatchShort()513 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
514     static const StringAndValue data[]={
515         { "abcdef", 10 },
516         { "abcdepq", 200 },
517         { "abcdeyz", 3000 }
518     };
519     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
520     if(trie.isNull()) {
521         return;  // buildTrie() reported an error
522     }
523     // Go into a linear-match node.
524     trie->next('a');
525     trie->next('b');
526     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
527     // Truncate within the linear-match node.
528     BytesTrie::Iterator iter(*trie, 2, errorCode);
529     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
530         return;
531     }
532     static const StringAndValue expected[]={
533         { "cd", -1 }
534     };
535     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
536     // Reset, and we should get the same result.
537     logln("after iter.reset()");
538     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
539 }
540 
TestTruncatingIteratorFromLinearMatchLong()541 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
542     static const StringAndValue data[]={
543         { "abcdef", 10 },
544         { "abcdepq", 200 },
545         { "abcdeyz", 3000 }
546     };
547     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
548     if(trie.isNull()) {
549         return;  // buildTrie() reported an error
550     }
551     // Go into a linear-match node.
552     trie->next('a');
553     trie->next('b');
554     trie->next('c');
555     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
556     // Truncate after the linear-match node.
557     BytesTrie::Iterator iter(*trie, 3, errorCode);
558     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
559         return;
560     }
561     static const StringAndValue expected[]={
562         { "def", 10 },
563         { "dep", -1 },
564         { "dey", -1 }
565     };
566     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
567     // Reset, and we should get the same result.
568     logln("after iter.reset()");
569     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
570 }
571 
TestIteratorFromBytes()572 void BytesTrieTest::TestIteratorFromBytes() {
573     static const StringAndValue data[]={
574         { "mm", 3 },
575         { "mmm", 33 },
576         { "mmnop", 333 }
577     };
578     builder_->clear();
579     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
580     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
581         builder_->add(data[i].s, data[i].value, errorCode);
582     }
583     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
584     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
585     checkIterator(iter, data, UPRV_LENGTHOF(data));
586 }
587 
checkData(const StringAndValue data[],int32_t dataLength)588 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
589     logln("checkData(dataLength=%d, fast)", (int)dataLength);
590     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
591     logln("checkData(dataLength=%d, small)", (int)dataLength);
592     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
593 }
594 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)595 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
596     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
597     if(trie.isNull()) {
598         return;  // buildTrie() reported an error
599     }
600     checkFirst(*trie, data, dataLength);
601     checkNext(*trie, data, dataLength);
602     checkNextWithState(*trie, data, dataLength);
603     checkNextString(*trie, data, dataLength);
604     checkIterator(*trie, data, dataLength);
605 }
606 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)607 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
608                                     UStringTrieBuildOption buildOption) {
609     IcuTestErrorCode errorCode(*this, "buildTrie()");
610     // Add the items to the trie builder in an interesting (not trivial, not random) order.
611     int32_t index, step;
612     if(dataLength&1) {
613         // Odd number of items.
614         index=dataLength/2;
615         step=2;
616     } else if((dataLength%3)!=0) {
617         // Not a multiple of 3.
618         index=dataLength/5;
619         step=3;
620     } else {
621         index=dataLength-1;
622         step=-1;
623     }
624     builder_->clear();
625     for(int32_t i=0; i<dataLength; ++i) {
626         builder_->add(data[index].s, data[index].value, errorCode);
627         index=(index+step)%dataLength;
628     }
629     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
630     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
631     if(!errorCode.logIfFailureAndReset("add()/build()")) {
632         builder_->add("zzz", 999, errorCode);
633         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
634             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
635         }
636     }
637     logln("serialized trie size: %ld bytes\n", (long)sp.length());
638     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
639     if(sp.data()==sp2.data()) {
640         errln("builder.buildStringPiece() before & after build() returned same array");
641     }
642     if(errorCode.isFailure()) {
643         return NULL;
644     }
645     // Tries from either build() method should be identical but
646     // BytesTrie does not implement equals().
647     // We just return either one.
648     if((dataLength&1)!=0) {
649         return trie.orphan();
650     } else {
651         return new BytesTrie(sp2.data());
652     }
653 }
654 
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)655 void BytesTrieTest::checkFirst(BytesTrie &trie,
656                                const StringAndValue data[], int32_t dataLength) {
657     for(int32_t i=0; i<dataLength; ++i) {
658         int c=*data[i].s;
659         if(c==0) {
660             continue;  // skip empty string
661         }
662         UStringTrieResult firstResult=trie.first(c);
663         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
664         UStringTrieResult nextResult=trie.next(data[i].s[1]);
665         if(firstResult!=trie.reset().next(c) ||
666            firstResult!=trie.current() ||
667            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
668            nextResult!=trie.next(data[i].s[1])
669         ) {
670             errln("trie.first(%c)!=trie.reset().next(same) for %s",
671                   c, data[i].s);
672         }
673     }
674     trie.reset();
675 }
676 
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)677 void BytesTrieTest::checkNext(BytesTrie &trie,
678                               const StringAndValue data[], int32_t dataLength) {
679     BytesTrie::State state;
680     for(int32_t i=0; i<dataLength; ++i) {
681         int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
682         UStringTrieResult result;
683         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
684             result!=trie.current()
685         ) {
686             errln("trie does not seem to contain %s", data[i].s);
687         } else if(trie.getValue()!=data[i].value) {
688             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
689                   data[i].s,
690                   (long)trie.getValue(), (long)trie.getValue(),
691                   (long)data[i].value, (long)data[i].value);
692         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
693             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
694         }
695         trie.reset();
696         stringLength=strlen(data[i].s);
697         result=trie.current();
698         for(int32_t j=0; j<stringLength; ++j) {
699             if(!USTRINGTRIE_HAS_NEXT(result)) {
700                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
701                 break;
702             }
703             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
704                 trie.getValue();
705                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
706                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
707                     break;
708                 }
709             }
710             result=trie.next(data[i].s[j]);
711             if(!USTRINGTRIE_MATCHES(result)) {
712                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
713                 break;
714             }
715             if(result!=trie.current()) {
716                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
717                 break;
718             }
719         }
720         if(!USTRINGTRIE_HAS_VALUE(result)) {
721             errln("trie.next()!=hasValue at the end of %s", data[i].s);
722             continue;
723         }
724         trie.getValue();
725         if(result!=trie.current()) {
726             errln("trie.current() != current()+getValue()+current() after end of %s",
727                   data[i].s);
728         }
729         // Compare the final current() with whether next() can actually continue.
730         trie.saveState(state);
731         UBool nextContinues=FALSE;
732         // Try all graphic characters; we only use those in test strings in this file.
733 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
734         const int32_t minChar=0x20;
735         const int32_t maxChar=0x7e;
736 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
737         const int32_t minChar=0x40;
738         const int32_t maxChar=0xfe;
739 #else
740         const int32_t minChar=0;
741         const int32_t maxChar=0xff;
742 #endif
743         for(int32_t c=minChar; c<=maxChar; ++c) {
744             if(trie.resetToState(state).next(c)) {
745                 nextContinues=TRUE;
746                 break;
747             }
748         }
749         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
750             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
751                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
752         }
753         trie.reset();
754     }
755 }
756 
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)757 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
758                                        const StringAndValue data[], int32_t dataLength) {
759     BytesTrie::State noState, state;
760     for(int32_t i=0; i<dataLength; ++i) {
761         if((i&1)==0) {
762             // This should have no effect.
763             trie.resetToState(noState);
764         }
765         const char *expectedString=data[i].s;
766         int32_t stringLength=strlen(expectedString);
767         int32_t partialLength=stringLength/3;
768         for(int32_t j=0; j<partialLength; ++j) {
769             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
770                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
771                 return;
772             }
773         }
774         trie.saveState(state);
775         UStringTrieResult resultAtState=trie.current();
776         UStringTrieResult result;
777         int32_t valueAtState=-99;
778         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
779             valueAtState=trie.getValue();
780         }
781         result=trie.next(0);  // mismatch
782         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
783             errln("trie.next(0) matched after part of %s", data[i].s);
784         }
785         if( resultAtState!=trie.resetToState(state).current() ||
786             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
787         ) {
788             errln("trie.next(part of %s) changes current()/getValue() after "
789                   "saveState/next(0)/resetToState",
790                   data[i].s);
791         } else if(!USTRINGTRIE_HAS_VALUE(
792                       result=trie.next(expectedString+partialLength,
793                                        stringLength-partialLength)) ||
794                   result!=trie.current()) {
795             errln("trie.next(rest of %s) does not seem to contain %s after "
796                   "saveState/next(0)/resetToState",
797                   data[i].s, data[i].s);
798         } else if(!USTRINGTRIE_HAS_VALUE(
799                       result=trie.resetToState(state).
800                                   next(expectedString+partialLength,
801                                        stringLength-partialLength)) ||
802                   result!=trie.current()) {
803             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
804                   data[i].s);
805         } else if(trie.getValue()!=data[i].value) {
806             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
807                   data[i].s,
808                   (long)trie.getValue(), (long)trie.getValue(),
809                   (long)data[i].value, (long)data[i].value);
810         }
811         trie.reset();
812     }
813 }
814 
815 // next(string) is also tested in other functions,
816 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)817 void BytesTrieTest::checkNextString(BytesTrie &trie,
818                                     const StringAndValue data[], int32_t dataLength) {
819     for(int32_t i=0; i<dataLength; ++i) {
820         const char *expectedString=data[i].s;
821         int32_t stringLength=strlen(expectedString);
822         if(!trie.next(expectedString, stringLength/2)) {
823             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
824             continue;
825         }
826         // Test that we stop properly at the end of the string.
827         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
828             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
829         }
830         trie.reset();
831     }
832 }
833 
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)834 void BytesTrieTest::checkIterator(const BytesTrie &trie,
835                                   const StringAndValue data[], int32_t dataLength) {
836     IcuTestErrorCode errorCode(*this, "checkIterator()");
837     BytesTrie::Iterator iter(trie, 0, errorCode);
838     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
839         return;
840     }
841     checkIterator(iter, data, dataLength);
842 }
843 
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)844 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
845                                   const StringAndValue data[], int32_t dataLength) {
846     IcuTestErrorCode errorCode(*this, "checkIterator()");
847     for(int32_t i=0; i<dataLength; ++i) {
848         if(!iter.hasNext()) {
849             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
850             break;
851         }
852         UBool hasNext=iter.next(errorCode);
853         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
854             break;
855         }
856         if(!hasNext) {
857             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
858             break;
859         }
860         if(iter.getString()!=StringPiece(data[i].s)) {
861             errln("trie iterator next().getString()=%s but expected %s for item %d",
862                   iter.getString().data(), data[i].s, (int)i);
863         }
864         if(iter.getValue()!=data[i].value) {
865             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
866                   (long)iter.getValue(), (long)iter.getValue(),
867                   (long)data[i].value, (long)data[i].value,
868                   (int)i, data[i].s);
869         }
870     }
871     if(iter.hasNext()) {
872         errln("trie iterator hasNext()=TRUE after all items");
873     }
874     UBool hasNext=iter.next(errorCode);
875     errorCode.logIfFailureAndReset("trie iterator next() after all items");
876     if(hasNext) {
877         errln("trie iterator next()=TRUE after all items");
878     }
879 }
880