1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucharstrietest.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010nov16
12 * created by: Markus W. Scherer
13 */
14
15 #include <string.h>
16
17 #include "unicode/utypes.h"
18 #include "unicode/appendable.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/ucharstrie.h"
21 #include "unicode/ucharstriebuilder.h"
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26
27 struct StringAndValue {
28 const char *s;
29 int32_t value;
30 };
31
32 class UCharsTrieTest : public IntlTest {
33 public:
34 UCharsTrieTest();
35 virtual ~UCharsTrieTest();
36
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38 void TestBuilder();
39 void TestEmpty();
40 void Test_a();
41 void Test_a_ab();
42 void TestShortestBranch();
43 void TestBranches();
44 void TestLongSequence();
45 void TestLongBranch();
46 void TestValuesForState();
47 void TestCompact();
48 void TestFirstForCodePoint();
49 void TestNextForCodePoint();
50
51 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
52 void TestLargeTrie();
53
54 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
55 void TestHasUniqueValue();
56 void TestGetNextUChars();
57 void TestIteratorFromBranch();
58 void TestIteratorFromLinearMatch();
59 void TestTruncatingIteratorFromRoot();
60 void TestTruncatingIteratorFromLinearMatchShort();
61 void TestTruncatingIteratorFromLinearMatchLong();
62 void TestIteratorFromUChars();
63
64 void checkData(const StringAndValue data[], int32_t dataLength);
65 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
66 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
67 UStringTrieBuildOption buildOption);
68 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
69 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
70 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
74
75 private:
76 UCharsTrieBuilder *builder_;
77 };
78
createUCharsTrieTest()79 extern IntlTest *createUCharsTrieTest() {
80 return new UCharsTrieTest();
81 }
82
UCharsTrieTest()83 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
84 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
85 builder_=new UCharsTrieBuilder(errorCode);
86 }
87
~UCharsTrieTest()88 UCharsTrieTest::~UCharsTrieTest() {
89 delete builder_;
90 }
91
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)92 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
93 if(exec) {
94 logln("TestSuite UCharsTrieTest: ");
95 }
96 TESTCASE_AUTO_BEGIN;
97 TESTCASE_AUTO(TestBuilder);
98 TESTCASE_AUTO(TestEmpty);
99 TESTCASE_AUTO(Test_a);
100 TESTCASE_AUTO(Test_a_ab);
101 TESTCASE_AUTO(TestShortestBranch);
102 TESTCASE_AUTO(TestBranches);
103 TESTCASE_AUTO(TestLongSequence);
104 TESTCASE_AUTO(TestLongBranch);
105 TESTCASE_AUTO(TestValuesForState);
106 TESTCASE_AUTO(TestCompact);
107 TESTCASE_AUTO(TestFirstForCodePoint);
108 TESTCASE_AUTO(TestNextForCodePoint);
109 TESTCASE_AUTO(TestLargeTrie);
110 TESTCASE_AUTO(TestHasUniqueValue);
111 TESTCASE_AUTO(TestGetNextUChars);
112 TESTCASE_AUTO(TestIteratorFromBranch);
113 TESTCASE_AUTO(TestIteratorFromLinearMatch);
114 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
115 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
116 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
117 TESTCASE_AUTO(TestIteratorFromUChars);
118 TESTCASE_AUTO_END;
119 }
120
TestBuilder()121 void UCharsTrieTest::TestBuilder() {
122 IcuTestErrorCode errorCode(*this, "TestBuilder()");
123 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
124 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
125 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
126 return;
127 }
128 // TODO: remove .build(...) once add() checks for duplicates.
129 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
130 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
131 errln("UCharsTrieBuilder.add() did not detect duplicates");
132 return;
133 }
134 }
135
TestEmpty()136 void UCharsTrieTest::TestEmpty() {
137 static const StringAndValue data[]={
138 { "", 0 }
139 };
140 checkData(data, UPRV_LENGTHOF(data));
141 }
142
Test_a()143 void UCharsTrieTest::Test_a() {
144 static const StringAndValue data[]={
145 { "a", 1 }
146 };
147 checkData(data, UPRV_LENGTHOF(data));
148 }
149
Test_a_ab()150 void UCharsTrieTest::Test_a_ab() {
151 static const StringAndValue data[]={
152 { "a", 1 },
153 { "ab", 100 }
154 };
155 checkData(data, UPRV_LENGTHOF(data));
156 }
157
TestShortestBranch()158 void UCharsTrieTest::TestShortestBranch() {
159 static const StringAndValue data[]={
160 { "a", 1000 },
161 { "b", 2000 }
162 };
163 checkData(data, UPRV_LENGTHOF(data));
164 }
165
TestBranches()166 void UCharsTrieTest::TestBranches() {
167 static const StringAndValue data[]={
168 { "a", 0x10 },
169 { "cc", 0x40 },
170 { "e", 0x100 },
171 { "ggg", 0x400 },
172 { "i", 0x1000 },
173 { "kkkk", 0x4000 },
174 { "n", 0x10000 },
175 { "ppppp", 0x40000 },
176 { "r", 0x100000 },
177 { "sss", 0x200000 },
178 { "t", 0x400000 },
179 { "uu", 0x800000 },
180 { "vv", 0x7fffffff },
181 { "zz", (int32_t)0x80000000 }
182 };
183 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
184 logln("TestBranches length=%d", (int)length);
185 checkData(data, length);
186 }
187 }
188
TestLongSequence()189 void UCharsTrieTest::TestLongSequence() {
190 static const StringAndValue data[]={
191 { "a", -1 },
192 // sequence of linear-match nodes
193 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
194 // more than 256 units
195 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
201 };
202 checkData(data, UPRV_LENGTHOF(data));
203 }
204
TestLongBranch()205 void UCharsTrieTest::TestLongBranch() {
206 // Split-branch and interesting compact-integer values.
207 static const StringAndValue data[]={
208 { "a", -2 },
209 { "b", -1 },
210 { "c", 0 },
211 { "d2", 1 },
212 { "f", 0x3f },
213 { "g", 0x40 },
214 { "h", 0x41 },
215 { "j23", 0x1900 },
216 { "j24", 0x19ff },
217 { "j25", 0x1a00 },
218 { "k2", 0x1a80 },
219 { "k3", 0x1aff },
220 { "l234567890", 0x1b00 },
221 { "l234567890123", 0x1b01 },
222 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
223 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
224 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
225 { "r", 0x333333 },
226 { "s2345", 0x4444444 },
227 { "t234567890", 0x77777777 },
228 { "z", (int32_t)0x80000001 }
229 };
230 checkData(data, UPRV_LENGTHOF(data));
231 }
232
TestValuesForState()233 void UCharsTrieTest::TestValuesForState() {
234 // Check that saveState() and resetToState() interact properly
235 // with next() and current().
236 static const StringAndValue data[]={
237 { "a", -1 },
238 { "ab", -2 },
239 { "abc", -3 },
240 { "abcd", -4 },
241 { "abcde", -5 },
242 { "abcdef", -6 }
243 };
244 checkData(data, UPRV_LENGTHOF(data));
245 }
246
TestCompact()247 void UCharsTrieTest::TestCompact() {
248 // Duplicate trailing strings and values provide opportunities for compacting.
249 static const StringAndValue data[]={
250 { "+", 0 },
251 { "+august", 8 },
252 { "+december", 12 },
253 { "+july", 7 },
254 { "+june", 6 },
255 { "+november", 11 },
256 { "+october", 10 },
257 { "+september", 9 },
258 { "-", 0 },
259 { "-august", 8 },
260 { "-december", 12 },
261 { "-july", 7 },
262 { "-june", 6 },
263 { "-november", 11 },
264 { "-october", 10 },
265 { "-september", 9 },
266 // The l+n branch (with its sub-nodes) is a duplicate but will be written
267 // both times because each time it follows a different linear-match node.
268 { "xjuly", 7 },
269 { "xjune", 6 }
270 };
271 checkData(data, UPRV_LENGTHOF(data));
272 }
273
TestFirstForCodePoint()274 void UCharsTrieTest::TestFirstForCodePoint() {
275 static const StringAndValue data[]={
276 { "a", 1 },
277 { "a\\ud800", 2 },
278 { "a\\U00010000", 3 },
279 { "\\ud840", 4 },
280 { "\\U00020000\\udbff", 5 },
281 { "\\U00020000\\U0010ffff", 6 },
282 { "\\U00020000\\U0010ffffz", 7 },
283 { "\\U00050000xy", 8 },
284 { "\\U00050000xyz", 9 }
285 };
286 checkData(data, UPRV_LENGTHOF(data));
287 }
288
TestNextForCodePoint()289 void UCharsTrieTest::TestNextForCodePoint() {
290 static const StringAndValue data[]={
291 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
292 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
293 { "\\u4dff\\U000103ff", 99999 }
294 };
295 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
296 if(trie.isNull()) {
297 return; // buildTrie() reported an error
298 }
299 UStringTrieResult result;
300 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
301 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
302 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
306 trie->getValue()!=2000000000
307 ) {
308 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
309 }
310 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
311 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
312 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
314 trie->getValue()!=44444
315 ) {
316 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
317 }
318 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
319 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
320 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate
322 ) {
323 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
324 }
325 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
326 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
327 trie->getValue()!=99999
328 ) {
329 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
330 }
331 }
332
333 // Definitions in the anonymous namespace are invisible outside this file.
334 namespace {
335
336 // Generate (string, value) pairs.
337 // The first string (before next()) will be empty.
338 class Generator {
339 public:
Generator()340 Generator() : value(4711), num(0) {}
next()341 void next() {
342 UChar c;
343 s.truncate(0);
344 s.append(c=(UChar)(value>>16));
345 s.append((UChar)(value>>4));
346 if(value&1) {
347 s.append((UChar)value);
348 }
349 set.add(c);
350 value+=((value>>5)&0x7ff)*3+1;
351 ++num;
352 }
getString() const353 const UnicodeString &getString() const { return s; }
getValue() const354 int32_t getValue() const { return value; }
countUniqueFirstChars() const355 int32_t countUniqueFirstChars() const { return set.size(); }
getIndex() const356 int32_t getIndex() const { return num; }
357
358 private:
359 UnicodeString s;
360 UnicodeSet set;
361 int32_t value;
362 int32_t num;
363 };
364
365 } // end namespace
366
buildLargeTrie(int32_t numUniqueFirst)367 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
368 IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
369 Generator gen;
370 builder_->clear();
371 while(gen.countUniqueFirstChars()<numUniqueFirst) {
372 builder_->add(gen.getString(), gen.getValue(), errorCode);
373 gen.next();
374 }
375 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
376 UnicodeString trieUChars;
377 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
378 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
379 return new UCharsTrie(trieUChars.getBuffer());
380 }
381
382 // Exercise a large branch node.
TestLargeTrie()383 void UCharsTrieTest::TestLargeTrie() {
384 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
385 if(trie.isNull()) {
386 return; // buildTrie() reported an error
387 }
388 Generator gen;
389 while(gen.countUniqueFirstChars()<1111) {
390 UnicodeString x(gen.getString());
391 int32_t value=gen.getValue();
392 if(!x.isEmpty()) {
393 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
394 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
395 x[0], (long)gen.getIndex());
396 break;
397 }
398 x.remove(0, 1);
399 }
400 UStringTrieResult result=trie->next(x.getBuffer(), x.length());
401 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
402 errln("next(%d chars U+%04X U+%04X)!=hasValue or "
403 "next()!=current() or getValue() wrong "
404 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
405 break;
406 }
407 gen.next();
408 }
409 }
410
411 enum {
412 u_a=0x61,
413 u_b=0x62,
414 u_c=0x63,
415 u_j=0x6a,
416 u_n=0x6e,
417 u_r=0x72,
418 u_u=0x75,
419 u_y=0x79
420 };
421
buildMonthsTrie(UStringTrieBuildOption buildOption)422 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
423 // All types of nodes leading to the same value,
424 // for code coverage of recursive functions.
425 // In particular, we need a lot of branches on some single level
426 // to exercise a split-branch node.
427 static const StringAndValue data[]={
428 { "august", 8 },
429 { "jan", 1 },
430 { "jan.", 1 },
431 { "jana", 1 },
432 { "janbb", 1 },
433 { "janc", 1 },
434 { "janddd", 1 },
435 { "janee", 1 },
436 { "janef", 1 },
437 { "janf", 1 },
438 { "jangg", 1 },
439 { "janh", 1 },
440 { "janiiii", 1 },
441 { "janj", 1 },
442 { "jankk", 1 },
443 { "jankl", 1 },
444 { "jankmm", 1 },
445 { "janl", 1 },
446 { "janm", 1 },
447 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
448 { "jano", 1 },
449 { "janpp", 1 },
450 { "janqqq", 1 },
451 { "janr", 1 },
452 { "januar", 1 },
453 { "january", 1 },
454 { "july", 7 },
455 { "jun", 6 },
456 { "jun.", 6 },
457 { "june", 6 }
458 };
459 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
460 }
461
TestHasUniqueValue()462 void UCharsTrieTest::TestHasUniqueValue() {
463 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
464 if(trie.isNull()) {
465 return; // buildTrie() reported an error
466 }
467 int32_t uniqueValue;
468 if(trie->hasUniqueValue(uniqueValue)) {
469 errln("unique value at root");
470 }
471 trie->next(u_j);
472 trie->next(u_a);
473 trie->next(u_n);
474 // hasUniqueValue() directly after next()
475 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
476 errln("not unique value 1 after \"jan\"");
477 }
478 trie->first(u_j);
479 trie->next(u_u);
480 if(trie->hasUniqueValue(uniqueValue)) {
481 errln("unique value after \"ju\"");
482 }
483 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
484 errln("not normal value 6 after \"jun\"");
485 }
486 // hasUniqueValue() after getValue()
487 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
488 errln("not unique value 6 after \"jun\"");
489 }
490 // hasUniqueValue() from within a linear-match node
491 trie->first(u_a);
492 trie->next(u_u);
493 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
494 errln("not unique value 8 after \"au\"");
495 }
496 }
497
TestGetNextUChars()498 void UCharsTrieTest::TestGetNextUChars() {
499 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
500 if(trie.isNull()) {
501 return; // buildTrie() reported an error
502 }
503 UnicodeString buffer;
504 UnicodeStringAppendable app(buffer);
505 int32_t count=trie->getNextUChars(app);
506 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
507 errln("months getNextUChars()!=[aj] at root");
508 }
509 trie->next(u_j);
510 trie->next(u_a);
511 trie->next(u_n);
512 // getNextUChars() directly after next()
513 buffer.remove();
514 count=trie->getNextUChars(app);
515 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
516 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
517 }
518 // getNextUChars() after getValue()
519 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
520 buffer.remove();
521 count=trie->getNextUChars(app);
522 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
523 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
524 }
525 // getNextUChars() from a linear-match node
526 trie->next(u_u);
527 buffer.remove();
528 count=trie->getNextUChars(app);
529 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
530 errln("months getNextUChars()!=[a] after \"janu\"");
531 }
532 trie->next(u_a);
533 buffer.remove();
534 count=trie->getNextUChars(app);
535 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
536 errln("months getNextUChars()!=[r] after \"janua\"");
537 }
538 trie->next(u_r);
539 trie->next(u_y);
540 // getNextUChars() after a final match
541 buffer.remove();
542 count=trie->getNextUChars(app);
543 if(count!=0 || buffer.length()!=0) {
544 errln("months getNextUChars()!=[] after \"january\"");
545 }
546 }
547
TestIteratorFromBranch()548 void UCharsTrieTest::TestIteratorFromBranch() {
549 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
550 if(trie.isNull()) {
551 return; // buildTrie() reported an error
552 }
553 // Go to a branch node.
554 trie->next(u_j);
555 trie->next(u_a);
556 trie->next(u_n);
557 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
558 UCharsTrie::Iterator iter(*trie, 0, errorCode);
559 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
560 return;
561 }
562 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
563 // following "jan".
564 static const StringAndValue data[]={
565 { "", 1 },
566 { ".", 1 },
567 { "a", 1 },
568 { "bb", 1 },
569 { "c", 1 },
570 { "ddd", 1 },
571 { "ee", 1 },
572 { "ef", 1 },
573 { "f", 1 },
574 { "gg", 1 },
575 { "h", 1 },
576 { "iiii", 1 },
577 { "j", 1 },
578 { "kk", 1 },
579 { "kl", 1 },
580 { "kmm", 1 },
581 { "l", 1 },
582 { "m", 1 },
583 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
584 { "o", 1 },
585 { "pp", 1 },
586 { "qqq", 1 },
587 { "r", 1 },
588 { "uar", 1 },
589 { "uary", 1 }
590 };
591 checkIterator(iter, data, UPRV_LENGTHOF(data));
592 // Reset, and we should get the same result.
593 logln("after iter.reset()");
594 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
595 }
596
TestIteratorFromLinearMatch()597 void UCharsTrieTest::TestIteratorFromLinearMatch() {
598 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
599 if(trie.isNull()) {
600 return; // buildTrie() reported an error
601 }
602 // Go into a linear-match node.
603 trie->next(u_j);
604 trie->next(u_a);
605 trie->next(u_n);
606 trie->next(u_u);
607 trie->next(u_a);
608 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
609 UCharsTrie::Iterator iter(*trie, 0, errorCode);
610 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
611 return;
612 }
613 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
614 // following "janua".
615 static const StringAndValue data[]={
616 { "r", 1 },
617 { "ry", 1 }
618 };
619 checkIterator(iter, data, UPRV_LENGTHOF(data));
620 // Reset, and we should get the same result.
621 logln("after iter.reset()");
622 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
623 }
624
TestTruncatingIteratorFromRoot()625 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
626 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
627 if(trie.isNull()) {
628 return; // buildTrie() reported an error
629 }
630 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
631 UCharsTrie::Iterator iter(*trie, 4, errorCode);
632 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
633 return;
634 }
635 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
636 // of each string, and no string duplicates from the truncation.
637 static const StringAndValue data[]={
638 { "augu", -1 },
639 { "jan", 1 },
640 { "jan.", 1 },
641 { "jana", 1 },
642 { "janb", -1 },
643 { "janc", 1 },
644 { "jand", -1 },
645 { "jane", -1 },
646 { "janf", 1 },
647 { "jang", -1 },
648 { "janh", 1 },
649 { "jani", -1 },
650 { "janj", 1 },
651 { "jank", -1 },
652 { "janl", 1 },
653 { "janm", 1 },
654 { "jann", -1 },
655 { "jano", 1 },
656 { "janp", -1 },
657 { "janq", -1 },
658 { "janr", 1 },
659 { "janu", -1 },
660 { "july", 7 },
661 { "jun", 6 },
662 { "jun.", 6 },
663 { "june", 6 }
664 };
665 checkIterator(iter, data, UPRV_LENGTHOF(data));
666 // Reset, and we should get the same result.
667 logln("after iter.reset()");
668 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
669 }
670
TestTruncatingIteratorFromLinearMatchShort()671 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
672 static const StringAndValue data[]={
673 { "abcdef", 10 },
674 { "abcdepq", 200 },
675 { "abcdeyz", 3000 }
676 };
677 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
678 if(trie.isNull()) {
679 return; // buildTrie() reported an error
680 }
681 // Go into a linear-match node.
682 trie->next(u_a);
683 trie->next(u_b);
684 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
685 // Truncate within the linear-match node.
686 UCharsTrie::Iterator iter(*trie, 2, errorCode);
687 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
688 return;
689 }
690 static const StringAndValue expected[]={
691 { "cd", -1 }
692 };
693 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
694 // Reset, and we should get the same result.
695 logln("after iter.reset()");
696 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
697 }
698
TestTruncatingIteratorFromLinearMatchLong()699 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
700 static const StringAndValue data[]={
701 { "abcdef", 10 },
702 { "abcdepq", 200 },
703 { "abcdeyz", 3000 }
704 };
705 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
706 if(trie.isNull()) {
707 return; // buildTrie() reported an error
708 }
709 // Go into a linear-match node.
710 trie->next(u_a);
711 trie->next(u_b);
712 trie->next(u_c);
713 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
714 // Truncate after the linear-match node.
715 UCharsTrie::Iterator iter(*trie, 3, errorCode);
716 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
717 return;
718 }
719 static const StringAndValue expected[]={
720 { "def", 10 },
721 { "dep", -1 },
722 { "dey", -1 }
723 };
724 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
725 // Reset, and we should get the same result.
726 logln("after iter.reset()");
727 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
728 }
729
TestIteratorFromUChars()730 void UCharsTrieTest::TestIteratorFromUChars() {
731 static const StringAndValue data[]={
732 { "mm", 3 },
733 { "mmm", 33 },
734 { "mmnop", 333 }
735 };
736 builder_->clear();
737 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
738 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
739 builder_->add(data[i].s, data[i].value, errorCode);
740 }
741 UnicodeString trieUChars;
742 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
743 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
744 checkIterator(iter, data, UPRV_LENGTHOF(data));
745 }
746
checkData(const StringAndValue data[],int32_t dataLength)747 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
748 logln("checkData(dataLength=%d, fast)", (int)dataLength);
749 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
750 logln("checkData(dataLength=%d, small)", (int)dataLength);
751 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
752 }
753
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)754 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
755 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
756 if(trie.isNull()) {
757 return; // buildTrie() reported an error
758 }
759 checkFirst(*trie, data, dataLength);
760 checkNext(*trie, data, dataLength);
761 checkNextWithState(*trie, data, dataLength);
762 checkNextString(*trie, data, dataLength);
763 checkIterator(*trie, data, dataLength);
764 }
765
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)766 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
767 UStringTrieBuildOption buildOption) {
768 IcuTestErrorCode errorCode(*this, "buildTrie()");
769 // Add the items to the trie builder in an interesting (not trivial, not random) order.
770 int32_t index, step;
771 if(dataLength&1) {
772 // Odd number of items.
773 index=dataLength/2;
774 step=2;
775 } else if((dataLength%3)!=0) {
776 // Not a multiple of 3.
777 index=dataLength/5;
778 step=3;
779 } else {
780 index=dataLength-1;
781 step=-1;
782 }
783 builder_->clear();
784 for(int32_t i=0; i<dataLength; ++i) {
785 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
786 data[index].value, errorCode);
787 index=(index+step)%dataLength;
788 }
789 UnicodeString trieUChars;
790 builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
791 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
792 if(!errorCode.logIfFailureAndReset("add()/build()")) {
793 builder_->add("zzz", 999, errorCode);
794 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
795 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
796 }
797 }
798 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
799 UnicodeString trieUChars2;
800 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
801 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
802 errln("builder.buildUnicodeString() before & after build() returned same array");
803 }
804 if(errorCode.isFailure()) {
805 return NULL;
806 }
807 // Tries from either build() method should be identical but
808 // UCharsTrie does not implement equals().
809 // We just return either one.
810 if((dataLength&1)!=0) {
811 return trie.orphan();
812 } else {
813 return new UCharsTrie(trieUChars2.getBuffer());
814 }
815 }
816
checkFirst(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)817 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
818 const StringAndValue data[], int32_t dataLength) {
819 for(int32_t i=0; i<dataLength; ++i) {
820 if(*data[i].s==0) {
821 continue; // skip empty string
822 }
823 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
824 UChar32 c=expectedString[0];
825 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
826 UStringTrieResult firstResult=trie.first(c);
827 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
828 UStringTrieResult nextResult=trie.next(nextCp);
829 if(firstResult!=trie.reset().next(c) ||
830 firstResult!=trie.current() ||
831 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
832 nextResult!=trie.next(nextCp)
833 ) {
834 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
835 c, data[i].s);
836 }
837 c=expectedString.char32At(0);
838 int32_t cLength=U16_LENGTH(c);
839 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
840 firstResult=trie.firstForCodePoint(c);
841 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
842 nextResult=trie.nextForCodePoint(nextCp);
843 if(firstResult!=trie.reset().nextForCodePoint(c) ||
844 firstResult!=trie.current() ||
845 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
846 nextResult!=trie.nextForCodePoint(nextCp)
847 ) {
848 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
849 c, data[i].s);
850 }
851 }
852 trie.reset();
853 }
854
checkNext(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)855 void UCharsTrieTest::checkNext(UCharsTrie &trie,
856 const StringAndValue data[], int32_t dataLength) {
857 UCharsTrie::State state;
858 for(int32_t i=0; i<dataLength; ++i) {
859 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
860 int32_t stringLength= (i&1) ? -1 : expectedString.length();
861 UStringTrieResult result;
862 if( !USTRINGTRIE_HAS_VALUE(
863 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
864 result!=trie.current()
865 ) {
866 errln("trie does not seem to contain %s", data[i].s);
867 } else if(trie.getValue()!=data[i].value) {
868 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
869 data[i].s,
870 (long)trie.getValue(), (long)trie.getValue(),
871 (long)data[i].value, (long)data[i].value);
872 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
873 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
874 }
875 trie.reset();
876 stringLength=expectedString.length();
877 result=trie.current();
878 for(int32_t j=0; j<stringLength; ++j) {
879 if(!USTRINGTRIE_HAS_NEXT(result)) {
880 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
881 break;
882 }
883 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
884 trie.getValue();
885 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
886 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
887 break;
888 }
889 }
890 result=trie.next(expectedString[j]);
891 if(!USTRINGTRIE_MATCHES(result)) {
892 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
893 break;
894 }
895 if(result!=trie.current()) {
896 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
897 break;
898 }
899 }
900 if(!USTRINGTRIE_HAS_VALUE(result)) {
901 errln("trie.next()!=hasValue at the end of %s", data[i].s);
902 continue;
903 }
904 trie.getValue();
905 if(result!=trie.current()) {
906 errln("trie.current() != current()+getValue()+current() after end of %s",
907 data[i].s);
908 }
909 // Compare the final current() with whether next() can actually continue.
910 trie.saveState(state);
911 UBool nextContinues=FALSE;
912 for(int32_t c=0x20; c<0xe000; ++c) {
913 if(c==0x80) {
914 c=0xd800; // Check for ASCII and surrogates but not all of the BMP.
915 }
916 if(trie.resetToState(state).next(c)) {
917 nextContinues=TRUE;
918 break;
919 }
920 }
921 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
922 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
923 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
924 }
925 trie.reset();
926 }
927 }
928
checkNextWithState(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)929 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
930 const StringAndValue data[], int32_t dataLength) {
931 UCharsTrie::State noState, state;
932 for(int32_t i=0; i<dataLength; ++i) {
933 if((i&1)==0) {
934 // This should have no effect.
935 trie.resetToState(noState);
936 }
937 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
938 int32_t stringLength=expectedString.length();
939 int32_t partialLength=stringLength/3;
940 for(int32_t j=0; j<partialLength; ++j) {
941 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
942 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
943 return;
944 }
945 }
946 trie.saveState(state);
947 UStringTrieResult resultAtState=trie.current();
948 UStringTrieResult result;
949 int32_t valueAtState=-99;
950 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
951 valueAtState=trie.getValue();
952 }
953 result=trie.next(0); // mismatch
954 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
955 errln("trie.next(0) matched after part of %s", data[i].s);
956 }
957 if( resultAtState!=trie.resetToState(state).current() ||
958 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
959 ) {
960 errln("trie.next(part of %s) changes current()/getValue() after "
961 "saveState/next(0)/resetToState",
962 data[i].s);
963 } else if(!USTRINGTRIE_HAS_VALUE(
964 result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
965 stringLength-partialLength)) ||
966 result!=trie.current()) {
967 errln("trie.next(rest of %s) does not seem to contain %s after "
968 "saveState/next(0)/resetToState",
969 data[i].s, data[i].s);
970 } else if(!USTRINGTRIE_HAS_VALUE(
971 result=trie.resetToState(state).
972 next(expectedString.getTerminatedBuffer()+partialLength,
973 stringLength-partialLength)) ||
974 result!=trie.current()) {
975 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
976 data[i].s);
977 } else if(trie.getValue()!=data[i].value) {
978 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
979 data[i].s,
980 (long)trie.getValue(), (long)trie.getValue(),
981 (long)data[i].value, (long)data[i].value);
982 }
983 trie.reset();
984 }
985 }
986
987 // next(string) is also tested in other functions,
988 // but here we try to go partway through the string, and then beyond it.
checkNextString(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)989 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
990 const StringAndValue data[], int32_t dataLength) {
991 for(int32_t i=0; i<dataLength; ++i) {
992 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
993 int32_t stringLength=expectedString.length();
994 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
995 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
996 continue;
997 }
998 // Test that we stop properly at the end of the string.
999 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1000 stringLength+1-stringLength/2)) {
1001 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1002 }
1003 trie.reset();
1004 }
1005 }
1006
checkIterator(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)1007 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1008 const StringAndValue data[], int32_t dataLength) {
1009 IcuTestErrorCode errorCode(*this, "checkIterator()");
1010 UCharsTrie::Iterator iter(trie, 0, errorCode);
1011 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1012 return;
1013 }
1014 checkIterator(iter, data, dataLength);
1015 }
1016
checkIterator(UCharsTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)1017 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1018 const StringAndValue data[], int32_t dataLength) {
1019 IcuTestErrorCode errorCode(*this, "checkIterator()");
1020 for(int32_t i=0; i<dataLength; ++i) {
1021 if(!iter.hasNext()) {
1022 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1023 break;
1024 }
1025 UBool hasNext=iter.next(errorCode);
1026 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1027 break;
1028 }
1029 if(!hasNext) {
1030 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1031 break;
1032 }
1033 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1034 if(iter.getString()!=expectedString) {
1035 char buffer[1000];
1036 UnicodeString invString(prettify(iter.getString()));
1037 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
1038 errln("trie iterator next().getString()=%s but expected %s for item %d",
1039 buffer, data[i].s, (int)i);
1040 }
1041 if(iter.getValue()!=data[i].value) {
1042 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1043 (long)iter.getValue(), (long)iter.getValue(),
1044 (long)data[i].value, (long)data[i].value,
1045 (int)i, data[i].s);
1046 }
1047 }
1048 if(iter.hasNext()) {
1049 errln("trie iterator hasNext()=TRUE after all items");
1050 }
1051 UBool hasNext=iter.next(errorCode);
1052 errorCode.logIfFailureAndReset("trie iterator next() after all items");
1053 if(hasNext) {
1054 errln("trie iterator next()=TRUE after all items");
1055 }
1056 }
1057