1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucharstrietest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010nov16
14 * created by: Markus W. Scherer
15 */
16
17 #include <string.h>
18
19 #include "unicode/utypes.h"
20 #include "unicode/appendable.h"
21 #include "unicode/localpointer.h"
22 #include "unicode/ucharstrie.h"
23 #include "unicode/ucharstriebuilder.h"
24 #include "unicode/uniset.h"
25 #include "unicode/unistr.h"
26 #include "unicode/utf16.h"
27 #include "intltest.h"
28 #include "cmemory.h"
29
/**
 * One test item: a key string and the value stored for it in the trie.
 * Kept as a plain aggregate so that tables can be brace-initialized.
 */
struct StringAndValue {
    char const *s;   // key text; the tests convert it with US_INV and unescape() it
    int32_t value;   // value expected for the key
};
34
35 class UCharsTrieTest : public IntlTest {
36 public:
37 UCharsTrieTest();
38 virtual ~UCharsTrieTest();
39
40 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
41 void TestBuilder();
42 void TestEmpty();
43 void Test_a();
44 void Test_a_ab();
45 void TestShortestBranch();
46 void TestBranches();
47 void TestLongSequence();
48 void TestLongBranch();
49 void TestValuesForState();
50 void TestCompact();
51 void TestFirstForCodePoint();
52 void TestNextForCodePoint();
53
54 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
55 void TestLargeTrie();
56
57 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
58 void TestHasUniqueValue();
59 void TestGetNextUChars();
60 void TestIteratorFromBranch();
61 void TestIteratorFromLinearMatch();
62 void TestTruncatingIteratorFromRoot();
63 void TestTruncatingIteratorFromLinearMatchShort();
64 void TestTruncatingIteratorFromLinearMatchLong();
65 void TestIteratorFromUChars();
66
67 void checkData(const StringAndValue data[], int32_t dataLength);
68 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
69 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
70 UStringTrieBuildOption buildOption);
71 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
75 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
76 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
77
78 private:
79 UCharsTrieBuilder *builder_;
80 };
81
createUCharsTrieTest()82 extern IntlTest *createUCharsTrieTest() {
83 return new UCharsTrieTest();
84 }
85
UCharsTrieTest()86 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
87 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
88 builder_=new UCharsTrieBuilder(errorCode);
89 }
90
~UCharsTrieTest()91 UCharsTrieTest::~UCharsTrieTest() {
92 delete builder_;
93 }
94
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)95 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
96 if(exec) {
97 logln("TestSuite UCharsTrieTest: ");
98 }
99 TESTCASE_AUTO_BEGIN;
100 TESTCASE_AUTO(TestBuilder);
101 TESTCASE_AUTO(TestEmpty);
102 TESTCASE_AUTO(Test_a);
103 TESTCASE_AUTO(Test_a_ab);
104 TESTCASE_AUTO(TestShortestBranch);
105 TESTCASE_AUTO(TestBranches);
106 TESTCASE_AUTO(TestLongSequence);
107 TESTCASE_AUTO(TestLongBranch);
108 TESTCASE_AUTO(TestValuesForState);
109 TESTCASE_AUTO(TestCompact);
110 TESTCASE_AUTO(TestFirstForCodePoint);
111 TESTCASE_AUTO(TestNextForCodePoint);
112 TESTCASE_AUTO(TestLargeTrie);
113 TESTCASE_AUTO(TestHasUniqueValue);
114 TESTCASE_AUTO(TestGetNextUChars);
115 TESTCASE_AUTO(TestIteratorFromBranch);
116 TESTCASE_AUTO(TestIteratorFromLinearMatch);
117 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
118 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
119 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
120 TESTCASE_AUTO(TestIteratorFromUChars);
121 TESTCASE_AUTO_END;
122 }
123
TestBuilder()124 void UCharsTrieTest::TestBuilder() {
125 IcuTestErrorCode errorCode(*this, "TestBuilder()");
126 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
127 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
128 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
129 return;
130 }
131 // TODO: remove .build(...) once add() checks for duplicates.
132 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
133 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
134 errln("UCharsTrieBuilder.add() did not detect duplicates");
135 return;
136 }
137 }
138
TestEmpty()139 void UCharsTrieTest::TestEmpty() {
140 static const StringAndValue data[]={
141 { "", 0 }
142 };
143 checkData(data, UPRV_LENGTHOF(data));
144 }
145
Test_a()146 void UCharsTrieTest::Test_a() {
147 static const StringAndValue data[]={
148 { "a", 1 }
149 };
150 checkData(data, UPRV_LENGTHOF(data));
151 }
152
Test_a_ab()153 void UCharsTrieTest::Test_a_ab() {
154 static const StringAndValue data[]={
155 { "a", 1 },
156 { "ab", 100 }
157 };
158 checkData(data, UPRV_LENGTHOF(data));
159 }
160
TestShortestBranch()161 void UCharsTrieTest::TestShortestBranch() {
162 static const StringAndValue data[]={
163 { "a", 1000 },
164 { "b", 2000 }
165 };
166 checkData(data, UPRV_LENGTHOF(data));
167 }
168
TestBranches()169 void UCharsTrieTest::TestBranches() {
170 static const StringAndValue data[]={
171 { "a", 0x10 },
172 { "cc", 0x40 },
173 { "e", 0x100 },
174 { "ggg", 0x400 },
175 { "i", 0x1000 },
176 { "kkkk", 0x4000 },
177 { "n", 0x10000 },
178 { "ppppp", 0x40000 },
179 { "r", 0x100000 },
180 { "sss", 0x200000 },
181 { "t", 0x400000 },
182 { "uu", 0x800000 },
183 { "vv", 0x7fffffff },
184 { "zz", (int32_t)0x80000000 }
185 };
186 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
187 logln("TestBranches length=%d", (int)length);
188 checkData(data, length);
189 }
190 }
191
TestLongSequence()192 void UCharsTrieTest::TestLongSequence() {
193 static const StringAndValue data[]={
194 { "a", -1 },
195 // sequence of linear-match nodes
196 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
197 // more than 256 units
198 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
202 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
203 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
204 };
205 checkData(data, UPRV_LENGTHOF(data));
206 }
207
TestLongBranch()208 void UCharsTrieTest::TestLongBranch() {
209 // Split-branch and interesting compact-integer values.
210 static const StringAndValue data[]={
211 { "a", -2 },
212 { "b", -1 },
213 { "c", 0 },
214 { "d2", 1 },
215 { "f", 0x3f },
216 { "g", 0x40 },
217 { "h", 0x41 },
218 { "j23", 0x1900 },
219 { "j24", 0x19ff },
220 { "j25", 0x1a00 },
221 { "k2", 0x1a80 },
222 { "k3", 0x1aff },
223 { "l234567890", 0x1b00 },
224 { "l234567890123", 0x1b01 },
225 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
226 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
227 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
228 { "r", 0x333333 },
229 { "s2345", 0x4444444 },
230 { "t234567890", 0x77777777 },
231 { "z", (int32_t)0x80000001 }
232 };
233 checkData(data, UPRV_LENGTHOF(data));
234 }
235
TestValuesForState()236 void UCharsTrieTest::TestValuesForState() {
237 // Check that saveState() and resetToState() interact properly
238 // with next() and current().
239 static const StringAndValue data[]={
240 { "a", -1 },
241 { "ab", -2 },
242 { "abc", -3 },
243 { "abcd", -4 },
244 { "abcde", -5 },
245 { "abcdef", -6 }
246 };
247 checkData(data, UPRV_LENGTHOF(data));
248 }
249
TestCompact()250 void UCharsTrieTest::TestCompact() {
251 // Duplicate trailing strings and values provide opportunities for compacting.
252 static const StringAndValue data[]={
253 { "+", 0 },
254 { "+august", 8 },
255 { "+december", 12 },
256 { "+july", 7 },
257 { "+june", 6 },
258 { "+november", 11 },
259 { "+october", 10 },
260 { "+september", 9 },
261 { "-", 0 },
262 { "-august", 8 },
263 { "-december", 12 },
264 { "-july", 7 },
265 { "-june", 6 },
266 { "-november", 11 },
267 { "-october", 10 },
268 { "-september", 9 },
269 // The l+n branch (with its sub-nodes) is a duplicate but will be written
270 // both times because each time it follows a different linear-match node.
271 { "xjuly", 7 },
272 { "xjune", 6 }
273 };
274 checkData(data, UPRV_LENGTHOF(data));
275 }
276
TestFirstForCodePoint()277 void UCharsTrieTest::TestFirstForCodePoint() {
278 static const StringAndValue data[]={
279 { "a", 1 },
280 { "a\\ud800", 2 },
281 { "a\\U00010000", 3 },
282 { "\\ud840", 4 },
283 { "\\U00020000\\udbff", 5 },
284 { "\\U00020000\\U0010ffff", 6 },
285 { "\\U00020000\\U0010ffffz", 7 },
286 { "\\U00050000xy", 8 },
287 { "\\U00050000xyz", 9 }
288 };
289 checkData(data, UPRV_LENGTHOF(data));
290 }
291
TestNextForCodePoint()292 void UCharsTrieTest::TestNextForCodePoint() {
293 static const StringAndValue data[]={
294 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
295 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
296 { "\\u4dff\\U000103ff", 99999 }
297 };
298 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
299 if(trie.isNull()) {
300 return; // buildTrie() reported an error
301 }
302 UStringTrieResult result;
303 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
307 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
308 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
309 trie->getValue()!=2000000000
310 ) {
311 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
312 }
313 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
315 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
316 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
317 trie->getValue()!=44444
318 ) {
319 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
320 }
321 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
323 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
324 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate
325 ) {
326 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
327 }
328 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
329 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
330 trie->getValue()!=99999
331 ) {
332 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
333 }
334 }
335
336 // Definitions in the anonymous namespace are invisible outside this file.
337 namespace {
338
339 // Generate (string, value) pairs.
340 // The first string (before next()) will be empty.
341 class Generator {
342 public:
Generator()343 Generator() : value(4711), num(0) {}
next()344 void next() {
345 UChar c;
346 s.truncate(0);
347 s.append(c=(UChar)(value>>16));
348 s.append((UChar)(value>>4));
349 if(value&1) {
350 s.append((UChar)value);
351 }
352 set.add(c);
353 value+=((value>>5)&0x7ff)*3+1;
354 ++num;
355 }
getString() const356 const UnicodeString &getString() const { return s; }
getValue() const357 int32_t getValue() const { return value; }
countUniqueFirstChars() const358 int32_t countUniqueFirstChars() const { return set.size(); }
getIndex() const359 int32_t getIndex() const { return num; }
360
361 private:
362 UnicodeString s;
363 UnicodeSet set;
364 int32_t value;
365 int32_t num;
366 };
367
368 } // end namespace
369
buildLargeTrie(int32_t numUniqueFirst)370 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
371 IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
372 Generator gen;
373 builder_->clear();
374 while(gen.countUniqueFirstChars()<numUniqueFirst) {
375 builder_->add(gen.getString(), gen.getValue(), errorCode);
376 gen.next();
377 }
378 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
379 UnicodeString trieUChars;
380 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
381 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
382 return new UCharsTrie(trieUChars.getBuffer());
383 }
384
385 // Exercise a large branch node.
TestLargeTrie()386 void UCharsTrieTest::TestLargeTrie() {
387 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
388 if(trie.isNull()) {
389 return; // buildTrie() reported an error
390 }
391 Generator gen;
392 while(gen.countUniqueFirstChars()<1111) {
393 UnicodeString x(gen.getString());
394 int32_t value=gen.getValue();
395 if(!x.isEmpty()) {
396 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
397 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
398 x[0], (long)gen.getIndex());
399 break;
400 }
401 x.remove(0, 1);
402 }
403 UStringTrieResult result=trie->next(x.getBuffer(), x.length());
404 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
405 errln("next(%d chars U+%04X U+%04X)!=hasValue or "
406 "next()!=current() or getValue() wrong "
407 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
408 break;
409 }
410 gen.next();
411 }
412 }
413
// Numeric code unit values of the lowercase letters that the tests feed to
// the trie one unit at a time.
enum {
    u_a=0x61,  // a
    u_b=0x62,  // b
    u_c=0x63,  // c
    u_j=0x6a,  // j
    u_n=0x6e,  // n
    u_r=0x72,  // r
    u_u=0x75,  // u
    u_y=0x79   // y
};
424
buildMonthsTrie(UStringTrieBuildOption buildOption)425 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
426 // All types of nodes leading to the same value,
427 // for code coverage of recursive functions.
428 // In particular, we need a lot of branches on some single level
429 // to exercise a split-branch node.
430 static const StringAndValue data[]={
431 { "august", 8 },
432 { "jan", 1 },
433 { "jan.", 1 },
434 { "jana", 1 },
435 { "janbb", 1 },
436 { "janc", 1 },
437 { "janddd", 1 },
438 { "janee", 1 },
439 { "janef", 1 },
440 { "janf", 1 },
441 { "jangg", 1 },
442 { "janh", 1 },
443 { "janiiii", 1 },
444 { "janj", 1 },
445 { "jankk", 1 },
446 { "jankl", 1 },
447 { "jankmm", 1 },
448 { "janl", 1 },
449 { "janm", 1 },
450 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
451 { "jano", 1 },
452 { "janpp", 1 },
453 { "janqqq", 1 },
454 { "janr", 1 },
455 { "januar", 1 },
456 { "january", 1 },
457 { "july", 7 },
458 { "jun", 6 },
459 { "jun.", 6 },
460 { "june", 6 }
461 };
462 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
463 }
464
TestHasUniqueValue()465 void UCharsTrieTest::TestHasUniqueValue() {
466 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
467 if(trie.isNull()) {
468 return; // buildTrie() reported an error
469 }
470 int32_t uniqueValue;
471 if(trie->hasUniqueValue(uniqueValue)) {
472 errln("unique value at root");
473 }
474 trie->next(u_j);
475 trie->next(u_a);
476 trie->next(u_n);
477 // hasUniqueValue() directly after next()
478 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
479 errln("not unique value 1 after \"jan\"");
480 }
481 trie->first(u_j);
482 trie->next(u_u);
483 if(trie->hasUniqueValue(uniqueValue)) {
484 errln("unique value after \"ju\"");
485 }
486 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
487 errln("not normal value 6 after \"jun\"");
488 }
489 // hasUniqueValue() after getValue()
490 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
491 errln("not unique value 6 after \"jun\"");
492 }
493 // hasUniqueValue() from within a linear-match node
494 trie->first(u_a);
495 trie->next(u_u);
496 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
497 errln("not unique value 8 after \"au\"");
498 }
499 }
500
TestGetNextUChars()501 void UCharsTrieTest::TestGetNextUChars() {
502 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
503 if(trie.isNull()) {
504 return; // buildTrie() reported an error
505 }
506 UnicodeString buffer;
507 UnicodeStringAppendable app(buffer);
508 int32_t count=trie->getNextUChars(app);
509 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
510 errln("months getNextUChars()!=[aj] at root");
511 }
512 trie->next(u_j);
513 trie->next(u_a);
514 trie->next(u_n);
515 // getNextUChars() directly after next()
516 buffer.remove();
517 count=trie->getNextUChars(app);
518 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
519 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
520 }
521 // getNextUChars() after getValue()
522 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
523 buffer.remove();
524 count=trie->getNextUChars(app);
525 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
526 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
527 }
528 // getNextUChars() from a linear-match node
529 trie->next(u_u);
530 buffer.remove();
531 count=trie->getNextUChars(app);
532 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
533 errln("months getNextUChars()!=[a] after \"janu\"");
534 }
535 trie->next(u_a);
536 buffer.remove();
537 count=trie->getNextUChars(app);
538 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
539 errln("months getNextUChars()!=[r] after \"janua\"");
540 }
541 trie->next(u_r);
542 trie->next(u_y);
543 // getNextUChars() after a final match
544 buffer.remove();
545 count=trie->getNextUChars(app);
546 if(count!=0 || buffer.length()!=0) {
547 errln("months getNextUChars()!=[] after \"january\"");
548 }
549 }
550
TestIteratorFromBranch()551 void UCharsTrieTest::TestIteratorFromBranch() {
552 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
553 if(trie.isNull()) {
554 return; // buildTrie() reported an error
555 }
556 // Go to a branch node.
557 trie->next(u_j);
558 trie->next(u_a);
559 trie->next(u_n);
560 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
561 UCharsTrie::Iterator iter(*trie, 0, errorCode);
562 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
563 return;
564 }
565 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
566 // following "jan".
567 static const StringAndValue data[]={
568 { "", 1 },
569 { ".", 1 },
570 { "a", 1 },
571 { "bb", 1 },
572 { "c", 1 },
573 { "ddd", 1 },
574 { "ee", 1 },
575 { "ef", 1 },
576 { "f", 1 },
577 { "gg", 1 },
578 { "h", 1 },
579 { "iiii", 1 },
580 { "j", 1 },
581 { "kk", 1 },
582 { "kl", 1 },
583 { "kmm", 1 },
584 { "l", 1 },
585 { "m", 1 },
586 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
587 { "o", 1 },
588 { "pp", 1 },
589 { "qqq", 1 },
590 { "r", 1 },
591 { "uar", 1 },
592 { "uary", 1 }
593 };
594 checkIterator(iter, data, UPRV_LENGTHOF(data));
595 // Reset, and we should get the same result.
596 logln("after iter.reset()");
597 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
598 }
599
TestIteratorFromLinearMatch()600 void UCharsTrieTest::TestIteratorFromLinearMatch() {
601 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
602 if(trie.isNull()) {
603 return; // buildTrie() reported an error
604 }
605 // Go into a linear-match node.
606 trie->next(u_j);
607 trie->next(u_a);
608 trie->next(u_n);
609 trie->next(u_u);
610 trie->next(u_a);
611 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
612 UCharsTrie::Iterator iter(*trie, 0, errorCode);
613 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
614 return;
615 }
616 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
617 // following "janua".
618 static const StringAndValue data[]={
619 { "r", 1 },
620 { "ry", 1 }
621 };
622 checkIterator(iter, data, UPRV_LENGTHOF(data));
623 // Reset, and we should get the same result.
624 logln("after iter.reset()");
625 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
626 }
627
TestTruncatingIteratorFromRoot()628 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
629 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
630 if(trie.isNull()) {
631 return; // buildTrie() reported an error
632 }
633 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
634 UCharsTrie::Iterator iter(*trie, 4, errorCode);
635 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
636 return;
637 }
638 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
639 // of each string, and no string duplicates from the truncation.
640 static const StringAndValue data[]={
641 { "augu", -1 },
642 { "jan", 1 },
643 { "jan.", 1 },
644 { "jana", 1 },
645 { "janb", -1 },
646 { "janc", 1 },
647 { "jand", -1 },
648 { "jane", -1 },
649 { "janf", 1 },
650 { "jang", -1 },
651 { "janh", 1 },
652 { "jani", -1 },
653 { "janj", 1 },
654 { "jank", -1 },
655 { "janl", 1 },
656 { "janm", 1 },
657 { "jann", -1 },
658 { "jano", 1 },
659 { "janp", -1 },
660 { "janq", -1 },
661 { "janr", 1 },
662 { "janu", -1 },
663 { "july", 7 },
664 { "jun", 6 },
665 { "jun.", 6 },
666 { "june", 6 }
667 };
668 checkIterator(iter, data, UPRV_LENGTHOF(data));
669 // Reset, and we should get the same result.
670 logln("after iter.reset()");
671 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
672 }
673
TestTruncatingIteratorFromLinearMatchShort()674 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
675 static const StringAndValue data[]={
676 { "abcdef", 10 },
677 { "abcdepq", 200 },
678 { "abcdeyz", 3000 }
679 };
680 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
681 if(trie.isNull()) {
682 return; // buildTrie() reported an error
683 }
684 // Go into a linear-match node.
685 trie->next(u_a);
686 trie->next(u_b);
687 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
688 // Truncate within the linear-match node.
689 UCharsTrie::Iterator iter(*trie, 2, errorCode);
690 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
691 return;
692 }
693 static const StringAndValue expected[]={
694 { "cd", -1 }
695 };
696 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
697 // Reset, and we should get the same result.
698 logln("after iter.reset()");
699 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
700 }
701
TestTruncatingIteratorFromLinearMatchLong()702 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
703 static const StringAndValue data[]={
704 { "abcdef", 10 },
705 { "abcdepq", 200 },
706 { "abcdeyz", 3000 }
707 };
708 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
709 if(trie.isNull()) {
710 return; // buildTrie() reported an error
711 }
712 // Go into a linear-match node.
713 trie->next(u_a);
714 trie->next(u_b);
715 trie->next(u_c);
716 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
717 // Truncate after the linear-match node.
718 UCharsTrie::Iterator iter(*trie, 3, errorCode);
719 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
720 return;
721 }
722 static const StringAndValue expected[]={
723 { "def", 10 },
724 { "dep", -1 },
725 { "dey", -1 }
726 };
727 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
728 // Reset, and we should get the same result.
729 logln("after iter.reset()");
730 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
731 }
732
TestIteratorFromUChars()733 void UCharsTrieTest::TestIteratorFromUChars() {
734 static const StringAndValue data[]={
735 { "mm", 3 },
736 { "mmm", 33 },
737 { "mmnop", 333 }
738 };
739 builder_->clear();
740 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
741 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
742 builder_->add(data[i].s, data[i].value, errorCode);
743 }
744 UnicodeString trieUChars;
745 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
746 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
747 checkIterator(iter, data, UPRV_LENGTHOF(data));
748 }
749
checkData(const StringAndValue data[],int32_t dataLength)750 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
751 logln("checkData(dataLength=%d, fast)", (int)dataLength);
752 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
753 logln("checkData(dataLength=%d, small)", (int)dataLength);
754 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
755 }
756
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)757 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
758 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
759 if(trie.isNull()) {
760 return; // buildTrie() reported an error
761 }
762 checkFirst(*trie, data, dataLength);
763 checkNext(*trie, data, dataLength);
764 checkNextWithState(*trie, data, dataLength);
765 checkNextString(*trie, data, dataLength);
766 checkIterator(*trie, data, dataLength);
767 }
768
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)769 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
770 UStringTrieBuildOption buildOption) {
771 IcuTestErrorCode errorCode(*this, "buildTrie()");
772 // Add the items to the trie builder in an interesting (not trivial, not random) order.
773 int32_t index, step;
774 if(dataLength&1) {
775 // Odd number of items.
776 index=dataLength/2;
777 step=2;
778 } else if((dataLength%3)!=0) {
779 // Not a multiple of 3.
780 index=dataLength/5;
781 step=3;
782 } else {
783 index=dataLength-1;
784 step=-1;
785 }
786 builder_->clear();
787 for(int32_t i=0; i<dataLength; ++i) {
788 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
789 data[index].value, errorCode);
790 index=(index+step)%dataLength;
791 }
792 UnicodeString trieUChars;
793 builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
794 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
795 if(!errorCode.errIfFailureAndReset("add()/build()")) {
796 builder_->add("zzz", 999, errorCode);
797 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
798 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
799 }
800 }
801 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
802 UnicodeString trieUChars2;
803 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
804 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
805 errln("builder.buildUnicodeString() before & after build() returned same array");
806 }
807 if(errorCode.isFailure()) {
808 return NULL;
809 }
810 // Tries from either build() method should be identical but
811 // UCharsTrie does not implement equals().
812 // We just return either one.
813 if((dataLength&1)!=0) {
814 return trie.orphan();
815 } else {
816 return new UCharsTrie(trieUChars2.getBuffer());
817 }
818 }
819
checkFirst(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)820 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
821 const StringAndValue data[], int32_t dataLength) {
822 for(int32_t i=0; i<dataLength; ++i) {
823 if(*data[i].s==0) {
824 continue; // skip empty string
825 }
826 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
827 UChar32 c=expectedString[0];
828 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
829 UStringTrieResult firstResult=trie.first(c);
830 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
831 UStringTrieResult nextResult=trie.next(nextCp);
832 if(firstResult!=trie.reset().next(c) ||
833 firstResult!=trie.current() ||
834 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
835 nextResult!=trie.next(nextCp)
836 ) {
837 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
838 c, data[i].s);
839 }
840 c=expectedString.char32At(0);
841 int32_t cLength=U16_LENGTH(c);
842 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
843 firstResult=trie.firstForCodePoint(c);
844 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
845 nextResult=trie.nextForCodePoint(nextCp);
846 if(firstResult!=trie.reset().nextForCodePoint(c) ||
847 firstResult!=trie.current() ||
848 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
849 nextResult!=trie.nextForCodePoint(nextCp)
850 ) {
851 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
852 c, data[i].s);
853 }
854 }
855 trie.reset();
856 }
857
checkNext(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)858 void UCharsTrieTest::checkNext(UCharsTrie &trie,
859 const StringAndValue data[], int32_t dataLength) {
860 UCharsTrie::State state;
861 for(int32_t i=0; i<dataLength; ++i) {
862 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
863 int32_t stringLength= (i&1) ? -1 : expectedString.length();
864 UStringTrieResult result;
865 if( !USTRINGTRIE_HAS_VALUE(
866 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
867 result!=trie.current()
868 ) {
869 errln("trie does not seem to contain %s", data[i].s);
870 } else if(trie.getValue()!=data[i].value) {
871 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
872 data[i].s,
873 (long)trie.getValue(), (long)trie.getValue(),
874 (long)data[i].value, (long)data[i].value);
875 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
876 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
877 }
878 trie.reset();
879 stringLength=expectedString.length();
880 result=trie.current();
881 for(int32_t j=0; j<stringLength; ++j) {
882 if(!USTRINGTRIE_HAS_NEXT(result)) {
883 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
884 break;
885 }
886 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
887 trie.getValue();
888 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
889 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
890 break;
891 }
892 }
893 result=trie.next(expectedString[j]);
894 if(!USTRINGTRIE_MATCHES(result)) {
895 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
896 break;
897 }
898 if(result!=trie.current()) {
899 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
900 break;
901 }
902 }
903 if(!USTRINGTRIE_HAS_VALUE(result)) {
904 errln("trie.next()!=hasValue at the end of %s", data[i].s);
905 continue;
906 }
907 trie.getValue();
908 if(result!=trie.current()) {
909 errln("trie.current() != current()+getValue()+current() after end of %s",
910 data[i].s);
911 }
912 // Compare the final current() with whether next() can actually continue.
913 trie.saveState(state);
914 UBool nextContinues=FALSE;
915 for(int32_t c=0x20; c<0xe000; ++c) {
916 if(c==0x80) {
917 c=0xd800; // Check for ASCII and surrogates but not all of the BMP.
918 }
919 if(trie.resetToState(state).next(c)) {
920 nextContinues=TRUE;
921 break;
922 }
923 }
924 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
925 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
926 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
927 }
928 trie.reset();
929 }
930 }
931
checkNextWithState(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)932 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
933 const StringAndValue data[], int32_t dataLength) {
934 UCharsTrie::State noState, state;
935 for(int32_t i=0; i<dataLength; ++i) {
936 if((i&1)==0) {
937 // This should have no effect.
938 trie.resetToState(noState);
939 }
940 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
941 int32_t stringLength=expectedString.length();
942 int32_t partialLength=stringLength/3;
943 for(int32_t j=0; j<partialLength; ++j) {
944 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
945 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
946 return;
947 }
948 }
949 trie.saveState(state);
950 UStringTrieResult resultAtState=trie.current();
951 UStringTrieResult result;
952 int32_t valueAtState=-99;
953 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
954 valueAtState=trie.getValue();
955 }
956 result=trie.next(0); // mismatch
957 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
958 errln("trie.next(0) matched after part of %s", data[i].s);
959 }
960 if( resultAtState!=trie.resetToState(state).current() ||
961 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
962 ) {
963 errln("trie.next(part of %s) changes current()/getValue() after "
964 "saveState/next(0)/resetToState",
965 data[i].s);
966 } else if(!USTRINGTRIE_HAS_VALUE(
967 result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
968 stringLength-partialLength)) ||
969 result!=trie.current()) {
970 errln("trie.next(rest of %s) does not seem to contain %s after "
971 "saveState/next(0)/resetToState",
972 data[i].s, data[i].s);
973 } else if(!USTRINGTRIE_HAS_VALUE(
974 result=trie.resetToState(state).
975 next(expectedString.getTerminatedBuffer()+partialLength,
976 stringLength-partialLength)) ||
977 result!=trie.current()) {
978 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
979 data[i].s);
980 } else if(trie.getValue()!=data[i].value) {
981 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
982 data[i].s,
983 (long)trie.getValue(), (long)trie.getValue(),
984 (long)data[i].value, (long)data[i].value);
985 }
986 trie.reset();
987 }
988 }
989
990 // next(string) is also tested in other functions,
991 // but here we try to go partway through the string, and then beyond it.
checkNextString(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)992 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
993 const StringAndValue data[], int32_t dataLength) {
994 for(int32_t i=0; i<dataLength; ++i) {
995 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
996 int32_t stringLength=expectedString.length();
997 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
998 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
999 continue;
1000 }
1001 // Test that we stop properly at the end of the string.
1002 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1003 stringLength+1-stringLength/2)) {
1004 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1005 }
1006 trie.reset();
1007 }
1008 }
1009
checkIterator(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)1010 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1011 const StringAndValue data[], int32_t dataLength) {
1012 IcuTestErrorCode errorCode(*this, "checkIterator()");
1013 UCharsTrie::Iterator iter(trie, 0, errorCode);
1014 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1015 return;
1016 }
1017 checkIterator(iter, data, dataLength);
1018 }
1019
checkIterator(UCharsTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)1020 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1021 const StringAndValue data[], int32_t dataLength) {
1022 IcuTestErrorCode errorCode(*this, "checkIterator()");
1023 for(int32_t i=0; i<dataLength; ++i) {
1024 if(!iter.hasNext()) {
1025 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1026 break;
1027 }
1028 UBool hasNext=iter.next(errorCode);
1029 if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1030 break;
1031 }
1032 if(!hasNext) {
1033 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1034 break;
1035 }
1036 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1037 if(iter.getString()!=expectedString) {
1038 char buffer[1000];
1039 UnicodeString invString(prettify(iter.getString()));
1040 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
1041 errln("trie iterator next().getString()=%s but expected %s for item %d",
1042 buffer, data[i].s, (int)i);
1043 }
1044 if(iter.getValue()!=data[i].value) {
1045 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1046 (long)iter.getValue(), (long)iter.getValue(),
1047 (long)data[i].value, (long)data[i].value,
1048 (int)i, data[i].s);
1049 }
1050 }
1051 if(iter.hasNext()) {
1052 errln("trie iterator hasNext()=TRUE after all items");
1053 }
1054 UBool hasNext=iter.next(errorCode);
1055 errorCode.errIfFailureAndReset("trie iterator next() after all items");
1056 if(hasNext) {
1057 errln("trie iterator next()=TRUE after all items");
1058 }
1059 }
1060