1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 /**
8 * IntlTestCollator is the medium level test class for everything in the directory "collate".
9 */
10
11 /***********************************************************************
12 * Modification history
13 * Date Name Description
14 * 02/14/2001 synwee Compare with cintltst and commented away tests
15 * that are not run.
16 ***********************************************************************/
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_COLLATION
21
22 #include "unicode/localpointer.h"
23 #include "unicode/sortkey.h"
24 #include "unicode/uchar.h"
25 #include "unicode/ustring.h"
26
27 #include "encoll.h"
28 #include "frcoll.h"
29 #include "decoll.h"
30 #include "escoll.h"
31 #include "ficoll.h"
32 #include "jacoll.h"
33 #include "trcoll.h"
34 #include "allcoll.h"
35 #include "g7coll.h"
36 #include "mnkytst.h"
37 #include "apicoll.h"
38 #include "regcoll.h"
39 #include "currcoll.h"
40 #include "itercoll.h"
41 #include "tstnorm.h"
42 #include "normconf.h"
43 #include "thcoll.h"
44 #include "srchtest.h"
45 #include "ssearch.h"
46 #include "lcukocol.h"
47 #include "ucaconf.h"
48 #include "svccoll.h"
49 #include "cmemory.h"
50 #include "alphaindextst.h"
51
52 // Set to 1 to test offsets in backAndForth()
53 #define TEST_OFFSETS 0
54
55 extern IntlTest *createCollationTest();
56
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)57 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
58 {
59 if (exec) {
60 logln("TestSuite Collator: ");
61 }
62
63 TESTCASE_AUTO_BEGIN;
64 TESTCASE_AUTO_CLASS(CollationEnglishTest);
65 TESTCASE_AUTO_CLASS(CollationFrenchTest);
66 TESTCASE_AUTO_CLASS(CollationGermanTest);
67 TESTCASE_AUTO_CLASS(CollationSpanishTest);
68 TESTCASE_AUTO_CLASS(CollationKanaTest);
69 TESTCASE_AUTO_CLASS(CollationTurkishTest);
70 TESTCASE_AUTO_CLASS(CollationDummyTest);
71 TESTCASE_AUTO_CLASS(G7CollationTest);
72 TESTCASE_AUTO_CLASS(CollationMonkeyTest);
73 TESTCASE_AUTO_CLASS(CollationAPITest);
74 TESTCASE_AUTO_CLASS(CollationRegressionTest);
75 TESTCASE_AUTO_CLASS(CollationCurrencyTest);
76 TESTCASE_AUTO_CLASS(CollationIteratorTest);
77 TESTCASE_AUTO_CLASS(CollationThaiTest);
78 TESTCASE_AUTO_CLASS(LotusCollationKoreanTest);
79 TESTCASE_AUTO_CLASS(StringSearchTest);
80 TESTCASE_AUTO_CLASS(UCAConformanceTest);
81 TESTCASE_AUTO_CLASS(CollationServiceTest);
82 TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation
83 TESTCASE_AUTO_CLASS(SSearchTest);
84 #if !UCONFIG_NO_NORMALIZATION
85 TESTCASE_AUTO_CLASS(AlphabeticIndexTest);
86 #endif
87 TESTCASE_AUTO_CREATE_CLASS(CollationTest);
88 TESTCASE_AUTO_END;
89 }
90
91 UCollationResult
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode & status)92 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
93 int32_t partialSKResult = 0;
94 uint8_t sBuf[512], tBuf[512];
95 UCharIterator sIter, tIter;
96 uint32_t sState[2], tState[2];
97 int32_t sSize = pieceSize, tSize = pieceSize;
98 int32_t i = 0;
99 status = U_ZERO_ERROR;
100 sState[0] = 0; sState[1] = 0;
101 tState[0] = 0; tState[1] = 0;
102 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
103 uiter_setString(&sIter, source, sLen);
104 uiter_setString(&tIter, target, tLen);
105 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
106 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
107
108 if(sState[0] != 0 || tState[0] != 0) {
109 log("State != 0 : %08X %08X\n", sState[0], tState[0]);
110 }
111 log("%i ", i++);
112
113 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
114 }
115
116 if(partialSKResult < 0) {
117 return UCOL_LESS;
118 } else if(partialSKResult > 0) {
119 return UCOL_GREATER;
120 } else {
121 return UCOL_EQUAL;
122 }
123 }
124
125 void
doTestVariant(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)126 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
127 {
128 UErrorCode status = U_ZERO_ERROR;
129
130 UCollator *myCollation = col->toUCollator();
131
132 Collator::EComparisonResult compareResult = col->compare(source, target);
133
134 CollationKey srckey, tgtkey;
135 col->getCollationKey(source, srckey, status);
136 col->getCollationKey(target, tgtkey, status);
137 if (U_FAILURE(status)){
138 errln("Creation of collation keys failed\n");
139 }
140 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
141
142 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
143
144 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
145
146 int32_t sLen = source.length(), tLen = target.length();
147 const UChar* src = source.getBuffer();
148 const UChar* trg = target.getBuffer();
149 UCollationResult compareResultIter = (UCollationResult)result;
150
151 {
152 UCharIterator sIter, tIter;
153 uiter_setString(&sIter, src, sLen);
154 uiter_setString(&tIter, trg, tLen);
155 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
156 if(compareResultIter != (UCollationResult)result) {
157 errln("Different result for iterative comparison "+source+" "+target);
158 }
159 }
160 /* convert the strings to UTF-8 and do try comparing with char iterator */
161 if(!quick) { /*!QUICK*/
162 char utf8Source[256], utf8Target[256];
163 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
164 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
165 if(U_FAILURE(status)) { /* probably buffer is not big enough */
166 log("Src UTF-8 buffer too small! Will not compare!\n");
167 } else {
168 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
169 if(U_SUCCESS(status)) { /* probably buffer is not big enough */
170 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
171 UCharIterator sIter, tIter;
172 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
173 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
174 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
175 /*uiter_setString(&sIter, source, sLen);
176 uiter_setString(&tIter, target, tLen);*/
177 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
178 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
179 sIter.move(&sIter, 0, UITER_START);
180 tIter.move(&tIter, 0, UITER_START);
181 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
182 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
183 if(compareResultUTF8 != compareResultIter) {
184 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
185 }
186 if(compareResultUTF8 != compareResultUTF8Norm) {
187 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
188 }
189 } else {
190 log("Target UTF-8 buffer too small! Did not compare!\n");
191 }
192 if(U_FAILURE(status)) {
193 log("UTF-8 strcoll failed! Ignoring result\n");
194 }
195 }
196 }
197
198 /* testing the partial sortkeys */
199 { /*!QUICK*/
200 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
201 int32_t partialSizesSize = 1;
202 if(!quick) {
203 partialSizesSize = 7;
204 }
205 int32_t i = 0;
206 log("partial sortkey test piecesize=");
207 for(i = 0; i < partialSizesSize; i++) {
208 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
209 log("%i ", partialSizes[i]);
210
211 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
212 if(partialSKResult != (UCollationResult)result) {
213 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
214 }
215
216 if(norm != UCOL_ON && !quick) {
217 log("N ");
218 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
219 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
220 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
221 if(partialSKResult != partialNormalizedSKResult) {
222 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
223 }
224 }
225 }
226 log("\n");
227 }
228 /*
229 if (compareResult != result) {
230 errln("String comparison failed in variant test\n");
231 }
232 if (keyResult != result) {
233 errln("Collation key comparison failed in variant test\n");
234 }
235 */
236 }
237
238 void
doTest(Collator * col,const UChar * source,const UChar * target,Collator::EComparisonResult result)239 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
240 doTest(col, UnicodeString(source), UnicodeString(target), result);
241 }
242
243 void
doTest(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)244 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
245 {
246 if(col) {
247 doTestVariant(col, source, target, result);
248 if(result == Collator::LESS) {
249 doTestVariant(col, target, source, Collator::GREATER);
250 } else if (result == Collator::GREATER) {
251 doTestVariant(col, target, source, Collator::LESS);
252 }
253
254 UErrorCode status = U_ZERO_ERROR;
255 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
256 logln("Testing iterating source: "+source);
257 backAndForth(*c);
258 c->setText(target, status);
259 logln("Testing iterating target: "+target);
260 backAndForth(*c);
261 }
262 }
263
264
265 // used for collation result reporting, defined here for convenience
266 // (maybe moved later)
267 void
reportCResult(const UnicodeString & source,const UnicodeString & target,CollationKey & sourceKey,CollationKey & targetKey,Collator::EComparisonResult compareResult,Collator::EComparisonResult keyResult,Collator::EComparisonResult incResult,Collator::EComparisonResult expectedResult)268 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
269 CollationKey &sourceKey, CollationKey &targetKey,
270 Collator::EComparisonResult compareResult,
271 Collator::EComparisonResult keyResult,
272 Collator::EComparisonResult incResult,
273 Collator::EComparisonResult expectedResult )
274 {
275 if (expectedResult < -1 || expectedResult > 1)
276 {
277 errln("***** invalid call to reportCResult ****");
278 return;
279 }
280
281 UBool ok1 = (compareResult == expectedResult);
282 UBool ok2 = (keyResult == expectedResult);
283 UBool ok3 = (incResult == expectedResult);
284
285
286 if (ok1 && ok2 && ok3 && !verbose) {
287 // Keep non-verbose, passing tests fast
288 return;
289 } else {
290 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
291 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
292 UnicodeString prettySource, prettyTarget, sExpect, sResult;
293
294 IntlTest::prettify(source, prettySource);
295 IntlTest::prettify(target, prettyTarget);
296 appendCompareResult(compareResult, sResult);
297 appendCompareResult(expectedResult, sExpect);
298
299 if (ok1) {
300 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
301 } else {
302 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
303 }
304
305 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
306 msg2 = ").compareTo(key(";
307 msg3 = ")) returned ";
308
309 appendCompareResult(keyResult, sResult);
310
311 if (ok2) {
312 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
313 } else {
314 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
315
316 msg1 = " ";
317 msg2 = " vs. ";
318
319 prettify(sourceKey, prettySource);
320 prettify(targetKey, prettyTarget);
321
322 errln(msg1 + prettySource + msg2 + prettyTarget);
323 }
324 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
325 msg2 = ", ";
326 msg3 = ") returned ";
327
328 appendCompareResult(incResult, sResult);
329
330 if (ok3) {
331 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
332 } else {
333 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
334 }
335 }
336 }
337
338 UnicodeString&
appendCompareResult(Collator::EComparisonResult result,UnicodeString & target)339 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
340 UnicodeString& target)
341 {
342 if (result == Collator::LESS)
343 {
344 target += "LESS";
345 }
346 else if (result == Collator::EQUAL)
347 {
348 target += "EQUAL";
349 }
350 else if (result == Collator::GREATER)
351 {
352 target += "GREATER";
353 }
354 else
355 {
356 UnicodeString huh = "?";
357
358 target += (huh + (int32_t)result);
359 }
360
361 return target;
362 }
363
364 // Produce a printable representation of a CollationKey
prettify(const CollationKey & source,UnicodeString & target)365 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
366 {
367 int32_t i, byteCount;
368 const uint8_t *bytes = source.getByteArray(byteCount);
369
370 target.remove();
371 target += "[";
372
373 for (i = 0; i < byteCount; i += 1)
374 {
375 if (i != 0) {
376 target += " ";
377 }
378 appendHex(bytes[i], 2, target);
379 }
380
381 target += "]";
382
383 return target;
384 }
385
backAndForth(CollationElementIterator & iter)386 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
387 {
388 // Run through the iterator forwards and stick it into an array
389 int32_t orderLength = 0;
390 LocalArray<Order> orders(getOrders(iter, orderLength));
391 UErrorCode status = U_ZERO_ERROR;
392
393 // Now go through it backwards and make sure we get the same values
394 int32_t index = orderLength;
395 int32_t o;
396
397 // reset the iterator
398 iter.reset();
399
400 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
401 {
402 /*int32_t offset = */iter.getOffset();
403
404 if (index == 0) {
405 if(o == 0) {
406 continue;
407 } else { // this is an error, orders exhausted but there are non-ignorable CEs from
408 // going backwards
409 errln("Backward iteration returned a non ignorable after orders are exhausted");
410 break;
411 }
412 }
413
414 index -= 1;
415 if (o != orders[index].order) {
416 if (o == 0)
417 index += 1;
418 else {
419 while (index > 0 && orders[--index].order == 0) {
420 // nothing...
421 }
422
423 if (o != orders[index].order) {
424 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
425 orders[index].order, o);
426 //break;
427 return;
428 }
429 }
430 }
431
432 #if TEST_OFFSETS
433 if (offset != orders[index].offset) {
434 errln("Mismatched offset at index %d: %d vs. %d", index,
435 orders[index].offset, offset);
436 //break;
437 return;
438 }
439 #endif
440
441 }
442
443 while (index != 0 && orders[index - 1].order == 0)
444 {
445 index --;
446 }
447
448 if (index != 0)
449 {
450 UnicodeString msg("Didn't get back to beginning - index is ");
451 errln(msg + index);
452
453 iter.reset();
454 err("next: ");
455 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
456 {
457 UnicodeString hexString("0x");
458
459 appendHex(o, 8, hexString);
460 hexString += " ";
461 err(hexString);
462 }
463 errln("");
464
465 err("prev: ");
466 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
467 {
468 UnicodeString hexString("0x");
469
470 appendHex(o, 8, hexString);
471 hexString += " ";
472 err(hexString);
473 }
474 errln("");
475 }
476 }
477
478
479 /**
480 * Return an integer array containing all of the collation orders
481 * returned by calls to next on the specified iterator
482 */
getOrders(CollationElementIterator & iter,int32_t & orderLength)483 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
484 {
485 int32_t maxSize = 100;
486 int32_t size = 0;
487 LocalArray<Order> orders(new Order[maxSize]);
488 UErrorCode status = U_ZERO_ERROR;
489 int32_t offset = iter.getOffset();
490
491 int32_t order;
492 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
493 {
494 if (size == maxSize)
495 {
496 maxSize *= 2;
497 Order *temp = new Order[maxSize];
498
499 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
500 orders.adoptInstead(temp);
501 }
502
503 orders[size].order = order;
504 orders[size].offset = offset;
505
506 offset = iter.getOffset();
507 size += 1;
508 }
509 if (U_FAILURE(status)) {
510 errln("CollationElementIterator.next() failed - %s",
511 u_errorName(status));
512 }
513
514 if (maxSize > size)
515 {
516 Order *temp = new Order[size];
517
518 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
519 orders.adoptInstead(temp);
520 }
521
522 orderLength = size;
523 return orders.orphan();
524 }
525
526 #endif /* #if !UCONFIG_NO_COLLATION */
527