1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 /**
8  * IntlTestCollator is the medium level test class for everything in the directory "collate".
9  */
10 
11 /***********************************************************************
12 * Modification history
13 * Date        Name        Description
14 * 02/14/2001  synwee      Compare with cintltst and commented away tests
15 *                         that are not run.
16 ***********************************************************************/
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_COLLATION
21 
22 #include "unicode/localpointer.h"
23 #include "unicode/sortkey.h"
24 #include "unicode/uchar.h"
25 #include "unicode/ustring.h"
26 
27 #include "encoll.h"
28 #include "frcoll.h"
29 #include "decoll.h"
30 #include "escoll.h"
31 #include "ficoll.h"
32 #include "jacoll.h"
33 #include "trcoll.h"
34 #include "allcoll.h"
35 #include "g7coll.h"
36 #include "mnkytst.h"
37 #include "apicoll.h"
38 #include "regcoll.h"
39 #include "currcoll.h"
40 #include "itercoll.h"
41 #include "tstnorm.h"
42 #include "normconf.h"
43 #include "thcoll.h"
44 #include "srchtest.h"
45 #include "ssearch.h"
46 #include "lcukocol.h"
47 #include "ucaconf.h"
48 #include "svccoll.h"
49 #include "cmemory.h"
50 #include "alphaindextst.h"
51 
52 // Set to 1 to test offsets in backAndForth()
53 #define TEST_OFFSETS 0
54 
55 extern IntlTest *createCollationTest();
56 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)57 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
58 {
59     if (exec) {
60         logln("TestSuite Collator: ");
61     }
62 
63     TESTCASE_AUTO_BEGIN;
64     TESTCASE_AUTO_CLASS(CollationEnglishTest);
65     TESTCASE_AUTO_CLASS(CollationFrenchTest);
66     TESTCASE_AUTO_CLASS(CollationGermanTest);
67     TESTCASE_AUTO_CLASS(CollationSpanishTest);
68     TESTCASE_AUTO_CLASS(CollationKanaTest);
69     TESTCASE_AUTO_CLASS(CollationTurkishTest);
70     TESTCASE_AUTO_CLASS(CollationDummyTest);
71     TESTCASE_AUTO_CLASS(G7CollationTest);
72     TESTCASE_AUTO_CLASS(CollationMonkeyTest);
73     TESTCASE_AUTO_CLASS(CollationAPITest);
74     TESTCASE_AUTO_CLASS(CollationRegressionTest);
75     TESTCASE_AUTO_CLASS(CollationCurrencyTest);
76     TESTCASE_AUTO_CLASS(CollationIteratorTest);
77     TESTCASE_AUTO_CLASS(CollationThaiTest);
78     TESTCASE_AUTO_CLASS(LotusCollationKoreanTest);
79     TESTCASE_AUTO_CLASS(StringSearchTest);
80     TESTCASE_AUTO_CLASS(UCAConformanceTest);
81     TESTCASE_AUTO_CLASS(CollationServiceTest);
82     TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation
83     TESTCASE_AUTO_CLASS(SSearchTest);
84 #if !UCONFIG_NO_NORMALIZATION
85     TESTCASE_AUTO_CLASS(AlphabeticIndexTest);
86 #endif
87     TESTCASE_AUTO_CREATE_CLASS(CollationTest);
88     TESTCASE_AUTO_END;
89 }
90 
91 UCollationResult
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode & status)92 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
93   int32_t partialSKResult = 0;
94   uint8_t sBuf[512], tBuf[512];
95   UCharIterator sIter, tIter;
96   uint32_t sState[2], tState[2];
97   int32_t sSize = pieceSize, tSize = pieceSize;
98   int32_t i = 0;
99   status = U_ZERO_ERROR;
100   sState[0] = 0; sState[1] = 0;
101   tState[0] = 0; tState[1] = 0;
102   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
103     uiter_setString(&sIter, source, sLen);
104     uiter_setString(&tIter, target, tLen);
105     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
106     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
107 
108     if(sState[0] != 0 || tState[0] != 0) {
109       log("State != 0 : %08X %08X\n", sState[0], tState[0]);
110     }
111     log("%i ", i++);
112 
113     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
114   }
115 
116   if(partialSKResult < 0) {
117       return UCOL_LESS;
118   } else if(partialSKResult > 0) {
119     return UCOL_GREATER;
120   } else {
121     return UCOL_EQUAL;
122   }
123 }
124 
125 void
doTestVariant(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)126 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
127 {
128   UErrorCode status = U_ZERO_ERROR;
129 
130   UCollator *myCollation = col->toUCollator();
131 
132   Collator::EComparisonResult compareResult = col->compare(source, target);
133 
134   CollationKey srckey, tgtkey;
135   col->getCollationKey(source, srckey, status);
136   col->getCollationKey(target, tgtkey, status);
137   if (U_FAILURE(status)){
138     errln("Creation of collation keys failed\n");
139   }
140   Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
141 
142   reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
143 
144     UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
145 
146     int32_t sLen = source.length(), tLen = target.length();
147     const UChar* src = source.getBuffer();
148     const UChar* trg = target.getBuffer();
149     UCollationResult compareResultIter = (UCollationResult)result;
150 
151     {
152       UCharIterator sIter, tIter;
153       uiter_setString(&sIter, src, sLen);
154       uiter_setString(&tIter, trg, tLen);
155       compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
156       if(compareResultIter != (UCollationResult)result) {
157         errln("Different result for iterative comparison "+source+" "+target);
158       }
159     }
160     /* convert the strings to UTF-8 and do try comparing with char iterator */
161     if(!quick) { /*!QUICK*/
162       char utf8Source[256], utf8Target[256];
163       int32_t utf8SourceLen = 0, utf8TargetLen = 0;
164       u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
165       if(U_FAILURE(status)) { /* probably buffer is not big enough */
166         log("Src UTF-8 buffer too small! Will not compare!\n");
167       } else {
168         u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
169         if(U_SUCCESS(status)) { /* probably buffer is not big enough */
170           UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
171           UCharIterator sIter, tIter;
172           /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
173           uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
174           uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
175        /*uiter_setString(&sIter, source, sLen);
176       uiter_setString(&tIter, target, tLen);*/
177           compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
178           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
179           sIter.move(&sIter, 0, UITER_START);
180           tIter.move(&tIter, 0, UITER_START);
181           compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
182           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
183           if(compareResultUTF8 != compareResultIter) {
184             errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
185           }
186           if(compareResultUTF8 != compareResultUTF8Norm) {
187             errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
188           }
189         } else {
190           log("Target UTF-8 buffer too small! Did not compare!\n");
191         }
192         if(U_FAILURE(status)) {
193           log("UTF-8 strcoll failed! Ignoring result\n");
194         }
195       }
196     }
197 
198     /* testing the partial sortkeys */
199     { /*!QUICK*/
200       int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
201       int32_t partialSizesSize = 1;
202       if(!quick) {
203         partialSizesSize = 7;
204       }
205       int32_t i = 0;
206       log("partial sortkey test piecesize=");
207       for(i = 0; i < partialSizesSize; i++) {
208         UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
209         log("%i ", partialSizes[i]);
210 
211         partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
212         if(partialSKResult != (UCollationResult)result) {
213           errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
214         }
215 
216         if(norm != UCOL_ON && !quick) {
217           log("N ");
218           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
219           partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
220           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
221           if(partialSKResult != partialNormalizedSKResult) {
222             errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
223           }
224         }
225       }
226       log("\n");
227     }
228 /*
229   if (compareResult != result) {
230     errln("String comparison failed in variant test\n");
231   }
232   if (keyResult != result) {
233     errln("Collation key comparison failed in variant test\n");
234   }
235 */
236 }
237 
238 void
doTest(Collator * col,const UChar * source,const UChar * target,Collator::EComparisonResult result)239 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
240   doTest(col, UnicodeString(source), UnicodeString(target), result);
241 }
242 
243 void
doTest(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)244 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
245 {
246   if(col) {
247     doTestVariant(col, source, target, result);
248     if(result == Collator::LESS) {
249       doTestVariant(col, target, source, Collator::GREATER);
250     } else if (result == Collator::GREATER) {
251       doTestVariant(col, target, source, Collator::LESS);
252     }
253 
254     UErrorCode status = U_ZERO_ERROR;
255     LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
256     logln("Testing iterating source: "+source);
257     backAndForth(*c);
258     c->setText(target, status);
259     logln("Testing iterating target: "+target);
260     backAndForth(*c);
261   }
262 }
263 
264 
265 // used for collation result reporting, defined here for convenience
266 // (maybe moved later)
267 void
reportCResult(const UnicodeString & source,const UnicodeString & target,CollationKey & sourceKey,CollationKey & targetKey,Collator::EComparisonResult compareResult,Collator::EComparisonResult keyResult,Collator::EComparisonResult incResult,Collator::EComparisonResult expectedResult)268 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
269              CollationKey &sourceKey, CollationKey &targetKey,
270              Collator::EComparisonResult compareResult,
271              Collator::EComparisonResult keyResult,
272                                 Collator::EComparisonResult incResult,
273                          Collator::EComparisonResult expectedResult )
274 {
275     if (expectedResult < -1 || expectedResult > 1)
276     {
277         errln("***** invalid call to reportCResult ****");
278         return;
279     }
280 
281     UBool ok1 = (compareResult == expectedResult);
282     UBool ok2 = (keyResult == expectedResult);
283     UBool ok3 = (incResult == expectedResult);
284 
285 
286     if (ok1 && ok2 && ok3 && !verbose) {
287         // Keep non-verbose, passing tests fast
288         return;
289     } else {
290         UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
291         UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
292         UnicodeString prettySource, prettyTarget, sExpect, sResult;
293 
294         IntlTest::prettify(source, prettySource);
295         IntlTest::prettify(target, prettyTarget);
296         appendCompareResult(compareResult, sResult);
297         appendCompareResult(expectedResult, sExpect);
298 
299         if (ok1) {
300             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
301         } else {
302             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
303         }
304 
305         msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
306         msg2 = ").compareTo(key(";
307         msg3 = ")) returned ";
308 
309         appendCompareResult(keyResult, sResult);
310 
311         if (ok2) {
312             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
313         } else {
314             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
315 
316             msg1 = "  ";
317             msg2 = " vs. ";
318 
319             prettify(sourceKey, prettySource);
320             prettify(targetKey, prettyTarget);
321 
322             errln(msg1 + prettySource + msg2 + prettyTarget);
323         }
324         msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
325         msg2 = ", ";
326         msg3 = ") returned ";
327 
328         appendCompareResult(incResult, sResult);
329 
330         if (ok3) {
331             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
332         } else {
333             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
334         }
335     }
336 }
337 
338 UnicodeString&
appendCompareResult(Collator::EComparisonResult result,UnicodeString & target)339 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
340                   UnicodeString& target)
341 {
342     if (result == Collator::LESS)
343     {
344         target += "LESS";
345     }
346     else if (result == Collator::EQUAL)
347     {
348         target += "EQUAL";
349     }
350     else if (result == Collator::GREATER)
351     {
352         target += "GREATER";
353     }
354     else
355     {
356         UnicodeString huh = "?";
357 
358         target += (huh + (int32_t)result);
359     }
360 
361     return target;
362 }
363 
364 // Produce a printable representation of a CollationKey
prettify(const CollationKey & source,UnicodeString & target)365 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
366 {
367     int32_t i, byteCount;
368     const uint8_t *bytes = source.getByteArray(byteCount);
369 
370     target.remove();
371     target += "[";
372 
373     for (i = 0; i < byteCount; i += 1)
374     {
375         if (i != 0) {
376             target += " ";
377         }
378         appendHex(bytes[i], 2, target);
379     }
380 
381     target += "]";
382 
383     return target;
384 }
385 
backAndForth(CollationElementIterator & iter)386 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
387 {
388     // Run through the iterator forwards and stick it into an array
389     int32_t orderLength = 0;
390     LocalArray<Order> orders(getOrders(iter, orderLength));
391     UErrorCode status = U_ZERO_ERROR;
392 
393     // Now go through it backwards and make sure we get the same values
394     int32_t index = orderLength;
395     int32_t o;
396 
397     // reset the iterator
398     iter.reset();
399 
400     while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
401     {
402         /*int32_t offset = */iter.getOffset();
403 
404         if (index == 0) {
405           if(o == 0) {
406             continue;
407           } else { // this is an error, orders exhausted but there are non-ignorable CEs from
408             // going backwards
409             errln("Backward iteration returned a non ignorable after orders are exhausted");
410             break;
411           }
412         }
413 
414         index -= 1;
415         if (o != orders[index].order) {
416             if (o == 0)
417                 index += 1;
418             else {
419                 while (index > 0 && orders[--index].order == 0) {
420                   // nothing...
421                 }
422 
423                 if (o != orders[index].order) {
424                     errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
425                     orders[index].order, o);
426                 //break;
427                   return;
428                 }
429             }
430         }
431 
432 #if TEST_OFFSETS
433         if (offset != orders[index].offset) {
434           errln("Mismatched offset at index %d: %d vs. %d", index,
435             orders[index].offset, offset);
436        //break;
437          return;
438         }
439 #endif
440 
441     }
442 
443     while (index != 0 && orders[index - 1].order == 0)
444     {
445       index --;
446     }
447 
448     if (index != 0)
449     {
450         UnicodeString msg("Didn't get back to beginning - index is ");
451         errln(msg + index);
452 
453         iter.reset();
454         err("next: ");
455         while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
456         {
457             UnicodeString hexString("0x");
458 
459             appendHex(o, 8, hexString);
460             hexString += " ";
461             err(hexString);
462         }
463         errln("");
464 
465         err("prev: ");
466         while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
467         {
468             UnicodeString hexString("0x");
469 
470             appendHex(o, 8, hexString);
471             hexString += " ";
472              err(hexString);
473         }
474         errln("");
475     }
476 }
477 
478 
479 /**
480  * Return an integer array containing all of the collation orders
481  * returned by calls to next on the specified iterator
482  */
getOrders(CollationElementIterator & iter,int32_t & orderLength)483 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
484 {
485     int32_t maxSize = 100;
486     int32_t size = 0;
487     LocalArray<Order> orders(new Order[maxSize]);
488     UErrorCode status = U_ZERO_ERROR;
489     int32_t offset = iter.getOffset();
490 
491     int32_t order;
492     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
493     {
494         if (size == maxSize)
495         {
496             maxSize *= 2;
497             Order *temp = new Order[maxSize];
498 
499             uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
500             orders.adoptInstead(temp);
501         }
502 
503         orders[size].order  = order;
504         orders[size].offset = offset;
505 
506         offset = iter.getOffset();
507         size += 1;
508     }
509     if (U_FAILURE(status)) {
510         errln("CollationElementIterator.next() failed - %s",
511               u_errorName(status));
512     }
513 
514     if (maxSize > size)
515     {
516         Order *temp = new Order[size];
517 
518         uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
519         orders.adoptInstead(temp);
520     }
521 
522     orderLength = size;
523     return orders.orphan();
524 }
525 
526 #endif /* #if !UCONFIG_NO_COLLATION */
527