1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 /**
10  * IntlTestCollator is the medium level test class for everything in the directory "collate".
11  */
12 
13 /***********************************************************************
14 * Modification history
15 * Date        Name        Description
16 * 02/14/2001  synwee      Compare with cintltst and commented away tests
17 *                         that are not run.
18 ***********************************************************************/
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_COLLATION
23 
24 #include "unicode/localpointer.h"
25 #include "unicode/sortkey.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 
29 #include "encoll.h"
30 #include "frcoll.h"
31 #include "decoll.h"
32 #include "escoll.h"
33 #include "ficoll.h"
34 #include "jacoll.h"
35 #include "trcoll.h"
36 #include "allcoll.h"
37 #include "g7coll.h"
38 #include "mnkytst.h"
39 #include "apicoll.h"
40 #include "regcoll.h"
41 #include "currcoll.h"
42 #include "itercoll.h"
43 #include "tstnorm.h"
44 #include "normconf.h"
45 #include "thcoll.h"
46 #include "srchtest.h"
47 #include "ssearch.h"
48 #include "lcukocol.h"
49 #include "ucaconf.h"
50 #include "svccoll.h"
51 #include "cmemory.h"
52 #include "alphaindextst.h"
53 
54 // Set to 1 to test offsets in backAndForth()
55 #define TEST_OFFSETS 0
56 
57 extern IntlTest *createCollationTest();
58 
/**
 * Dispatcher for the collation test suite.
 *
 * Logs the suite banner when exec is true, then hands index/exec/name/par to
 * the TESTCASE_AUTO_* macros, which are defined outside this file
 * (presumably they map each index to one of the listed test classes, in the
 * order listed here -- confirm against the macro definitions).
 *
 * AlphabeticIndexTest is only registered when normalization is compiled in
 * (!UCONFIG_NO_NORMALIZATION).
 */
void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
{
    if (exec) {
        logln("TestSuite Collator: ");
    }

    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO_CLASS(CollationEnglishTest);
    TESTCASE_AUTO_CLASS(CollationFrenchTest);
    TESTCASE_AUTO_CLASS(CollationGermanTest);
    TESTCASE_AUTO_CLASS(CollationSpanishTest);
    TESTCASE_AUTO_CLASS(CollationKanaTest);
    TESTCASE_AUTO_CLASS(CollationTurkishTest);
    TESTCASE_AUTO_CLASS(CollationDummyTest);
    TESTCASE_AUTO_CLASS(G7CollationTest);
    TESTCASE_AUTO_CLASS(CollationMonkeyTest);
    TESTCASE_AUTO_CLASS(CollationAPITest);
    TESTCASE_AUTO_CLASS(CollationRegressionTest);
    TESTCASE_AUTO_CLASS(CollationCurrencyTest);
    TESTCASE_AUTO_CLASS(CollationIteratorTest);
    TESTCASE_AUTO_CLASS(CollationThaiTest);
    TESTCASE_AUTO_CLASS(LotusCollationKoreanTest);
    TESTCASE_AUTO_CLASS(StringSearchTest);
    TESTCASE_AUTO_CLASS(UCAConformanceTest);
    TESTCASE_AUTO_CLASS(CollationServiceTest);
    // NOTE(review): an older comment here read "removed by weiv - we have
    // changed Finnish collation", yet the test is registered on this line.
    // Confirm it is meant to run.
    TESTCASE_AUTO_CLASS(CollationFinnishTest);
    TESTCASE_AUTO_CLASS(SSearchTest);
#if !UCONFIG_NO_NORMALIZATION
    TESTCASE_AUTO_CLASS(AlphabeticIndexTest);
#endif
    // Presumably instantiated through the extern createCollationTest()
    // declared above -- confirm against TESTCASE_AUTO_CREATE_CLASS.
    TESTCASE_AUTO_CREATE_CLASS(CollationTest);
    TESTCASE_AUTO_END;
}
92 
93 UCollationResult
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode & status)94 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
95   int32_t partialSKResult = 0;
96   uint8_t sBuf[512], tBuf[512];
97   UCharIterator sIter, tIter;
98   uint32_t sState[2], tState[2];
99   int32_t sSize = pieceSize, tSize = pieceSize;
100   int32_t i = 0;
101   status = U_ZERO_ERROR;
102   sState[0] = 0; sState[1] = 0;
103   tState[0] = 0; tState[1] = 0;
104   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
105     uiter_setString(&sIter, source, sLen);
106     uiter_setString(&tIter, target, tLen);
107     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
108     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
109 
110     if(sState[0] != 0 || tState[0] != 0) {
111       log("State != 0 : %08X %08X\n", sState[0], tState[0]);
112     }
113     log("%i ", i++);
114 
115     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
116   }
117 
118   if(partialSKResult < 0) {
119       return UCOL_LESS;
120   } else if(partialSKResult > 0) {
121     return UCOL_GREATER;
122   } else {
123     return UCOL_EQUAL;
124   }
125 }
126 
127 void
doTestVariant(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)128 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
129 {
130   UErrorCode status = U_ZERO_ERROR;
131 
132   UCollator *myCollation = col->toUCollator();
133 
134   Collator::EComparisonResult compareResult = col->compare(source, target);
135 
136   CollationKey srckey, tgtkey;
137   col->getCollationKey(source, srckey, status);
138   col->getCollationKey(target, tgtkey, status);
139   if (U_FAILURE(status)){
140     errln("Creation of collation keys failed\n");
141   }
142   Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
143 
144   reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
145 
146     UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
147 
148     int32_t sLen = source.length(), tLen = target.length();
149     const UChar* src = source.getBuffer();
150     const UChar* trg = target.getBuffer();
151     UCollationResult compareResultIter = (UCollationResult)result;
152 
153     {
154       UCharIterator sIter, tIter;
155       uiter_setString(&sIter, src, sLen);
156       uiter_setString(&tIter, trg, tLen);
157       compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
158       if(compareResultIter != (UCollationResult)result) {
159         errln("Different result for iterative comparison "+source+" "+target);
160       }
161     }
162     /* convert the strings to UTF-8 and do try comparing with char iterator */
163     if(!quick) { /*!QUICK*/
164       char utf8Source[256], utf8Target[256];
165       int32_t utf8SourceLen = 0, utf8TargetLen = 0;
166       u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
167       if(U_FAILURE(status)) { /* probably buffer is not big enough */
168         log("Src UTF-8 buffer too small! Will not compare!\n");
169       } else {
170         u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
171         if(U_SUCCESS(status)) { /* probably buffer is not big enough */
172           UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
173           UCharIterator sIter, tIter;
174           /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
175           uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
176           uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
177        /*uiter_setString(&sIter, source, sLen);
178       uiter_setString(&tIter, target, tLen);*/
179           compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
180           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
181           sIter.move(&sIter, 0, UITER_START);
182           tIter.move(&tIter, 0, UITER_START);
183           compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
184           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
185           if(compareResultUTF8 != compareResultIter) {
186             errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
187           }
188           if(compareResultUTF8 != compareResultUTF8Norm) {
189             errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
190           }
191         } else {
192           log("Target UTF-8 buffer too small! Did not compare!\n");
193         }
194         if(U_FAILURE(status)) {
195           log("UTF-8 strcoll failed! Ignoring result\n");
196         }
197       }
198     }
199 
200     /* testing the partial sortkeys */
201     { /*!QUICK*/
202       int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
203       int32_t partialSizesSize = 1;
204       if(!quick) {
205         partialSizesSize = 7;
206       }
207       int32_t i = 0;
208       log("partial sortkey test piecesize=");
209       for(i = 0; i < partialSizesSize; i++) {
210         UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
211         log("%i ", partialSizes[i]);
212 
213         partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
214         if(partialSKResult != (UCollationResult)result) {
215           errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
216         }
217 
218         if(norm != UCOL_ON && !quick) {
219           log("N ");
220           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
221           partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
222           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
223           if(partialSKResult != partialNormalizedSKResult) {
224             errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
225           }
226         }
227       }
228       log("\n");
229     }
230 /*
231   if (compareResult != result) {
232     errln("String comparison failed in variant test\n");
233   }
234   if (keyResult != result) {
235     errln("Collation key comparison failed in variant test\n");
236   }
237 */
238 }
239 
240 void
doTest(Collator * col,const UChar * source,const UChar * target,Collator::EComparisonResult result)241 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
242   doTest(col, UnicodeString(source), UnicodeString(target), result);
243 }
244 
245 void
doTest(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)246 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
247 {
248   if(col) {
249     doTestVariant(col, source, target, result);
250     if(result == Collator::LESS) {
251       doTestVariant(col, target, source, Collator::GREATER);
252     } else if (result == Collator::GREATER) {
253       doTestVariant(col, target, source, Collator::LESS);
254     }
255 
256     UErrorCode status = U_ZERO_ERROR;
257     LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
258     logln("Testing iterating source: "+source);
259     backAndForth(*c);
260     c->setText(target, status);
261     logln("Testing iterating target: "+target);
262     backAndForth(*c);
263   }
264 }
265 
266 
267 // used for collation result reporting, defined here for convenience
268 // (maybe moved later)
269 void
reportCResult(const UnicodeString & source,const UnicodeString & target,CollationKey & sourceKey,CollationKey & targetKey,Collator::EComparisonResult compareResult,Collator::EComparisonResult keyResult,Collator::EComparisonResult incResult,Collator::EComparisonResult expectedResult)270 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
271              CollationKey &sourceKey, CollationKey &targetKey,
272              Collator::EComparisonResult compareResult,
273              Collator::EComparisonResult keyResult,
274                                 Collator::EComparisonResult incResult,
275                          Collator::EComparisonResult expectedResult )
276 {
277     if (expectedResult < -1 || expectedResult > 1)
278     {
279         errln("***** invalid call to reportCResult ****");
280         return;
281     }
282 
283     UBool ok1 = (compareResult == expectedResult);
284     UBool ok2 = (keyResult == expectedResult);
285     UBool ok3 = (incResult == expectedResult);
286 
287 
288     if (ok1 && ok2 && ok3 && !verbose) {
289         // Keep non-verbose, passing tests fast
290         return;
291     } else {
292         UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
293         UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
294         UnicodeString prettySource, prettyTarget, sExpect, sResult;
295 
296         IntlTest::prettify(source, prettySource);
297         IntlTest::prettify(target, prettyTarget);
298         appendCompareResult(compareResult, sResult);
299         appendCompareResult(expectedResult, sExpect);
300 
301         if (ok1) {
302             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
303         } else {
304             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
305         }
306 
307         msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
308         msg2 = ").compareTo(key(";
309         msg3 = ")) returned ";
310 
311         appendCompareResult(keyResult, sResult);
312 
313         if (ok2) {
314             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
315         } else {
316             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
317 
318             msg1 = "  ";
319             msg2 = " vs. ";
320 
321             prettify(sourceKey, prettySource);
322             prettify(targetKey, prettyTarget);
323 
324             errln(msg1 + prettySource + msg2 + prettyTarget);
325         }
326         msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
327         msg2 = ", ";
328         msg3 = ") returned ";
329 
330         appendCompareResult(incResult, sResult);
331 
332         if (ok3) {
333             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
334         } else {
335             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
336         }
337     }
338 }
339 
340 UnicodeString&
appendCompareResult(Collator::EComparisonResult result,UnicodeString & target)341 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
342                   UnicodeString& target)
343 {
344     if (result == Collator::LESS)
345     {
346         target += "LESS";
347     }
348     else if (result == Collator::EQUAL)
349     {
350         target += "EQUAL";
351     }
352     else if (result == Collator::GREATER)
353     {
354         target += "GREATER";
355     }
356     else
357     {
358         UnicodeString huh = "?";
359 
360         target += (huh + (int32_t)result);
361     }
362 
363     return target;
364 }
365 
366 // Produce a printable representation of a CollationKey
prettify(const CollationKey & source,UnicodeString & target)367 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
368 {
369     int32_t i, byteCount;
370     const uint8_t *bytes = source.getByteArray(byteCount);
371 
372     target.remove();
373     target += "[";
374 
375     for (i = 0; i < byteCount; i += 1)
376     {
377         if (i != 0) {
378             target += " ";
379         }
380         appendHex(bytes[i], 2, target);
381     }
382 
383     target += "]";
384 
385     return target;
386 }
387 
backAndForth(CollationElementIterator & iter)388 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
389 {
390     // Run through the iterator forwards and stick it into an array
391     int32_t orderLength = 0;
392     LocalArray<Order> orders(getOrders(iter, orderLength));
393     UErrorCode status = U_ZERO_ERROR;
394 
395     // Now go through it backwards and make sure we get the same values
396     int32_t index = orderLength;
397     int32_t o;
398 
399     // reset the iterator
400     iter.reset();
401 
402     while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
403     {
404         /*int32_t offset = */iter.getOffset();
405 
406         if (index == 0) {
407           if(o == 0) {
408             continue;
409           } else { // this is an error, orders exhausted but there are non-ignorable CEs from
410             // going backwards
411             errln("Backward iteration returned a non ignorable after orders are exhausted");
412             break;
413           }
414         }
415 
416         index -= 1;
417         if (o != orders[index].order) {
418             if (o == 0)
419                 index += 1;
420             else {
421                 while (index > 0 && orders[--index].order == 0) {
422                   // nothing...
423                 }
424 
425                 if (o != orders[index].order) {
426                     errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
427                     orders[index].order, o);
428                 //break;
429                   return;
430                 }
431             }
432         }
433 
434 #if TEST_OFFSETS
435         if (offset != orders[index].offset) {
436           errln("Mismatched offset at index %d: %d vs. %d", index,
437             orders[index].offset, offset);
438        //break;
439          return;
440         }
441 #endif
442 
443     }
444 
445     while (index != 0 && orders[index - 1].order == 0)
446     {
447       index --;
448     }
449 
450     if (index != 0)
451     {
452         UnicodeString msg("Didn't get back to beginning - index is ");
453         errln(msg + index);
454 
455         iter.reset();
456         err("next: ");
457         while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
458         {
459             UnicodeString hexString("0x");
460 
461             appendHex(o, 8, hexString);
462             hexString += " ";
463             err(hexString);
464         }
465         errln("");
466 
467         err("prev: ");
468         while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
469         {
470             UnicodeString hexString("0x");
471 
472             appendHex(o, 8, hexString);
473             hexString += " ";
474              err(hexString);
475         }
476         errln("");
477     }
478 }
479 
480 
481 /**
482  * Return an integer array containing all of the collation orders
483  * returned by calls to next on the specified iterator
484  */
getOrders(CollationElementIterator & iter,int32_t & orderLength)485 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
486 {
487     int32_t maxSize = 100;
488     int32_t size = 0;
489     LocalArray<Order> orders(new Order[maxSize]);
490     UErrorCode status = U_ZERO_ERROR;
491     int32_t offset = iter.getOffset();
492 
493     int32_t order;
494     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
495     {
496         if (size == maxSize)
497         {
498             maxSize *= 2;
499             Order *temp = new Order[maxSize];
500 
501             uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
502             orders.adoptInstead(temp);
503         }
504 
505         orders[size].order  = order;
506         orders[size].offset = offset;
507 
508         offset = iter.getOffset();
509         size += 1;
510     }
511     if (U_FAILURE(status)) {
512         errln("CollationElementIterator.next() failed - %s",
513               u_errorName(status));
514     }
515 
516     if (maxSize > size)
517     {
518         Order *temp = new Order[size];
519 
520         uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
521         orders.adoptInstead(temp);
522     }
523 
524     orderLength = size;
525     return orders.orphan();
526 }
527 
528 #endif /* #if !UCONFIG_NO_COLLATION */
529