1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File CALLCOLL.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda              Ported for C API
15 ********************************************************************************
16 */
17 
18 /*
19  * Important: This file is included into intltest/allcoll.cpp so that the
20  * test data is shared. This makes it easier to maintain the test data,
21  * especially since the Unicode data must be portable and quoted character
22  * literals will not work.
23  * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
24  * that must prevent the actual code in here from being part of the
25  * allcoll.cpp compilation.
26  */
27 
28 /**
29  * CollationDummyTest is a third level test class.  This tests creation of
30  * a customized collator object.  For example, number 1 to be sorted
 * equivalent to word 'one'.
32  */
33 
34 #include <string.h>
35 #include <stdlib.h>
36 
37 #include "unicode/utypes.h"
38 
39 #if !UCONFIG_NO_COLLATION
40 
41 #include "unicode/ucol.h"
42 #include "unicode/uloc.h"
43 #include "unicode/ures.h"
44 #include "unicode/udata.h"
45 #include "unicode/ucoleitr.h"
46 #include "unicode/ustring.h"
47 #include "unicode/uclean.h"
48 #include "unicode/putil.h"
49 #include "unicode/uenum.h"
50 
51 #include "cintltst.h"
52 #include "ccolltst.h"
53 #include "callcoll.h"
54 #include "calldata.h"
55 #include "cstring.h"
56 #include "cmemory.h"
57 
58 /* set to 1 to test offsets in backAndForth() */
59 #define TEST_OFFSETS 0
60 
61 /* perform test with strength PRIMARY */
62 static void TestPrimary(void);
63 
64 /* perform test with strength SECONDARY */
65 static void TestSecondary(void);
66 
67 /* perform test with strength tertiary */
68 static void TestTertiary(void);
69 
70 /*perform tests with strength Identical */
71 static void TestIdentical(void);
72 
73 /* perform extra tests */
74 static void TestExtra(void);
75 
76 /* Test jitterbug 581 */
77 static void TestJB581(void);
78 
79 /* Test jitterbug 1401 */
80 static void TestJB1401(void);
81 
82 /* Test [variable top] in the rule syntax */
83 static void TestVariableTop(void);
84 
85 /* Test surrogates */
86 static void TestSurrogates(void);
87 
88 static void TestInvalidRules(void);
89 
90 static void TestJitterbug1098(void);
91 
92 static void TestFCDCrash(void);
93 
94 static void TestJ5298(void);
95 
96 static void TestBadKey(void);
97 
/*
 * Expected comparison outcome for each (testSourceCases[i], testTargetCases[i])
 * pair. The tests in this file read it in the following zero-based index ranges:
 *   [0, 17)   TestTertiary
 *   [17, 26)  TestPrimary
 *   [26, 34)  TestSecondary
 *   [34, 37)  TestIdentical
 * The numeric markers on the right count entries starting at 1.
 */
const UCollationResult results[] = {
    UCOL_LESS,
    UCOL_LESS, /*UCOL_GREATER,*/
    UCOL_LESS,
    UCOL_LESS,
    UCOL_LESS,
    UCOL_LESS,
    UCOL_LESS,
    UCOL_GREATER,
    UCOL_GREATER,
    UCOL_LESS,                                     /*  entry 10 */
    UCOL_GREATER,
    UCOL_LESS,
    UCOL_GREATER,
    UCOL_GREATER,
    UCOL_LESS,
    UCOL_LESS,
    UCOL_LESS,
    /*  primary-strength cases start here (index 17) */
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_EQUAL,                                    /*  entry 20 */
    UCOL_LESS,
    UCOL_LESS,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_LESS,
    /*  secondary-strength cases start here (index 26) */
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_EQUAL,                                    /*  entry 31 */
    UCOL_EQUAL,
    UCOL_LESS,
    UCOL_EQUAL,                                     /*  entry 34; last secondary case */
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_LESS                                        /* entry 37; last identical-strength case */
};
139 
140 
/*
 * Appends two characters for val to the NUL-terminated string dst: first the
 * T_CString_itosOffset() character of the high nibble, then that of the low
 * nibble, and re-terminates the string. dst must have room for three more bytes.
 */
static void uprv_appendByteToHexString(char *dst, uint8_t val) {
  char *tail = dst + (uint32_t)uprv_strlen(dst);

  tail[0] = T_CString_itosOffset((val >> 4));
  tail[1] = T_CString_itosOffset((val & 0xF));
  tail[2] = 0;
}
148 
149 /* this function makes a string with representation of a sortkey */
sortKeyToString(const UCollator * coll,const uint8_t * sortkey,char * buffer,uint32_t * len)150 static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
151     int32_t strength = UCOL_PRIMARY;
152     uint32_t res_size = 0;
153     UBool doneCase = FALSE;
154     UErrorCode errorCode = U_ZERO_ERROR;
155 
156     char *current = buffer;
157     const uint8_t *currentSk = sortkey;
158 
159     uprv_strcpy(current, "[");
160 
161     while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
162         if(strength > UCOL_PRIMARY) {
163             uprv_strcat(current, " . ");
164         }
165         while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
166             uprv_appendByteToHexString(current, *currentSk++);
167             uprv_strcat(current, " ");
168         }
169         if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
170             doneCase = TRUE;
171         } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
172             strength ++;
173         }
174         if (*currentSk) {
175             uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
176         }
177         if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
178             break;
179         }
180     }
181 
182     if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
183         uprv_strcat(current, " . ");
184         while(*currentSk != 0) {
185             uprv_appendByteToHexString(current, *currentSk++);
186             uprv_strcat(current, " ");
187         }
188 
189         uprv_appendByteToHexString(current, *currentSk++);
190     }
191     uprv_strcat(current, "]");
192 
193     if(res_size > *len) {
194         return NULL;
195     }
196 
197     return buffer;
198 }
199 
/*
 * Registers every test of this file under "tscoll/callcoll/" in the test tree
 * rooted at root. Entries are added in table order.
 */
void addAllCollTest(TestNode** root)
{
    static const struct {
        void (*test)(void);
        const char *path;
    } kTests[] = {
        { &TestPrimary,       "tscoll/callcoll/TestPrimary" },
        { &TestSecondary,     "tscoll/callcoll/TestSecondary" },
        { &TestTertiary,      "tscoll/callcoll/TestTertiary" },
        { &TestIdentical,     "tscoll/callcoll/TestIdentical" },
        { &TestExtra,         "tscoll/callcoll/TestExtra" },
        { &TestJB581,         "tscoll/callcoll/TestJB581" },
        { &TestVariableTop,   "tscoll/callcoll/TestVariableTop" },
        { &TestSurrogates,    "tscoll/callcoll/TestSurrogates" },
        { &TestInvalidRules,  "tscoll/callcoll/TestInvalidRules" },
        { &TestJB1401,        "tscoll/callcoll/TestJB1401" },
        { &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098" },
        { &TestFCDCrash,      "tscoll/callcoll/TestFCDCrash" },
        { &TestJ5298,         "tscoll/callcoll/TestJ5298" },
        { &TestBadKey,        "tscoll/callcoll/TestBadKey" }
    };
    int32_t k;

    for (k = 0; k < (int32_t)(sizeof(kTests) / sizeof(kTests[0])); ++k) {
        addTest(root, kTests[k].test, kTests[k].path);
    }
}
217 
hasCollationElements(const char * locName)218 UBool hasCollationElements(const char *locName) {
219 
220   UErrorCode status = U_ZERO_ERROR;
221 
222   UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);;
223 
224   if(U_SUCCESS(status)) {
225     status = U_ZERO_ERROR;
226     loc = ures_getByKey(loc, "collations", loc, &status);
227     ures_close(loc);
228     if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
229       return TRUE;
230     }
231   }
232   return FALSE;
233 }
234 
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode * status)235 static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
236   int32_t partialSKResult = 0;
237   UCharIterator sIter, tIter;
238   uint32_t sState[2], tState[2];
239   int32_t sSize = pieceSize, tSize = pieceSize;
240   /*int32_t i = 0;*/
241   uint8_t sBuf[16384], tBuf[16384];
242   if(pieceSize > 16384) {
243     log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
244     *status = U_BUFFER_OVERFLOW_ERROR;
245     return UCOL_EQUAL;
246   }
247   *status = U_ZERO_ERROR;
248   sState[0] = 0; sState[1] = 0;
249   tState[0] = 0; tState[1] = 0;
250   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
251     uiter_setString(&sIter, source, sLen);
252     uiter_setString(&tIter, target, tLen);
253     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
254     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
255 
256     if(sState[0] != 0 || tState[0] != 0) {
257       /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
258     }
259     /*log_verbose("%i ", i++);*/
260 
261     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
262   }
263 
264   if(partialSKResult < 0) {
265       return UCOL_LESS;
266   } else if(partialSKResult > 0) {
267     return UCOL_GREATER;
268   } else {
269     return UCOL_EQUAL;
270   }
271 }
272 
/*
 * Compares source with target through every comparison path exercised by this
 * test suite and logs an error for each path that disagrees with result:
 *   - ucol_strcoll() with explicit lengths and with null-terminated input,
 *   - ucol_strcollIter() over UTF-16,
 *   - ucol_strcollUTF8() and ucol_strcollIter() over a UTF-8 conversion
 *     (skipped when a string does not fit the 256-byte conversion buffers),
 *     the latter also with normalization switched on,
 *   - partial sort keys via compareUsingPartials() (piece size 3 only in quick
 *     mode, seven sizes otherwise),
 *   - full sort keys via ucol_getSortKey(), reported through reportCResult().
 *
 * The collator's normalization mode is changed temporarily and restored to the
 * value it had on entry.
 *
 * @param myCollation  collator under test; must not be NULL
 * @param source       null-terminated first string
 * @param target       null-terminated second string
 * @param result       expected outcome of comparing source with target
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    /* Handed to sortKeyToString(); initialized so that it never carries garbage. */
    uint32_t len = (uint32_t)sizeof(buffer);
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;

    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    if (compareResult != result) {
        log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
            compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1);
    if (compareResulta != result) {
        /* Report the value that was actually checked (compareResulta). */
        log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }

    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
    {
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;

        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                {
                    /* ucol_strcollUTF8 */
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with explicit length\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }

                {
                    /* char iterator over UTF8 */
                    UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result;

                    uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                    uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                    compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    /* Repeat with normalization on; rewind the iterators first. */
                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                    sIter.move(&sIter, 0, UITER_START);
                    tIter.move(&tIter, 0, UITER_START);
                    compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                    if(compareResultUTF8Iter != compareResultIter) {
                        log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    if(compareResultUTF8Iter != compareResultUTF8IterNorm) {
                        log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }
            } else {
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    {
      int32_t i = 0;
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(getTestOption(QUICK_OPTION) <= 0) {
        partialSizesSize = 7;
      }
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

        partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
        if(partialSKResult != result) {
          log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
            partialSKResult, result,
            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
        }

        if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) {
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
              aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
          }
        }
      }
    }

    /* Preflight both keys to learn their lengths. */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    if(sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        log_err("Memory allocation for sort keys failed\n");
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);

    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generated with a length specified.                              */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /* The keys are compared as C strings; the reported length includes the terminator. */
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = (int)uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = (int)uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}
454 
/*
 * Checks that source compares to target as result, and that the swapped pair
 * compares with the mirrored result (LESS <-> GREATER, EQUAL stays EQUAL).
 * Logs a data error and does nothing else when myCollation is NULL.
 */
void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
  UCollationResult mirrored;

  if(!myCollation) {
    log_data_err("No collator! Any data around?\n");
    return;
  }

  switch(result) {
  case UCOL_LESS:
    mirrored = UCOL_GREATER;
    break;
  case UCOL_GREATER:
    mirrored = UCOL_LESS;
    break;
  default:
    mirrored = UCOL_EQUAL;
    break;
  }

  doTestVariant(myCollation, source, target, result);
  doTestVariant(myCollation, target, source, mirrored);
}
470 
471 
472 /**
473  * Return an integer array containing all of the collation orders
474  * returned by calls to next on the specified iterator
475  */
getOrders(UCollationElements * iter,int32_t * orderLength)476 OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength)
477 {
478     UErrorCode status;
479     int32_t order;
480     int32_t maxSize = 100;
481     int32_t size = 0;
482     int32_t offset = ucol_getOffset(iter);
483     OrderAndOffset *temp;
484     OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
485     status= U_ZERO_ERROR;
486 
487 
488     while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
489     {
490         if (size == maxSize)
491         {
492             maxSize *= 2;
493             temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
494 
495             memcpy(temp, orders, size * sizeof(OrderAndOffset));
496             free(orders);
497             orders = temp;
498 
499         }
500 
501         orders[size].order  = order;
502         orders[size].offset = offset;
503 
504         offset = ucol_getOffset(iter);
505         size += 1;
506     }
507 
508     if (maxSize > size && size > 0)
509     {
510         temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size);
511 
512         memcpy(temp, orders, size * sizeof(OrderAndOffset));
513         free(orders);
514         orders = temp;
515 
516 
517     }
518 
519     *orderLength = size;
520     return orders;
521 }
522 
523 
524 void
backAndForth(UCollationElements * iter)525 backAndForth(UCollationElements *iter)
526 {
527     /* Run through the iterator forwards and stick it into an array */
528     int32_t idx, o;
529     UErrorCode status = U_ZERO_ERROR;
530     int32_t orderLength = 0;
531     OrderAndOffset *orders = getOrders(iter, &orderLength);
532 
533 
534     /* Now go through it backwards and make sure we get the same values */
535     idx = orderLength;
536     ucol_reset(iter);
537 
538     /* synwee : changed */
539     while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
540 #if TEST_OFFSETS
541       int32_t offset =
542 #endif
543         ucol_getOffset(iter);
544 
545       idx -= 1;
546       if (o != orders[idx].order) {
547         if (o == 0)
548           idx ++;
549         else {
550           while (idx > 0 && orders[-- idx].order == 0) {
551             /* nothing... */
552           }
553 
554           if (o != orders[idx].order) {
555               log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx,
556                 orders[idx].order, o);
557             goto bail;
558           }
559         }
560       }
561 
562 #if TEST_OFFSETS
563       if (offset != orders[idx].offset) {
564         log_err("Mismatched offset at index %d: %d vs. %d\n", idx,
565             orders[idx].offset, offset);
566         goto bail;
567       }
568 #endif
569 
570     }
571 
572     while (idx != 0 && orders[idx - 1].order == 0) {
573       idx -= 1;
574     }
575 
576     if (idx != 0) {
577         log_err("Didn't get back to beginning - index is %d\n", idx);
578 
579         ucol_reset(iter);
580         log_err("\nnext: ");
581 
582         if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) {
583             log_err("Error at %x\n", o);
584         }
585 
586         log_err("\nprev: ");
587 
588         if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
589             log_err("Error at %x\n", o);
590         }
591 
592         log_verbose("\n");
593     }
594 
595 bail:
596     free(orders);
597 }
598 
/*
 * Runs doTest() on every ordered pair (s[i], s[j]) with i < j, expecting
 * result for each pair, and runs backAndForth() on a collation element
 * iterator over both strings of the pair.
 *
 * @param coll    collator under test
 * @param s       array of size escaped strings (u_unescape() syntax); each is
 *                unescaped into a 2048-UChar buffer
 * @param size    number of strings in s; fewer than two means no pair is tested
 * @param result  expected comparison result for every pair
 *
 * If the element iterator cannot be created, the error is logged and the
 * iterator round-trip checks are skipped; the doTest() comparisons still run.
 */
void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) {
  UChar t1[2048] = {0};
  UChar t2[2048] = {0};
  UCollationElements *iter;
  UErrorCode status = U_ZERO_ERROR;

  uint32_t i = 0, j = 0;
  log_verbose("testing sequence:\n");
  for(i = 0; i < size; i++) {
    log_verbose("%s\n", s[i]);
  }

  /* t1 is still empty here; the real text is set per pair below. */
  iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
  if (U_FAILURE(status)) {
    log_err("Creation of iterator failed\n");
    iter = NULL;
  }
  /* "i + 1 < size" rather than "i < size-1": size is unsigned and may be 0. */
  for(i = 0; i + 1 < size; i++) {
    for(j = i+1; j < size; j++) {
      u_unescape(s[i], t1, 2048);
      u_unescape(s[j], t2, 2048);
      doTest(coll, t1, t2, result);
      /* synwee : added collation element iterator test */
      if (iter != NULL) {
        ucol_setText(iter, t1, u_strlen(t1), &status);
        backAndForth(iter);
        ucol_setText(iter, t2, u_strlen(t2), &status);
        backAndForth(iter);
      }
    }
  }
  if (iter != NULL) {
    ucol_closeElements(iter);
  }
}
629 
/*
 * Shorthand for genericOrderingTestWithResult() with an expected result of
 * UCOL_LESS for every pair.
 */
void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size) {
  const UCollationResult expected = UCOL_LESS;

  genericOrderingTestWithResult(coll, s, size, expected);
}
633 
genericLocaleStarter(const char * locale,const char * const s[],uint32_t size)634 void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) {
635   UErrorCode status = U_ZERO_ERROR;
636   UCollator *coll = ucol_open(locale, &status);
637 
638   log_verbose("Locale starter for %s\n", locale);
639 
640   if(U_SUCCESS(status)) {
641     genericOrderingTest(coll, s, size);
642   } else if(status == U_FILE_ACCESS_ERROR) {
643     log_data_err("Is your data around?\n");
644     return;
645   } else {
646     log_err("Unable to open collator for locale %s\n", locale);
647   }
648   ucol_close(coll);
649 }
650 
/*
 * Opens a collator for locale and runs genericOrderingTestWithResult() on the
 * size strings in s, expecting result for every pair. A U_FILE_ACCESS_ERROR is
 * reported as a data error; any other failure to open the collator is
 * reported as a test error.
 */
void genericLocaleStarterWithResult(const char *locale, const char * const s[], uint32_t size, UCollationResult result) {
  UErrorCode openStatus = U_ZERO_ERROR;
  UCollator *collator = ucol_open(locale, &openStatus);

  log_verbose("Locale starter for %s\n", locale);

  if(openStatus == U_FILE_ACCESS_ERROR) {
    log_data_err("Is your data around?\n");
    return;
  }

  if(U_FAILURE(openStatus)) {
    log_err("Unable to open collator for locale %s\n", locale);
  } else {
    genericOrderingTestWithResult(collator, s, size, result);
  }
  ucol_close(collator);
}
667 
668 /* currently not used with options */
genericRulesStarterWithOptionsAndResult(const char * rules,const char * const s[],uint32_t size,const UColAttribute * attrs,const UColAttributeValue * values,uint32_t attsize,UCollationResult result)669 void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
670   UErrorCode status = U_ZERO_ERROR;
671   UChar rlz[RULE_BUFFER_LEN] = { 0 };
672   uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
673   uint32_t i;
674 
675   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
676 
677   log_verbose("Rules starter for %s\n", rules);
678 
679   if(U_SUCCESS(status)) {
680     log_verbose("Setting attributes\n");
681     for(i = 0; i < attsize; i++) {
682       ucol_setAttribute(coll, attrs[i], values[i], &status);
683     }
684 
685     genericOrderingTestWithResult(coll, s, size, result);
686   } else {
687     log_err_status(status, "Unable to open collator with rules %s\n", rules);
688   }
689   ucol_close(coll);
690 }
691 
/*
 * Opens a collator for locale, applies the attsize attribute/value pairs from
 * attrs and values, and runs genericOrderingTestWithResult() on the size
 * strings in s. Failure to open the collator is logged with log_err_status().
 */
void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
  UErrorCode status = U_ZERO_ERROR;
  uint32_t attrIdx;

  UCollator *collator = ucol_open(locale, &status);

  log_verbose("Locale starter for %s\n", locale);

  if(U_FAILURE(status)) {
    log_err_status(status, "Unable to open collator for locale %s\n", locale);
  } else {
    log_verbose("Setting attributes\n");
    for(attrIdx = 0; attrIdx != attsize; ++attrIdx) {
      ucol_setAttribute(collator, attrs[attrIdx], values[attrIdx], &status);
    }

    genericOrderingTestWithResult(collator, s, size, result);
  }
  ucol_close(collator);
}
713 
genericLocaleStarterWithOptions(const char * locale,const char * const s[],uint32_t size,const UColAttribute * attrs,const UColAttributeValue * values,uint32_t attsize)714 void genericLocaleStarterWithOptions(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
715   genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS);
716 }
717 
/*
 * Builds a collator from the escaped rule string rules and runs
 * genericOrderingTestWithResult() on the size strings in s, expecting result
 * for every pair. A U_FILE_ACCESS_ERROR is reported as a data error; any
 * other failure to build the collator is reported as a test error.
 */
void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) {
  UErrorCode status = U_ZERO_ERROR;
  UChar unescaped[RULE_BUFFER_LEN] = { 0 };
  uint32_t unescapedLen = u_unescape(rules, unescaped, RULE_BUFFER_LEN);
  UCollator *collator = ucol_openRules(unescaped, unescapedLen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

  log_verbose("Rules starter for %s\n", rules);

  if(status == U_FILE_ACCESS_ERROR) {
    log_data_err("Is your data around?\n");
    return;
  }
  if(U_FAILURE(status)) {
    log_err("Unable to open collator with rules %s\n", rules);
    return;
  }

  genericOrderingTestWithResult(collator, s, size, result);
  ucol_close(collator);
}
736 
genericRulesStarter(const char * rules,const char * const s[],uint32_t size)737 void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) {
738   genericRulesStarterWithResult(rules, s, size, UCOL_LESS);
739 }
740 
TestTertiary()741 static void TestTertiary()
742 {
743     int32_t len,i;
744     UCollator *myCollation;
745     UErrorCode status=U_ZERO_ERROR;
746     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
747     UChar rules[sizeof(str)];
748     len = (int32_t)strlen(str);
749     u_uastrcpy(rules, str);
750 
751     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
752     if(U_FAILURE(status)){
753         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
754         return;
755     }
756 
757     ucol_setStrength(myCollation, UCOL_TERTIARY);
758     for (i = 0; i < 17 ; i++)
759     {
760         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
761     }
762     ucol_close(myCollation);
763     myCollation = 0;
764 }
765 
TestPrimary()766 static void TestPrimary( )
767 {
768     int32_t len,i;
769     UCollator *myCollation;
770     UErrorCode status=U_ZERO_ERROR;
771     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
772     UChar rules[sizeof(str)];
773     len = (int32_t)strlen(str);
774     u_uastrcpy(rules, str);
775 
776     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
777     if(U_FAILURE(status)){
778         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
779         return;
780     }
781     ucol_setStrength(myCollation, UCOL_PRIMARY);
782 
783     for (i = 17; i < 26 ; i++)
784     {
785 
786         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
787     }
788     ucol_close(myCollation);
789     myCollation = 0;
790 }
791 
TestSecondary()792 static void TestSecondary()
793 {
794     int32_t i;
795     int32_t len;
796     UCollator *myCollation;
797     UErrorCode status=U_ZERO_ERROR;
798     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
799     UChar rules[sizeof(str)];
800     len = (int32_t)strlen(str);
801     u_uastrcpy(rules, str);
802 
803     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
804     if(U_FAILURE(status)){
805         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
806         return;
807     }
808     ucol_setStrength(myCollation, UCOL_SECONDARY);
809     for (i = 26; i < 34 ; i++)
810     {
811         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
812     }
813     ucol_close(myCollation);
814     myCollation = 0;
815 }
816 
TestIdentical()817 static void TestIdentical()
818 {
819     int32_t i;
820     int32_t len;
821     UCollator *myCollation;
822     UErrorCode status=U_ZERO_ERROR;
823     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
824     UChar rules[sizeof(str)];
825     len = (int32_t)strlen(str);
826     u_uastrcpy(rules, str);
827 
828     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
829     if(U_FAILURE(status)){
830         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
831         return;
832     }
833     for(i= 34; i<37; i++)
834     {
835         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
836     }
837     ucol_close(myCollation);
838     myCollation = 0;
839 }
840 
TestExtra()841 static void TestExtra()
842 {
843     int32_t i, j;
844     int32_t len;
845     UCollator *myCollation;
846     UErrorCode status = U_ZERO_ERROR;
847     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
848     UChar rules[sizeof(str)];
849     len = (int32_t)strlen(str);
850     u_uastrcpy(rules, str);
851 
852     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
853     if(U_FAILURE(status)){
854         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
855         return;
856     }
857     ucol_setStrength(myCollation, UCOL_TERTIARY);
858     for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
859     {
860         for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
861         {
862 
863             doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
864         }
865     }
866     ucol_close(myCollation);
867     myCollation = 0;
868 }
869 
TestJB581(void)870 static void TestJB581(void)
871 {
872     int32_t     bufferLen   = 0;
873     UChar       source      [100];
874     UChar       target      [100];
875     UCollationResult result     = UCOL_EQUAL;
876     uint8_t     sourceKeyArray  [100];
877     uint8_t     targetKeyArray  [100];
878     int32_t     sourceKeyOut    = 0,
879                 targetKeyOut    = 0;
880     UCollator   *myCollator = 0;
881     UErrorCode status = U_ZERO_ERROR;
882 
883     /*u_uastrcpy(source, "This is a test.");*/
884     /*u_uastrcpy(target, "THISISATEST.");*/
885     u_uastrcpy(source, "THISISATEST.");
886     u_uastrcpy(target, "Thisisatest.");
887 
888     myCollator = ucol_open("en_US", &status);
889     if (U_FAILURE(status)){
890         log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
891         return;
892     }
893     result = ucol_strcoll(myCollator, source, -1, target, -1);
894     /* result is 1, secondary differences only for ignorable space characters*/
895     if (result != 1)
896     {
897         log_err("Comparing two strings with only secondary differences in C failed.\n");
898     }
899     /* To compare them with just primary differences */
900     ucol_setStrength(myCollator, UCOL_PRIMARY);
901     result = ucol_strcoll(myCollator, source, -1, target, -1);
902     /* result is 0 */
903     if (result != 0)
904     {
905         log_err("Comparing two strings with no differences in C failed.\n");
906     }
907     /* Now, do the same comparison with keys */
908     sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
909     (void)sourceKeyOut;    /* Suppress set but not used warning. */
910     targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
911     bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
912     if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
913     {
914         log_err("Comparing two strings with sort keys in C failed.\n");
915     }
916     ucol_close(myCollator);
917 }
918 
TestJB1401(void)919 static void TestJB1401(void)
920 {
921     UCollator     *myCollator = 0;
922     UErrorCode     status = U_ZERO_ERROR;
923     static UChar   NFD_UnsafeStartChars[] = {
924         0x0f73,          /* Tibetan Vowel Sign II */
925         0x0f75,          /* Tibetan Vowel Sign UU */
926         0x0f81,          /* Tibetan Vowel Sign Reversed II */
927             0
928     };
929     int            i;
930 
931 
932     myCollator = ucol_open("en_US", &status);
933     if (U_FAILURE(status)){
934         log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
935         return;
936     }
937     ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
938     if (U_FAILURE(status)){
939         log_err("ERROR: Failed to set normalization mode ON for collator.\n");
940         return;
941     }
942 
943     for (i=0; ; i++) {
944         UChar    c;
945         UChar    X[4];
946         UChar    Y[20];
947         UChar    Z[20];
948 
949         /*  Get the next funny character to be tested, and set up the
950          *  three test strings X, Y, Z, consisting of an A-grave + test char,
951          *    in original form, NFD, and then NFC form.
952          */
953         c = NFD_UnsafeStartChars[i];
954         if (c==0) {break;}
955 
956         X[0]=0xC0; X[1]=c; X[2]=0;   /* \u00C0 is A Grave*/
957 
958         unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
959         unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
960         if (U_FAILURE(status)){
961             log_err("ERROR: Failed to normalize test of character %x\n", c);
962             return;
963         }
964 
965         /* Collation test.  All three strings should be equal.
966          *   doTest does both strcoll and sort keys, with params in both orders.
967          */
968         doTest(myCollator, X, Y, UCOL_EQUAL);
969         doTest(myCollator, X, Z, UCOL_EQUAL);
970         doTest(myCollator, Y, Z, UCOL_EQUAL);
971 
972         /* Run collation element iterators over the three strings.  Results should be same for each.
973          */
974         {
975             UCollationElements *ceiX, *ceiY, *ceiZ;
976             int32_t             ceX,   ceY,   ceZ;
977             int                 j;
978 
979             ceiX = ucol_openElements(myCollator, X, -1, &status);
980             ceiY = ucol_openElements(myCollator, Y, -1, &status);
981             ceiZ = ucol_openElements(myCollator, Z, -1, &status);
982             if (U_FAILURE(status)) {
983                 log_err("ERROR: uucol_openElements failed.\n");
984                 return;
985             }
986 
987             for (j=0;; j++) {
988                 ceX = ucol_next(ceiX, &status);
989                 ceY = ucol_next(ceiY, &status);
990                 ceZ = ucol_next(ceiZ, &status);
991                 if (U_FAILURE(status)) {
992                     log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
993                     break;
994                 }
995                 if (ceX != ceY || ceY != ceZ) {
996                     log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
997                     break;
998                 }
999                 if (ceX == UCOL_NULLORDER) {
1000                     break;
1001                 }
1002             }
1003             ucol_closeElements(ceiX);
1004             ucol_closeElements(ceiY);
1005             ucol_closeElements(ceiZ);
1006         }
1007     }
1008     ucol_close(myCollator);
1009 }
1010 
1011 
1012 
/**
 * Placeholder for the former [variable top] rule-syntax test.
 *
 * The whole body is compiled out with #if 0, so calling this function does
 * nothing. The disabled code is kept for reference: it was written to check
 * that, with [alternate] set to shifted, code points before [variable top]
 * give a primary CE of 0.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    int         len = strlen(str);
    UChar       rules[sizeof(str)];
    UCollator  *myCollation;
    UCollator  *enCollation;
    UErrorCode  status = U_ZERO_ERROR;
    UChar       source[1];
    UChar       ch;
    uint8_t     result[20];
    uint8_t     expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n",
                myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF,
                                 UCOL_PRIMARY, NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n",
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    /* All-zero reference bytes to compare the sort keys against. */
    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result,
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* 'a' up to, but not including, 'z' */
    for (ch = 'a'; ch < 'z'; ch++) {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    ch);
        }
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
1095 
1096 /**
1097   * Tests surrogate support.
1098   * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
1099   * Therefore, another (unassigned) code point was used for this test.
1100   */
TestSurrogates(void)1101 static void TestSurrogates(void)
1102 {
1103     static const char       str[]          =
1104                               "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
1105           int         len          = (int)strlen(str);
1106           int         rlen         = 0;
1107           UChar      rules[sizeof(str)];
1108           UCollator  *myCollation;
1109           UCollator  *enCollation;
1110           UErrorCode  status       = U_ZERO_ERROR;
1111           UChar       source[][4]    =
1112           {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1113           UChar       target[][4]    =
1114           {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1115           int         count        = 0;
1116           uint8_t enresult[20], myresult[20];
1117           int enlen, mylen;
1118 
1119     /* tests for open rules with surrogate rules */
1120     rlen = u_unescape(str, rules, len);
1121 
1122     enCollation = ucol_open("en_US", &status);
1123     if (U_FAILURE(status)) {
1124         log_err_status(status, "ERROR: in creation of collator :%s\n",
1125                 myErrorName(status));
1126         return;
1127     }
1128     myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
1129                                  UCOL_TERTIARY,NULL, &status);
1130     if (U_FAILURE(status)) {
1131         ucol_close(enCollation);
1132         log_err("ERROR: in creation of rule based collator :%s\n",
1133                 myErrorName(status));
1134         return;
1135     }
1136 
1137     /*
1138     this test is to verify the supplementary sort key order in the english
1139     collator
1140     */
1141     log_verbose("start of english collation supplementary characters test\n");
1142     while (count < 2) {
1143         doTest(enCollation, source[count], target[count], UCOL_LESS);
1144         count ++;
1145     }
1146     doTest(enCollation, source[count], target[count], UCOL_GREATER);
1147 
1148     log_verbose("start of tailored collation supplementary characters test\n");
1149     count = 0;
1150     /* tests getting collation elements for surrogates for tailored rules */
1151     while (count < 4) {
1152         doTest(myCollation, source[count], target[count], UCOL_LESS);
1153         count ++;
1154     }
1155 
1156     /* tests that \uD800\uDC02 still has the same value, not changed */
1157     enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
1158     mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
1159     if (enlen != mylen ||
1160         uprv_memcmp(enresult, myresult, enlen) != 0) {
1161         log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1162     }
1163 
1164     ucol_close(enCollation);
1165     ucol_close(myCollation);
1166     enCollation = NULL;
1167     myCollation = NULL;
1168 }
1169 
1170 /*
1171  *### TODO: Add more invalid rules to test all different scenarios.
1172  *
1173  */
1174 static void
TestInvalidRules()1175 TestInvalidRules(){
1176 #define MAX_ERROR_STATES 2
1177 
1178     static const char* rulesArr[MAX_ERROR_STATES] = {
1179         "& C < ch, cH, Ch[this should fail]<d",
1180         "& C < ch, cH, & Ch[variable top]"
1181     };
1182     static const char* preContextArr[MAX_ERROR_STATES] = {
1183         " C < ch, cH, Ch",
1184         "& C < ch, cH",
1185 
1186     };
1187     static const char* postContextArr[MAX_ERROR_STATES] = {
1188         "[this should fa",
1189         ", & Ch[variable"
1190     };
1191     int i;
1192 
1193     for(i = 0;i<MAX_ERROR_STATES;i++){
1194         UChar rules[1000]       = { '\0' };
1195         UChar preContextExp[1000]  = { '\0' };
1196         UChar postContextExp[1000] = { '\0' };
1197         UParseError parseError;
1198         UErrorCode status = U_ZERO_ERROR;
1199         UCollator* coll=0;
1200         u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1);
1201         u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1);
1202         u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1);
1203         /* clean up stuff in parseError */
1204         u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1205         u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1206         /* open the rules and test */
1207         coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status);
1208         (void)coll;   /* Suppress set but not used warning. */
1209         if(u_strcmp(parseError.preContext,preContextExp)!=0){
1210             log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1211                            aescstrdup(parseError.preContext, -1));
1212         }
1213         if(u_strcmp(parseError.postContext,postContextExp)!=0){
1214             log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1215                            aescstrdup(parseError.postContext, -1));
1216         }
1217     }
1218 }
1219 
1220 static void
TestJitterbug1098()1221 TestJitterbug1098(){
1222     UChar rule[1000];
1223     UCollator* c1 = NULL;
1224     UErrorCode status = U_ZERO_ERROR;
1225     UParseError parseError;
1226     char preContext[200]={0};
1227     char postContext[200]={0};
1228     int i=0;
1229     const char* rules[] = {
1230          "&''<\\\\",
1231          "&\\'<\\\\",
1232          "&\\\"<'\\'",
1233          "&'\"'<\\'",
1234          NULL
1235 
1236     };
1237     const UCollationResult results1098[] = {
1238         UCOL_LESS,
1239         UCOL_LESS,
1240         UCOL_LESS,
1241         UCOL_LESS,
1242     };
1243     const UChar input[][2]= {
1244         {0x0027,0x005c},
1245         {0x0027,0x005c},
1246         {0x0022,0x005c},
1247         {0x0022,0x0027},
1248     };
1249     UChar X[2] ={0};
1250     UChar Y[2] ={0};
1251     u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1252     u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1253     for(;rules[i]!=0;i++){
1254         u_uastrcpy(rule, rules[i]);
1255         c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1256         if(U_FAILURE(status)){
1257             log_err_status(status, "Could not parse the rules syntax. Error: %s\n", u_errorName(status));
1258 
1259             if (status == U_PARSE_ERROR) {
1260                 u_UCharsToChars(parseError.preContext,preContext,20);
1261                 u_UCharsToChars(parseError.postContext,postContext,20);
1262                 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
1263             }
1264 
1265             return;
1266         }
1267         X[0] = input[i][0];
1268         Y[0] = input[i][1];
1269         doTest(c1,X,Y,results1098[i]);
1270         ucol_close(c1);
1271     }
1272 }
1273 
1274 static void
TestFCDCrash(void)1275 TestFCDCrash(void) {
1276     static const char *test[] = {
1277     "Gr\\u00F6\\u00DFe",
1278     "Grossist"
1279     };
1280 
1281     UErrorCode status = U_ZERO_ERROR;
1282     UCollator *coll = ucol_open("es", &status);
1283     if(U_FAILURE(status)) {
1284         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1285         return;
1286     }
1287     ucol_close(coll);
1288     coll = NULL;
1289     ctest_resetICU();
1290     coll = ucol_open("de_DE", &status);
1291     if(U_FAILURE(status)) {
1292         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1293         return;
1294     }
1295     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1296     genericOrderingTest(coll, test, 2);
1297     ucol_close(coll);
1298 }
1299 
1300 /*static UBool
1301 find(UEnumeration* list, const char* str, UErrorCode* status){
1302     const char* value = NULL;
1303     int32_t length=0;
1304     if(U_FAILURE(*status)){
1305         return FALSE;
1306     }
1307     uenum_reset(list, status);
1308     while( (value= uenum_next(list, &length, status))!=NULL){
1309         if(strcmp(value, str)==0){
1310             return TRUE;
1311         }
1312     }
1313     return FALSE;
1314 }*/
1315 
TestJ5298(void)1316 static void TestJ5298(void)
1317 {
1318     UErrorCode status = U_ZERO_ERROR;
1319     char input[256], output[256];
1320     UBool isAvailable;
1321     int32_t i = 0;
1322     UEnumeration* values = NULL;
1323     const char *keywordValue = NULL;
1324     log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
1325     values = ucol_getKeywordValues("collation", &status);
1326     while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1327         if (strncmp(keywordValue, "private-", 8) == 0) {
1328             log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue);
1329         }
1330     }
1331     for (i = 0; i < ucol_countAvailable(); i++) {
1332         uenum_reset(values, &status);
1333         while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1334             strcpy(input, ucol_getAvailable(i));
1335             if (strcmp(keywordValue, "standard") != 0) {
1336                 strcat(input, "@collation=");
1337                 strcat(input, keywordValue);
1338             }
1339 
1340             ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status);
1341             if (strcmp(input, output) == 0) { /* Unique locale, print it out */
1342                 log_verbose("%s, \n", output);
1343             }
1344         }
1345     }
1346     uenum_close(values);
1347     log_verbose("\n");
1348 }
1349 
/* NULL-terminated list of locale IDs with unusual keywords, for TestBadKey(). */
static const char* badKeyLocales[] = {
    "@calendar=japanese;collation=search",  /* ucol_open OK */
    "@calendar=japanese",                   /* ucol_open OK */
    "en@calendar=x",                        /* ucol_open OK */
    "ja@calendar=x",                        /* ucol_open OK */
    "en@collation=x",                       /* ucol_open OK */
    "ja@collation=x",                       /* ucol_open OK */
    "ja@collation=private-kana",            /* ucol_open fails, verify it does not crash */
    "en@collation=\x80",                    /* (x80 undef in ASCII,EBCDIC) ucol_open fails, verify it does not crash */
    NULL
};
1361 
1362 // Mainly this is to check that we don't have a crash, but we check
1363 // for correct NULL return and FAILURE/SUCCESS status as a bonus.
TestBadKey(void)1364 static void TestBadKey(void)
1365 {
1366     const char* badLoc;
1367     const char** badLocsPtr = badKeyLocales;
1368     while ((badLoc = *badLocsPtr++) != NULL) {
1369         UErrorCode status = U_ZERO_ERROR;
1370         UCollator* uc = ucol_open(badLoc, &status);
1371         if ( U_SUCCESS(status) ) {
1372             if (uc == NULL) {
1373                 log_err("ucol_open sets SUCCESS but returns NULL, locale: %s\n", badLoc);
1374             }
1375             ucol_close(uc);
1376         } else if (uc != NULL) {
1377             log_err("ucol_open sets FAILURE but returns non-NULL, locale: %s\n", badLoc);
1378         }
1379     }
1380 }
1381 #endif /* #if !UCONFIG_NO_COLLATION */
1382