1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CITERTST.C
11 *
12 * Modification History:
13 * Date      Name               Description
14 *           Madhu Katragadda   Ported for C API
15 * 02/19/01  synwee             Modified test case for new collation iterator
16 *********************************************************************************/
17 /*
18  * Collation Iterator tests.
19  * (Let me reiterate my position...)
20  */
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_COLLATION
25 
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf16.h"
32 #include "unicode/putil.h"
33 #include "callcoll.h"
34 #include "cmemory.h"
35 #include "cintltst.h"
36 #include "citertst.h"
37 #include "ccolltst.h"
38 #include "filestrm.h"
39 #include "cstring.h"
40 #include "ucol_imp.h"
41 #include "uparse.h"
42 #include <stdio.h>
43 
/* Forward declaration of ucol_uprv_getCaseBits(); no call to it is visible in
   this part of the file.
   NOTE(review): presumably implemented in the collation library and not
   exposed through any of the headers included above - confirm it is still
   needed. */
extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
45 
/**
 * Registers the collation element iterator tests of this file with the test
 * tree, each under a "tscoll/citertst/..." path.
 *
 * @param root  test tree handle; forwarded unchanged to every addTest() call
 */
void addCollIterTest(TestNode** root)
{
    /* One row per test.  Rows are registered top to bottom. */
    static const struct {
        void       (*run)(void);
        const char  *path;
    } entries[] = {
        { TestPrevious,               "tscoll/citertst/TestPrevious" },
        { TestOffset,                 "tscoll/citertst/TestOffset" },
        { TestSetText,                "tscoll/citertst/TestSetText" },
        { TestMaxExpansion,           "tscoll/citertst/TestMaxExpansion" },
        { TestUnicodeChar,            "tscoll/citertst/TestUnicodeChar" },
        { TestNormalizedUnicodeChar,  "tscoll/citertst/TestNormalizedUnicodeChar" },
        { TestNormalization,          "tscoll/citertst/TestNormalization" },
        { TestBug672,                 "tscoll/citertst/TestBug672" },
        { TestBug672Normalize,        "tscoll/citertst/TestBug672Normalize" },
        { TestSmallBuffer,            "tscoll/citertst/TestSmallBuffer" },
        { TestDiscontiguos,           "tscoll/citertst/TestDiscontiguos" },
        { TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements" }
    };
    const int entryCount = (int)(sizeof(entries) / sizeof(entries[0]));
    int       idx;

    for (idx = 0; idx < entryCount; ++idx) {
        addTest(root, entries[idx].run, entries[idx].path);
    }
}
62 
/* Locales exercised by TestBug672() and TestBug672Normalize().  Both tests
   index this array with i = 0..2, so it must keep at least three entries. */

static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
66 
TestBug672()67 static void TestBug672() {
68     UErrorCode  status = U_ZERO_ERROR;
69     UChar       pattern[20];
70     UChar       text[50];
71     int         i;
72     int         result[3][3];
73 
74     u_uastrcpy(pattern, "resume");
75     u_uastrcpy(text, "Time to resume updating my resume.");
76 
77     for (i = 0; i < 3; ++ i) {
78         UCollator          *coll = ucol_open(LOCALES[i], &status);
79         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
80                                                      &status);
81         UCollationElements *titer = ucol_openElements(coll, text, -1,
82                                                      &status);
83         if (U_FAILURE(status)) {
84             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
85                     myErrorName(status));
86             return;
87         }
88 
89         log_verbose("locale tested %s\n", LOCALES[i]);
90 
91         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
92                U_SUCCESS(status)) {
93         }
94         if (U_FAILURE(status)) {
95             log_err("ERROR: reversing collation iterator :%s\n",
96                     myErrorName(status));
97             return;
98         }
99         ucol_reset(pitr);
100 
101         ucol_setOffset(titer, u_strlen(pattern), &status);
102         if (U_FAILURE(status)) {
103             log_err("ERROR: setting offset in collator :%s\n",
104                     myErrorName(status));
105             return;
106         }
107         result[i][0] = ucol_getOffset(titer);
108         log_verbose("Text iterator set to offset %d\n", result[i][0]);
109 
110         /* Use previous() */
111         ucol_previous(titer, &status);
112         result[i][1] = ucol_getOffset(titer);
113         log_verbose("Current offset %d after previous\n", result[i][1]);
114 
115         /* Add one to index */
116         log_verbose("Adding one to current offset...\n");
117         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
118         if (U_FAILURE(status)) {
119             log_err("ERROR: setting offset in collator :%s\n",
120                     myErrorName(status));
121             return;
122         }
123         result[i][2] = ucol_getOffset(titer);
124         log_verbose("Current offset in text = %d\n", result[i][2]);
125         ucol_closeElements(pitr);
126         ucol_closeElements(titer);
127         ucol_close(coll);
128     }
129 
130     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
131         uprv_memcmp(result[1], result[2], 3) != 0) {
132         log_err("ERROR: Different locales have different offsets at the same character\n");
133     }
134 }
135 
136 
137 
138 /*  Running this test with normalization enabled showed up a bug in the incremental
139     normalization code. */
TestBug672Normalize()140 static void TestBug672Normalize() {
141     UErrorCode  status = U_ZERO_ERROR;
142     UChar       pattern[20];
143     UChar       text[50];
144     int         i;
145     int         result[3][3];
146 
147     u_uastrcpy(pattern, "resume");
148     u_uastrcpy(text, "Time to resume updating my resume.");
149 
150     for (i = 0; i < 3; ++ i) {
151         UCollator          *coll = ucol_open(LOCALES[i], &status);
152         UCollationElements *pitr = NULL;
153         UCollationElements *titer = NULL;
154 
155         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
156 
157         pitr = ucol_openElements(coll, pattern, -1, &status);
158         titer = ucol_openElements(coll, text, -1, &status);
159         if (U_FAILURE(status)) {
160             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
161                     myErrorName(status));
162             return;
163         }
164 
165         log_verbose("locale tested %s\n", LOCALES[i]);
166 
167         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
168                U_SUCCESS(status)) {
169         }
170         if (U_FAILURE(status)) {
171             log_err("ERROR: reversing collation iterator :%s\n",
172                     myErrorName(status));
173             return;
174         }
175         ucol_reset(pitr);
176 
177         ucol_setOffset(titer, u_strlen(pattern), &status);
178         if (U_FAILURE(status)) {
179             log_err("ERROR: setting offset in collator :%s\n",
180                     myErrorName(status));
181             return;
182         }
183         result[i][0] = ucol_getOffset(titer);
184         log_verbose("Text iterator set to offset %d\n", result[i][0]);
185 
186         /* Use previous() */
187         ucol_previous(titer, &status);
188         result[i][1] = ucol_getOffset(titer);
189         log_verbose("Current offset %d after previous\n", result[i][1]);
190 
191         /* Add one to index */
192         log_verbose("Adding one to current offset...\n");
193         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
194         if (U_FAILURE(status)) {
195             log_err("ERROR: setting offset in collator :%s\n",
196                     myErrorName(status));
197             return;
198         }
199         result[i][2] = ucol_getOffset(titer);
200         log_verbose("Current offset in text = %d\n", result[i][2]);
201         ucol_closeElements(pitr);
202         ucol_closeElements(titer);
203         ucol_close(coll);
204     }
205 
206     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
207         uprv_memcmp(result[1], result[2], 3) != 0) {
208         log_err("ERROR: Different locales have different offsets at the same character\n");
209     }
210 }
211 
212 
213 
214 
215 /**
216  * Test for CollationElementIterator previous and next for the whole set of
217  * unicode characters.
218  */
TestUnicodeChar()219 static void TestUnicodeChar()
220 {
221     UChar source[0x100];
222     UCollator *en_us;
223     UCollationElements *iter;
224     UErrorCode status = U_ZERO_ERROR;
225     UChar codepoint;
226 
227     UChar *test;
228     en_us = ucol_open("en_US", &status);
229     if (U_FAILURE(status)){
230        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
231               myErrorName(status));
232        return;
233     }
234 
235     for (codepoint = 1; codepoint < 0xFFFE;)
236     {
237       test = source;
238 
239       while (codepoint % 0xFF != 0)
240       {
241         if (u_isdefined(codepoint))
242           *(test ++) = codepoint;
243         codepoint ++;
244       }
245 
246       if (u_isdefined(codepoint))
247         *(test ++) = codepoint;
248 
249       if (codepoint != 0xFFFF)
250         codepoint ++;
251 
252       *test = 0;
253       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
254       if(U_FAILURE(status)){
255           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
256               myErrorName(status));
257           ucol_close(en_us);
258           return;
259       }
260       /* A basic test to see if it's working at all */
261       log_verbose("codepoint testing %x\n", codepoint);
262       backAndForth(iter);
263       ucol_closeElements(iter);
264 
265       /* null termination test */
266       iter=ucol_openElements(en_us, source, -1, &status);
267       if(U_FAILURE(status)){
268           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
269               myErrorName(status));
270           ucol_close(en_us);
271           return;
272       }
273       /* A basic test to see if it's working at all */
274       backAndForth(iter);
275       ucol_closeElements(iter);
276     }
277 
278     ucol_close(en_us);
279 }
280 
281 /**
282  * Test for CollationElementIterator previous and next for the whole set of
283  * unicode characters with normalization on.
284  */
TestNormalizedUnicodeChar()285 static void TestNormalizedUnicodeChar()
286 {
287     UChar source[0x100];
288     UCollator *th_th;
289     UCollationElements *iter;
290     UErrorCode status = U_ZERO_ERROR;
291     UChar codepoint;
292 
293     UChar *test;
294     /* thai should have normalization on */
295     th_th = ucol_open("th_TH", &status);
296     if (U_FAILURE(status)){
297         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
298               myErrorName(status));
299         return;
300     }
301 
302     for (codepoint = 1; codepoint < 0xFFFE;)
303     {
304       test = source;
305 
306       while (codepoint % 0xFF != 0)
307       {
308         if (u_isdefined(codepoint))
309           *(test ++) = codepoint;
310         codepoint ++;
311       }
312 
313       if (u_isdefined(codepoint))
314         *(test ++) = codepoint;
315 
316       if (codepoint != 0xFFFF)
317         codepoint ++;
318 
319       *test = 0;
320       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
321       if(U_FAILURE(status)){
322           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
323               myErrorName(status));
324             ucol_close(th_th);
325           return;
326       }
327 
328       backAndForth(iter);
329       ucol_closeElements(iter);
330 
331       iter=ucol_openElements(th_th, source, -1, &status);
332       if(U_FAILURE(status)){
333           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
334               myErrorName(status));
335             ucol_close(th_th);
336           return;
337       }
338 
339       backAndForth(iter);
340       ucol_closeElements(iter);
341     }
342 
343     ucol_close(th_th);
344 }
345 
346 /**
347 * Test the incremental normalization
348 */
TestNormalization()349 static void TestNormalization()
350 {
351           UErrorCode          status = U_ZERO_ERROR;
352     const char               *str    =
353                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
354           UCollator          *coll;
355           UChar               rule[50];
356           int                 rulelen = u_unescape(str, rule, 50);
357           int                 count = 0;
358     const char                *testdata[] =
359                         {"\\u1ED9", "o\\u0323\\u0302",
360                         "\\u0300\\u0315", "\\u0315\\u0300",
361                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
362                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
363                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
364                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
365                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
366     int32_t   srclen;
367     UChar source[10];
368     UCollationElements *iter;
369 
370     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
371     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
372     if (U_FAILURE(status)){
373         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
374               myErrorName(status));
375         return;
376     }
377 
378     srclen = u_unescape(testdata[0], source, 10);
379     iter = ucol_openElements(coll, source, srclen, &status);
380     backAndForth(iter);
381     ucol_closeElements(iter);
382 
383     srclen = u_unescape(testdata[1], source, 10);
384     iter = ucol_openElements(coll, source, srclen, &status);
385     backAndForth(iter);
386     ucol_closeElements(iter);
387 
388     while (count < 12) {
389         srclen = u_unescape(testdata[count], source, 10);
390         iter = ucol_openElements(coll, source, srclen, &status);
391 
392         if (U_FAILURE(status)){
393             log_err("ERROR: in creation of collator element iterator\n %s\n",
394                   myErrorName(status));
395             return;
396         }
397         backAndForth(iter);
398         ucol_closeElements(iter);
399 
400         iter = ucol_openElements(coll, source, -1, &status);
401 
402         if (U_FAILURE(status)){
403             log_err("ERROR: in creation of collator element iterator\n %s\n",
404                   myErrorName(status));
405             return;
406         }
407         backAndForth(iter);
408         ucol_closeElements(iter);
409         count ++;
410     }
411     ucol_close(coll);
412 }
413 
414 /**
415  * Test for CollationElementIterator.previous()
416  *
417  * @bug 4108758 - Make sure it works with contracting characters
418  *
419  */
TestPrevious()420 static void TestPrevious()
421 {
422     UCollator *coll=NULL;
423     UChar rule[50];
424     UChar *source;
425     UCollator *c1, *c2, *c3;
426     UCollationElements *iter;
427     UErrorCode status = U_ZERO_ERROR;
428     UChar test1[50];
429     UChar test2[50];
430 
431     u_uastrcpy(test1, "What subset of all possible test cases?");
432     u_uastrcpy(test2, "has the highest probability of detecting");
433     coll = ucol_open("en_US", &status);
434 
435     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
436     log_verbose("English locale testing back and forth\n");
437     if(U_FAILURE(status)){
438         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
439             myErrorName(status));
440         ucol_close(coll);
441         return;
442     }
443     /* A basic test to see if it's working at all */
444     backAndForth(iter);
445     ucol_closeElements(iter);
446     ucol_close(coll);
447 
448     /* Test with a contracting character sequence */
449     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
450     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
451 
452     log_verbose("Contraction rule testing back and forth with no normalization\n");
453 
454     if (c1 == NULL || U_FAILURE(status))
455     {
456         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
457             myErrorName(status));
458         return;
459     }
460     source=(UChar*)malloc(sizeof(UChar) * 20);
461     u_uastrcpy(source, "abchdcba");
462     iter=ucol_openElements(c1, source, u_strlen(source), &status);
463     if(U_FAILURE(status)){
464         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
465             myErrorName(status));
466         return;
467     }
468     backAndForth(iter);
469     ucol_closeElements(iter);
470     ucol_close(c1);
471 
472     /* Test with an expanding character sequence */
473     u_uastrcpy(rule, "&a < b < c/abd < d");
474     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
475     log_verbose("Expansion rule testing back and forth with no normalization\n");
476     if (c2 == NULL || U_FAILURE(status))
477     {
478         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
479             myErrorName(status));
480         return;
481     }
482     u_uastrcpy(source, "abcd");
483     iter=ucol_openElements(c2, source, u_strlen(source), &status);
484     if(U_FAILURE(status)){
485         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
486             myErrorName(status));
487         return;
488     }
489     backAndForth(iter);
490     ucol_closeElements(iter);
491     ucol_close(c2);
492     /* Now try both */
493     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
494     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
495     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
496 
497     if (c3 == NULL || U_FAILURE(status))
498     {
499         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
500             myErrorName(status));
501         return;
502     }
503     u_uastrcpy(source, "abcdbchdc");
504     iter=ucol_openElements(c3, source, u_strlen(source), &status);
505     if(U_FAILURE(status)){
506         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
507             myErrorName(status));
508         return;
509     }
510     backAndForth(iter);
511     ucol_closeElements(iter);
512     ucol_close(c3);
513     source[0] = 0x0e41;
514     source[1] = 0x0e02;
515     source[2] = 0x0e41;
516     source[3] = 0x0e02;
517     source[4] = 0x0e27;
518     source[5] = 0x61;
519     source[6] = 0x62;
520     source[7] = 0x63;
521     source[8] = 0;
522 
523     coll = ucol_open("th_TH", &status);
524     log_verbose("Thai locale testing back and forth with normalization\n");
525     iter=ucol_openElements(coll, source, u_strlen(source), &status);
526     if(U_FAILURE(status)){
527         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
528             myErrorName(status));
529         return;
530     }
531     backAndForth(iter);
532     ucol_closeElements(iter);
533     ucol_close(coll);
534 
535     /* prev test */
536     source[0] = 0x0061;
537     source[1] = 0x30CF;
538     source[2] = 0x3099;
539     source[3] = 0x30FC;
540     source[4] = 0;
541 
542     coll = ucol_open("ja_JP", &status);
543     log_verbose("Japanese locale testing back and forth with normalization\n");
544     iter=ucol_openElements(coll, source, u_strlen(source), &status);
545     if(U_FAILURE(status)){
546         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
547             myErrorName(status));
548         return;
549     }
550     backAndForth(iter);
551     ucol_closeElements(iter);
552     ucol_close(coll);
553 
554     free(source);
555 }
556 
557 /**
558  * Test for getOffset() and setOffset()
559  */
TestOffset()560 static void TestOffset()
561 {
562     UErrorCode status= U_ZERO_ERROR;
563     UCollator *en_us=NULL;
564     UCollationElements *iter, *pristine;
565     int32_t offset;
566     OrderAndOffset *orders;
567     int32_t orderLength=0;
568     int     count = 0;
569     UChar test1[50];
570     UChar test2[50];
571 
572     u_uastrcpy(test1, "What subset of all possible test cases?");
573     u_uastrcpy(test2, "has the highest probability of detecting");
574     en_us = ucol_open("en_US", &status);
575     log_verbose("Testing getOffset and setOffset for collations\n");
576     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
577     if(U_FAILURE(status)){
578         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
579             myErrorName(status));
580         ucol_close(en_us);
581         return;
582     }
583 
584     /* testing boundaries */
585     ucol_setOffset(iter, 0, &status);
586     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
587         log_err("Error: After setting offset to 0, we should be at the end "
588                 "of the backwards iteration");
589     }
590     ucol_setOffset(iter, u_strlen(test1), &status);
591     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
592         log_err("Error: After setting offset to end of the string, we should "
593                 "be at the end of the backwards iteration");
594     }
595 
596     /* Run all the way through the iterator, then get the offset */
597 
598     orders = getOrders(iter, &orderLength);
599 
600     offset = ucol_getOffset(iter);
601 
602     if (offset != u_strlen(test1))
603     {
604         log_err("offset at end != length %d vs %d\n", offset,
605             u_strlen(test1) );
606     }
607 
608     /* Now set the offset back to the beginning and see if it works */
609     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
610     if(U_FAILURE(status)){
611         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
612             myErrorName(status));
613     ucol_close(en_us);
614         return;
615     }
616     status = U_ZERO_ERROR;
617 
618     ucol_setOffset(iter, 0, &status);
619     if (U_FAILURE(status))
620     {
621         log_err("setOffset failed. %s\n",    myErrorName(status));
622     }
623     else
624     {
625         assertEqual(iter, pristine);
626     }
627 
628     ucol_closeElements(pristine);
629     ucol_closeElements(iter);
630     free(orders);
631 
632     /* testing offsets in normalization buffer */
633     test1[0] = 0x61;
634     test1[1] = 0x300;
635     test1[2] = 0x316;
636     test1[3] = 0x62;
637     test1[4] = 0;
638     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
639     iter = ucol_openElements(en_us, test1, 4, &status);
640     if(U_FAILURE(status)){
641         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
642             myErrorName(status));
643         ucol_close(en_us);
644         return;
645     }
646 
647     count = 0;
648     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
649         U_SUCCESS(status)) {
650         switch (count) {
651         case 0:
652             if (ucol_getOffset(iter) != 1) {
653                 log_err("ERROR: Offset of iteration should be 1\n");
654             }
655             break;
656         case 3:
657             if (ucol_getOffset(iter) != 4) {
658                 log_err("ERROR: Offset of iteration should be 4\n");
659             }
660             break;
661         default:
662             if (ucol_getOffset(iter) != 3) {
663                 log_err("ERROR: Offset of iteration should be 3\n");
664             }
665         }
666         count ++;
667     }
668 
669     ucol_reset(iter);
670     count = 0;
671     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
672         U_SUCCESS(status)) {
673         switch (count) {
674         case 0:
675         case 1:
676             if (ucol_getOffset(iter) != 3) {
677                 log_err("ERROR: Offset of iteration should be 3\n");
678             }
679             break;
680         case 2:
681             if (ucol_getOffset(iter) != 1) {
682                 log_err("ERROR: Offset of iteration should be 1\n");
683             }
684             break;
685         default:
686             if (ucol_getOffset(iter) != 0) {
687                 log_err("ERROR: Offset of iteration should be 0\n");
688             }
689         }
690         count ++;
691     }
692 
693     if(U_FAILURE(status)){
694         log_err("ERROR: in iterating collation elements %s\n",
695             myErrorName(status));
696     }
697 
698     ucol_closeElements(iter);
699     ucol_close(en_us);
700 }
701 
702 /**
703  * Test for setText()
704  */
TestSetText()705 static void TestSetText()
706 {
707     int32_t c,i;
708     UErrorCode status = U_ZERO_ERROR;
709     UCollator *en_us=NULL;
710     UCollationElements *iter1, *iter2;
711     UChar test1[50];
712     UChar test2[50];
713 
714     u_uastrcpy(test1, "What subset of all possible test cases?");
715     u_uastrcpy(test2, "has the highest probability of detecting");
716     en_us = ucol_open("en_US", &status);
717     log_verbose("testing setText for Collation elements\n");
718     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
719     if(U_FAILURE(status)){
720         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
721             myErrorName(status));
722     ucol_close(en_us);
723         return;
724     }
725     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
726     if(U_FAILURE(status)){
727         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
728             myErrorName(status));
729     ucol_close(en_us);
730         return;
731     }
732 
733     /* Run through the second iterator just to exercise it */
734     c = ucol_next(iter2, &status);
735     i = 0;
736 
737     while ( ++i < 10 && (c != UCOL_NULLORDER))
738     {
739         if (U_FAILURE(status))
740         {
741             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
742             ucol_closeElements(iter2);
743             ucol_closeElements(iter1);
744     ucol_close(en_us);
745             return;
746         }
747 
748         c = ucol_next(iter2, &status);
749     }
750 
751     /* Now set it to point to the same string as the first iterator */
752     ucol_setText(iter2, test1, u_strlen(test1), &status);
753     if (U_FAILURE(status))
754     {
755         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
756     }
757     else
758     {
759         assertEqual(iter1, iter2);
760     }
761 
762     /* Now set it to point to a null string with fake length*/
763     ucol_setText(iter2, NULL, 2, &status);
764     if (status != U_ILLEGAL_ARGUMENT_ERROR)
765     {
766         log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
767                 myErrorName(status));
768     }
769 
770     ucol_closeElements(iter2);
771     ucol_closeElements(iter1);
772     ucol_close(en_us);
773 }
774 
775 /** @bug 4108762
776  * Test for getMaxExpansion()
777  */
TestMaxExpansion()778 static void TestMaxExpansion()
779 {
780     UErrorCode          status = U_ZERO_ERROR;
781     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
782     UChar               ch     = 0;
783     UChar32             unassigned = 0xEFFFD;
784     UChar               supplementary[2];
785     uint32_t            stringOffset = 0;
786     UBool               isError = FALSE;
787     uint32_t            sorder = 0;
788     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
789     uint32_t            temporder = 0;
790 
791     UChar rule[256];
792     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
793     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
794         UCOL_DEFAULT_STRENGTH,NULL, &status);
795     if(U_SUCCESS(status) && coll) {
796       iter = ucol_openElements(coll, &ch, 1, &status);
797 
798       while (ch < 0xFFFF && U_SUCCESS(status)) {
799           int      count = 1;
800           uint32_t order;
801           int32_t  size = 0;
802 
803           ch ++;
804 
805           ucol_setText(iter, &ch, 1, &status);
806           order = ucol_previous(iter, &status);
807 
808           /* thai management */
809           if (order == 0)
810               order = ucol_previous(iter, &status);
811 
812           while (U_SUCCESS(status) &&
813               ucol_previous(iter, &status) != UCOL_NULLORDER) {
814               count ++;
815           }
816 
817           size = ucol_getMaxExpansion(iter, order);
818           if (U_FAILURE(status) || size < count) {
819               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
820                   ch, count);
821           }
822       }
823 
824       /* testing for exact max expansion */
825       ch = 0;
826       while (ch < 0x61) {
827           uint32_t order;
828           int32_t  size;
829           ucol_setText(iter, &ch, 1, &status);
830           order = ucol_previous(iter, &status);
831           size  = ucol_getMaxExpansion(iter, order);
832           if (U_FAILURE(status) || size != 1) {
833               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
834                   ch, 1);
835           }
836           ch ++;
837       }
838 
839       ch = 0x63;
840       ucol_setText(iter, &ch, 1, &status);
841       temporder = ucol_previous(iter, &status);
842 
843       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
844           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
845                   ch, 3);
846       }
847 
848       ch = 0x64;
849       ucol_setText(iter, &ch, 1, &status);
850       temporder = ucol_previous(iter, &status);
851 
852       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
853           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
854                   ch, 3);
855       }
856 
857       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
858       (void)isError;    /* Suppress set but not used warning. */
859       ucol_setText(iter, supplementary, 2, &status);
860       sorder = ucol_previous(iter, &status);
861 
862       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
863           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
864                   ch, 2);
865       }
866 
867       /* testing jamo */
868       ch = 0x1165;
869 
870       ucol_setText(iter, &ch, 1, &status);
871       temporder = ucol_previous(iter, &status);
872       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
873           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
874                   ch, 3);
875       }
876 
877       ucol_closeElements(iter);
878       ucol_close(coll);
879 
880       /* testing special jamo &a<\u1160 */
881       rule[0] = 0x26;
882       rule[1] = 0x71;
883       rule[2] = 0x3c;
884       rule[3] = 0x1165;
885       rule[4] = 0x2f;
886       rule[5] = 0x71;
887       rule[6] = 0x71;
888       rule[7] = 0x71;
889       rule[8] = 0x71;
890       rule[9] = 0;
891 
892       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
893           UCOL_DEFAULT_STRENGTH,NULL, &status);
894       iter = ucol_openElements(coll, &ch, 1, &status);
895 
896       temporder = ucol_previous(iter, &status);
897       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
898           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
899                   ch, 5);
900       }
901 
902       ucol_closeElements(iter);
903       ucol_close(coll);
904     } else {
905       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
906     }
907 
908 }
909 
910 
/**
 * Steps two collation element iterators forward in lockstep and reports the
 * first position at which the elements they return differ.
 * Iteration stops at the first mismatch or once UCOL_NULLORDER is returned.
 * @param i1 first iterator, advanced with ucol_next
 * @param i2 second iterator, advanced with ucol_next
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    int32_t    step;

    for (step = 0; ; step++) {
        int32_t left  = ucol_next(i1, &errorCode);
        int32_t right = ucol_next(i2, &errorCode);

        if (left != right) {
            log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", step, left, right);
            return;
        }
        if (left == UCOL_NULLORDER) {
            /* both iterators have matched all the way to the end marker */
            return;
        }
    }
}
932 
933 /**
934  * Testing iterators with extremely small buffers
935  */
TestSmallBuffer()936 static void TestSmallBuffer()
937 {
938     UErrorCode          status = U_ZERO_ERROR;
939     UCollator          *coll;
940     UCollationElements *testiter,
941                        *iter;
942     int32_t             count = 0;
943     OrderAndOffset     *testorders,
944                        *orders;
945 
946     UChar teststr[500];
947     UChar str[] = {0x300, 0x31A, 0};
948     /*
949     creating a long string of decomposable characters,
950     since by default the writable buffer is of size 256
951     */
952     while (count < 500) {
953         if ((count & 1) == 0) {
954             teststr[count ++] = 0x300;
955         }
956         else {
957             teststr[count ++] = 0x31A;
958         }
959     }
960 
961     coll = ucol_open("th_TH", &status);
962     if(U_SUCCESS(status) && coll) {
963       testiter = ucol_openElements(coll, teststr, 500, &status);
964       iter = ucol_openElements(coll, str, 2, &status);
965 
966       orders     = getOrders(iter, &count);
967       if (count != 2) {
968           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
969       }
970 
971       /*
972       this will rearrange the string data to 250 characters of 0x300 first then
973       250 characters of 0x031A
974       */
975       testorders = getOrders(testiter, &count);
976 
977       if (count != 500) {
978           log_err("Error decomposition does not give the right sized collation elements\n");
979       }
980 
981       while (count != 0) {
982           /* UCA collation element for 0x0F76 */
983           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
984               (count <= 250 && testorders[-- count].order != orders[0].order)) {
985               log_err("Error decomposition does not give the right collation element at %d count\n", count);
986               break;
987           }
988       }
989 
990       free(testorders);
991       free(orders);
992 
993       ucol_reset(testiter);
994 
995       /* ensures closing of elements done properly to clear writable buffer */
996       ucol_next(testiter, &status);
997       ucol_next(testiter, &status);
998       ucol_closeElements(testiter);
999       ucol_closeElements(iter);
1000       ucol_close(coll);
1001     } else {
1002       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1003     }
1004 }
1005 
1006 /**
1007 * Testing the discontigous contractions
1008 */
TestDiscontiguos()1009 static void TestDiscontiguos() {
1010     const char               *rulestr    =
1011                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1012           UChar               rule[50];
1013           int                 rulelen = u_unescape(rulestr, rule, 50);
1014     const char               *src[] = {
1015      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1016     /* base character blocked */
1017      "XD\\u0300", "XD\\u0300\\u0315",
1018     /* non blocking combining character */
1019      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1020      /* blocking combining character */
1021      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1022      /* contraction prefix */
1023      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1024      "X\\u0300\\u031A\\u0315",
1025      /* ends not with a contraction character */
1026      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1027      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1028     };
1029     const char               *tgt[] = {
1030      /* non blocking combining character */
1031      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1032     /* base character blocked */
1033      "X D \\u0300", "X D \\u0300\\u0315",
1034     /* non blocking combining character */
1035      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1036      /* blocking combining character */
1037      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1038      /* contraction prefix */
1039      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1040      "X\\u0300 \\u031A \\u0315",
1041      /* ends not with a contraction character */
1042      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1043      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1044     };
1045           int                 size   = 20;
1046           UCollator          *coll;
1047           UErrorCode          status    = U_ZERO_ERROR;
1048           int                 count     = 0;
1049           UCollationElements *iter;
1050           UCollationElements *resultiter;
1051 
1052     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1053     iter       = ucol_openElements(coll, rule, 1, &status);
1054     resultiter = ucol_openElements(coll, rule, 1, &status);
1055 
1056     if (U_FAILURE(status)) {
1057         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1058         return;
1059     }
1060 
1061     while (count < size) {
1062         UChar  str[20];
1063         UChar  tstr[20];
1064         int    strLen = u_unescape(src[count], str, 20);
1065         UChar *s;
1066 
1067         ucol_setText(iter, str, strLen, &status);
1068         if (U_FAILURE(status)) {
1069             log_err("Error opening collation iterator\n");
1070             return;
1071         }
1072 
1073         u_unescape(tgt[count], tstr, 20);
1074         s = tstr;
1075 
1076         log_verbose("count %d\n", count);
1077 
1078         for (;;) {
1079             uint32_t  ce;
1080             UChar    *e = u_strchr(s, 0x20);
1081             if (e == 0) {
1082                 e = u_strchr(s, 0);
1083             }
1084             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1085             ce = ucol_next(resultiter, &status);
1086             if (U_FAILURE(status)) {
1087                 log_err("Error manipulating collation iterator\n");
1088                 return;
1089             }
1090             while (ce != UCOL_NULLORDER) {
1091                 if (ce != (uint32_t)ucol_next(iter, &status) ||
1092                     U_FAILURE(status)) {
1093                     log_err("Discontiguos contraction test mismatch\n");
1094                     return;
1095                 }
1096                 ce = ucol_next(resultiter, &status);
1097                 if (U_FAILURE(status)) {
1098                     log_err("Error getting next collation element\n");
1099                     return;
1100                 }
1101             }
1102             s = e + 1;
1103             if (*e == 0) {
1104                 break;
1105             }
1106         }
1107         ucol_reset(iter);
1108         backAndForth(iter);
1109         count ++;
1110     }
1111     ucol_closeElements(resultiter);
1112     ucol_closeElements(iter);
1113     ucol_close(coll);
1114 }
1115 
1116 /**
1117 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1118 * normalization on AND jamo tailoring, among other things.
1119 *
1120 * Note: This test is sensitive to changes of the root collator,
1121 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1122 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1123 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1124 * For example, the DUCET's artificial secondary CE in the ae-ligature
1125 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1126 */
1127 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
1128     0x0020, 0xAC00,                 /* simple LV Hangul */
1129     0x0020, 0xAC01,                 /* simple LVT Hangul */
1130     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
1131     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
1132     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1133     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1134     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1135     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1136     0x0020, 0x00E6,                 /* small letter ae, expands */
1137     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
1138     0x0020
1139 };
1140 enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
1141 
1142 static const int32_t rootStandardOffsets[] = {
1143     0,  1,2,
1144     2,  3,4,4,
1145     4,  5,6,6,
1146     6,  7,8,8,
1147     8,  9,10,11,
1148     12, 13,14,15,
1149     16, 17,18,19,
1150     20, 21,22,23,
1151     24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1152     26, 27,28,28,
1153     28,
1154     29
1155 };
1156 enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
1157 
1158 static const int32_t rootSearchOffsets[] = {
1159     0,  1,2,
1160     2,  3,4,4,
1161     4,  5,6,6,6,
1162     6,  7,8,8,8,8,8,8,
1163     8,  9,10,11,
1164     12, 13,14,15,
1165     16, 17,18,19,20,
1166     20, 21,22,22,23,23,23,24,
1167     24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1168     26, 27,28,28,
1169     28,
1170     29
1171 };
1172 enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
1173 
1174 typedef struct {
1175     const char *    locale;
1176     const int32_t * offsets;
1177     int32_t         offsetsLen;
1178 } TSCEItem;
1179 
1180 static const TSCEItem tsceItems[] = {
1181     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
1182     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
1183     { NULL,                    NULL,                0                        }
1184 };
1185 
TestSearchCollatorElements(void)1186 static void TestSearchCollatorElements(void)
1187 {
1188     const TSCEItem * tsceItemPtr;
1189     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1190         UErrorCode status = U_ZERO_ERROR;
1191         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1192         if ( U_SUCCESS(status) ) {
1193             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1194             if ( U_SUCCESS(status) ) {
1195                 int32_t offset, element;
1196                 const int32_t * nextOffsetPtr;
1197                 const int32_t * limitOffsetPtr;
1198 
1199                 nextOffsetPtr = tsceItemPtr->offsets;
1200                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1201                 do {
1202                     offset = ucol_getOffset(uce);
1203                     element = ucol_next(uce, &status);
1204                     log_verbose("(%s) offset=%2d  ce=%08x\n", tsceItemPtr->locale, offset, element);
1205                     if ( element == 0 ) {
1206                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1207                     }
1208                     if ( nextOffsetPtr < limitOffsetPtr ) {
1209                         if (offset != *nextOffsetPtr) {
1210                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1211                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
1212                             nextOffsetPtr = limitOffsetPtr;
1213                             break;
1214                         }
1215                         nextOffsetPtr++;
1216                     } else {
1217                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1218                     }
1219                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1220                 if ( nextOffsetPtr < limitOffsetPtr ) {
1221                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1222                 }
1223 
1224                 ucol_setOffset(uce, kLen_tsceText, &status);
1225                 status = U_ZERO_ERROR;
1226                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1227                 limitOffsetPtr = tsceItemPtr->offsets;
1228                 do {
1229                     offset = ucol_getOffset(uce);
1230                     element = ucol_previous(uce, &status);
1231                     if ( element == 0 ) {
1232                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1233                     }
1234                     if ( nextOffsetPtr > limitOffsetPtr ) {
1235                         nextOffsetPtr--;
1236                         if (offset != *nextOffsetPtr) {
1237                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1238                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
1239                             nextOffsetPtr = limitOffsetPtr;
1240                             break;
1241                         }
1242                    } else {
1243                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1244                     }
1245                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1246                 if ( nextOffsetPtr > limitOffsetPtr ) {
1247                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1248                 }
1249 
1250                 ucol_closeElements(uce);
1251             } else {
1252                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1253             }
1254             ucol_close(ucol);
1255         } else {
1256             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1257         }
1258     }
1259 }
1260 
1261 #endif /* #if !UCONFIG_NO_COLLATION */
1262