1 /*
2 **********************************************************************
3 * Copyright (c) 2013-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 
#include <stdlib.h>
#include <string.h>
#include "unicode/localpointer.h"
#include "unicode/uperf.h"
#include "unicode/ucol.h"
#include "unicode/coll.h"
#include "unicode/uiter.h"
#include "unicode/ustring.h"
#include "unicode/sortkey.h"
#include "uarrsort.h"
#include "uoptions.h"
#include "ustr_imp.h"
19 
/*
 * COMPACT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * stores a sequence of strings of UNIT code units back to back in one buffer.
 *
 *   count  number of strings appended so far
 *   index  count + 1 offsets into data; string i occupies
 *          data[index[i]] .. data[index[i + 1] - 1], terminator included
 *   data   the storage shared by all strings
 *
 * append_one(theLen) reserves theLen units (terminator included) for one more
 * string; the caller then fills the reserved slot through last().
 * The copy constructor and copy assignment operator are declared but not
 * defined, so the struct cannot be copied.
 * A failed allocation ends the process with abort(): the struct has no way to
 * report the error, and continuing would dereference a null pointer.
 */
#define COMPACT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    CompactArrays() : count(0), index(NULL), data(NULL) { \
        index = (int32_t *) malloc(sizeof(int32_t)); \
        if (index == NULL) { abort(); } \
        index[0] = 0; \
    } \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
        /*grow through temporaries so a failed realloc cannot overwrite the live pointer*/ \
        int32_t * grownIndex = (int32_t *) realloc(index, sizeof(int32_t) * (count + 2)); \
        if (grownIndex == NULL) { abort(); } \
        index = grownIndex; \
        index[count + 1] = index[count] + theLen; \
        UNIT * grownData = (UNIT *) realloc(data, sizeof(UNIT) * index[count + 1]); \
        if (grownData == NULL && index[count + 1] > 0) { abort(); } \
        data = grownData; \
        count++; \
    } \
    UNIT * last(){return data + index[count - 1];} \
    const UNIT * dataOf(int32_t i) const {return data + index[i];} \
    int32_t lengthOf(int32_t i) const {return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/  \
};
43 
44 COMPACT_ARRAY(CA_uchar, UChar)
45 COMPACT_ARRAY(CA_char, char)
46 
47 #define MAX_TEST_STRINGS_FOR_PERMUTING 1000
48 
49 // C API test cases
50 
51 //
52 // Test case taking a single test data array, calling ucol_strcoll by permuting the test data
53 //
54 class Strcoll : public UPerfFunction
55 {
56 public:
57     Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen);
58     ~Strcoll();
59     virtual void call(UErrorCode* status);
60     virtual long getOperationsPerIteration();
61 
62 private:
63     const UCollator *coll;
64     const CA_uchar *source;
65     UBool useLen;
66     int32_t maxTestStrings;
67 };
68 
Strcoll(const UCollator * coll,const CA_uchar * source,UBool useLen)69 Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen)
70     :   coll(coll),
71         source(source),
72         useLen(useLen)
73 {
74     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
75 }
76 
~Strcoll()77 Strcoll::~Strcoll()
78 {
79 }
80 
call(UErrorCode * status)81 void Strcoll::call(UErrorCode* status)
82 {
83     if (U_FAILURE(*status)) return;
84 
85     // call strcoll for permutation
86     int32_t divisor = source->count / maxTestStrings;
87     int32_t srcLen, tgtLen;
88     int32_t cmp = 0;
89     for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
90         if (i % divisor) continue;
91         numTestStringsI++;
92         srcLen = useLen ? source->lengthOf(i) : -1;
93         for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
94             if (j % divisor) continue;
95             numTestStringsJ++;
96             tgtLen = useLen ? source->lengthOf(j) : -1;
97             cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
98         }
99     }
100     // At the end, cmp must be 0
101     if (cmp != 0) {
102         *status = U_INTERNAL_PROGRAM_ERROR;
103     }
104 }
105 
getOperationsPerIteration()106 long Strcoll::getOperationsPerIteration()
107 {
108     return maxTestStrings * maxTestStrings;
109 }
110 
111 //
112 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
113 //
114 class Strcoll_2 : public UPerfFunction
115 {
116 public:
117     Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
118     ~Strcoll_2();
119     virtual void call(UErrorCode* status);
120     virtual long getOperationsPerIteration();
121 
122 private:
123     const UCollator *coll;
124     const CA_uchar *source;
125     const CA_uchar *target;
126     UBool useLen;
127 };
128 
Strcoll_2(const UCollator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)129 Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
130     :   coll(coll),
131         source(source),
132         target(target),
133         useLen(useLen)
134 {
135 }
136 
~Strcoll_2()137 Strcoll_2::~Strcoll_2()
138 {
139 }
140 
call(UErrorCode * status)141 void Strcoll_2::call(UErrorCode* status)
142 {
143     if (U_FAILURE(*status)) return;
144 
145     // call strcoll for two strings at the same index
146     if (source->count < target->count) {
147         *status = U_ILLEGAL_ARGUMENT_ERROR;
148     } else {
149         for (int32_t i = 0; i < source->count; i++) {
150             int32_t srcLen = useLen ? source->lengthOf(i) : -1;
151             int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
152             ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
153         }
154     }
155 }
156 
getOperationsPerIteration()157 long Strcoll_2::getOperationsPerIteration()
158 {
159     return source->count;
160 }
161 
162 
163 //
164 // Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data
165 //
166 class StrcollUTF8 : public UPerfFunction
167 {
168 public:
169     StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen);
170     ~StrcollUTF8();
171     virtual void call(UErrorCode* status);
172     virtual long getOperationsPerIteration();
173 
174 private:
175     const UCollator *coll;
176     const CA_char *source;
177     UBool useLen;
178     int32_t maxTestStrings;
179 };
180 
StrcollUTF8(const UCollator * coll,const CA_char * source,UBool useLen)181 StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen)
182     :   coll(coll),
183         source(source),
184         useLen(useLen)
185 {
186     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
187 }
188 
~StrcollUTF8()189 StrcollUTF8::~StrcollUTF8()
190 {
191 }
192 
call(UErrorCode * status)193 void StrcollUTF8::call(UErrorCode* status)
194 {
195     if (U_FAILURE(*status)) return;
196 
197     // call strcollUTF8 for permutation
198     int32_t divisor = source->count / maxTestStrings;
199     int32_t srcLen, tgtLen;
200     int32_t cmp = 0;
201     for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
202         if (i % divisor) continue;
203         numTestStringsI++;
204         srcLen = useLen ? source->lengthOf(i) : -1;
205         for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
206             if (j % divisor) continue;
207             numTestStringsJ++;
208             tgtLen = useLen ? source->lengthOf(j) : -1;
209             cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status);
210         }
211     }
212     // At the end, cmp must be 0
213     if (cmp != 0) {
214         *status = U_INTERNAL_PROGRAM_ERROR;
215     }
216 }
217 
getOperationsPerIteration()218 long StrcollUTF8::getOperationsPerIteration()
219 {
220     return maxTestStrings * maxTestStrings;
221 }
222 
223 //
224 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
225 //
226 class StrcollUTF8_2 : public UPerfFunction
227 {
228 public:
229     StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen);
230     ~StrcollUTF8_2();
231     virtual void call(UErrorCode* status);
232     virtual long getOperationsPerIteration();
233 
234 private:
235     const UCollator *coll;
236     const CA_char *source;
237     const CA_char *target;
238     UBool useLen;
239 };
240 
StrcollUTF8_2(const UCollator * coll,const CA_char * source,const CA_char * target,UBool useLen)241 StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen)
242     :   coll(coll),
243         source(source),
244         target(target),
245         useLen(useLen)
246 {
247 }
248 
~StrcollUTF8_2()249 StrcollUTF8_2::~StrcollUTF8_2()
250 {
251 }
252 
call(UErrorCode * status)253 void StrcollUTF8_2::call(UErrorCode* status)
254 {
255     if (U_FAILURE(*status)) return;
256 
257     // call strcoll for two strings at the same index
258     if (source->count < target->count) {
259         *status = U_ILLEGAL_ARGUMENT_ERROR;
260     } else {
261         for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
262             int32_t srcLen = useLen ? source->lengthOf(i) : -1;
263             int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
264             ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status);
265         }
266     }
267 }
268 
getOperationsPerIteration()269 long StrcollUTF8_2::getOperationsPerIteration()
270 {
271     return source->count;
272 }
273 
274 //
275 // Test case taking a single test data array, calling ucol_getSortKey for each
276 //
277 class GetSortKey : public UPerfFunction
278 {
279 public:
280     GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen);
281     ~GetSortKey();
282     virtual void call(UErrorCode* status);
283     virtual long getOperationsPerIteration();
284 
285 private:
286     const UCollator *coll;
287     const CA_uchar *source;
288     UBool useLen;
289 };
290 
GetSortKey(const UCollator * coll,const CA_uchar * source,UBool useLen)291 GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen)
292     :   coll(coll),
293         source(source),
294         useLen(useLen)
295 {
296 }
297 
~GetSortKey()298 GetSortKey::~GetSortKey()
299 {
300 }
301 
302 #define KEY_BUF_SIZE 512
303 
call(UErrorCode * status)304 void GetSortKey::call(UErrorCode* status)
305 {
306     if (U_FAILURE(*status)) return;
307 
308     uint8_t key[KEY_BUF_SIZE];
309     int32_t len;
310 
311     if (useLen) {
312         for (int32_t i = 0; i < source->count; i++) {
313             len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE);
314         }
315     } else {
316         for (int32_t i = 0; i < source->count; i++) {
317             len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE);
318         }
319     }
320 }
321 
getOperationsPerIteration()322 long GetSortKey::getOperationsPerIteration()
323 {
324     return source->count;
325 }
326 
327 //
328 // Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the
329 // given buffer size
330 //
331 class NextSortKeyPart : public UPerfFunction
332 {
333 public:
334     NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1);
335     ~NextSortKeyPart();
336     virtual void call(UErrorCode* status);
337     virtual long getOperationsPerIteration();
338     virtual long getEventsPerIteration();
339 
340 private:
341     const UCollator *coll;
342     const CA_uchar *source;
343     int32_t bufSize;
344     int32_t maxIteration;
345     long events;
346 };
347 
348 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPart(const UCollator * coll,const CA_uchar * source,int32_t bufSize,int32_t maxIteration)349 NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
350     :   coll(coll),
351         source(source),
352         bufSize(bufSize),
353         maxIteration(maxIteration),
354         events(0)
355 {
356 }
357 
~NextSortKeyPart()358 NextSortKeyPart::~NextSortKeyPart()
359 {
360 }
361 
call(UErrorCode * status)362 void NextSortKeyPart::call(UErrorCode* status)
363 {
364     if (U_FAILURE(*status)) return;
365 
366     uint8_t *part = (uint8_t *)malloc(bufSize);
367     uint32_t state[2];
368     UCharIterator iter;
369 
370     events = 0;
371     for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
372         uiter_setString(&iter, source->dataOf(i), source->lengthOf(i));
373         state[0] = 0;
374         state[1] = 0;
375         int32_t partLen = bufSize;
376         for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
377             partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
378             events++;
379         }
380     }
381     free(part);
382 }
383 
getOperationsPerIteration()384 long NextSortKeyPart::getOperationsPerIteration()
385 {
386     return source->count;
387 }
388 
getEventsPerIteration()389 long NextSortKeyPart::getEventsPerIteration()
390 {
391     return events;
392 }
393 
394 //
395 // Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the
396 // given buffer size
397 //
398 class NextSortKeyPartUTF8 : public UPerfFunction
399 {
400 public:
401     NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1);
402     ~NextSortKeyPartUTF8();
403     virtual void call(UErrorCode* status);
404     virtual long getOperationsPerIteration();
405     virtual long getEventsPerIteration();
406 
407 private:
408     const UCollator *coll;
409     const CA_char *source;
410     int32_t bufSize;
411     int32_t maxIteration;
412     long events;
413 };
414 
415 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPartUTF8(const UCollator * coll,const CA_char * source,int32_t bufSize,int32_t maxIteration)416 NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
417     :   coll(coll),
418         source(source),
419         bufSize(bufSize),
420         maxIteration(maxIteration),
421         events(0)
422 {
423 }
424 
~NextSortKeyPartUTF8()425 NextSortKeyPartUTF8::~NextSortKeyPartUTF8()
426 {
427 }
428 
call(UErrorCode * status)429 void NextSortKeyPartUTF8::call(UErrorCode* status)
430 {
431     if (U_FAILURE(*status)) return;
432 
433     uint8_t *part = (uint8_t *)malloc(bufSize);
434     uint32_t state[2];
435     UCharIterator iter;
436 
437     events = 0;
438     for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
439         uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i));
440         state[0] = 0;
441         state[1] = 0;
442         int32_t partLen = bufSize;
443         for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
444             partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
445             events++;
446         }
447     }
448     free(part);
449 }
450 
getOperationsPerIteration()451 long NextSortKeyPartUTF8::getOperationsPerIteration()
452 {
453     return source->count;
454 }
455 
getEventsPerIteration()456 long NextSortKeyPartUTF8::getEventsPerIteration()
457 {
458     return events;
459 }
460 
461 // CPP API test cases
462 
463 //
464 // Test case taking a single test data array, calling Collator::compare by permuting the test data
465 //
466 class CppCompare : public UPerfFunction
467 {
468 public:
469     CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen);
470     ~CppCompare();
471     virtual void call(UErrorCode* status);
472     virtual long getOperationsPerIteration();
473 
474 private:
475     const Collator *coll;
476     const CA_uchar *source;
477     UBool useLen;
478     int32_t maxTestStrings;
479 };
480 
CppCompare(const Collator * coll,const CA_uchar * source,UBool useLen)481 CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen)
482     :   coll(coll),
483         source(source),
484         useLen(useLen)
485 {
486     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
487 }
488 
~CppCompare()489 CppCompare::~CppCompare()
490 {
491 }
492 
call(UErrorCode * status)493 void CppCompare::call(UErrorCode* status) {
494     if (U_FAILURE(*status)) return;
495 
496     // call compare for permutation of test data
497     int32_t divisor = source->count / maxTestStrings;
498     int32_t srcLen, tgtLen;
499     int32_t cmp = 0;
500     for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
501         if (i % divisor) continue;
502         numTestStringsI++;
503         srcLen = useLen ? source->lengthOf(i) : -1;
504         for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
505             if (j % divisor) continue;
506             numTestStringsJ++;
507             tgtLen = useLen ? source->lengthOf(j) : -1;
508             cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
509         }
510     }
511     // At the end, cmp must be 0
512     if (cmp != 0) {
513         *status = U_INTERNAL_PROGRAM_ERROR;
514     }
515 }
516 
getOperationsPerIteration()517 long CppCompare::getOperationsPerIteration()
518 {
519     return maxTestStrings * maxTestStrings;
520 }
521 
522 //
523 // Test case taking two test data arrays, calling Collator::compare for strings at a same index
524 //
525 class CppCompare_2 : public UPerfFunction
526 {
527 public:
528     CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
529     ~CppCompare_2();
530     virtual void call(UErrorCode* status);
531     virtual long getOperationsPerIteration();
532 
533 private:
534     const Collator *coll;
535     const CA_uchar *source;
536     const CA_uchar *target;
537     UBool useLen;
538 };
539 
CppCompare_2(const Collator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)540 CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
541     :   coll(coll),
542         source(source),
543         target(target),
544         useLen(useLen)
545 {
546 }
547 
~CppCompare_2()548 CppCompare_2::~CppCompare_2()
549 {
550 }
551 
call(UErrorCode * status)552 void CppCompare_2::call(UErrorCode* status) {
553     if (U_FAILURE(*status)) return;
554 
555     // call strcoll for two strings at the same index
556     if (source->count < target->count) {
557         *status = U_ILLEGAL_ARGUMENT_ERROR;
558     } else {
559         for (int32_t i = 0; i < source->count; i++) {
560             int32_t srcLen = useLen ? source->lengthOf(i) : -1;
561             int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
562             coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
563         }
564     }
565 }
566 
getOperationsPerIteration()567 long CppCompare_2::getOperationsPerIteration()
568 {
569     return source->count;
570 }
571 
572 
573 //
574 // Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data
575 //
576 class CppCompareUTF8 : public UPerfFunction
577 {
578 public:
579     CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen);
580     ~CppCompareUTF8();
581     virtual void call(UErrorCode* status);
582     virtual long getOperationsPerIteration();
583 
584 private:
585     const Collator *coll;
586     const CA_char *source;
587     UBool useLen;
588     int32_t maxTestStrings;
589 };
590 
CppCompareUTF8(const Collator * coll,const CA_char * source,UBool useLen)591 CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen)
592     :   coll(coll),
593         source(source),
594         useLen(useLen)
595 {
596     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
597 }
598 
~CppCompareUTF8()599 CppCompareUTF8::~CppCompareUTF8()
600 {
601 }
602 
call(UErrorCode * status)603 void CppCompareUTF8::call(UErrorCode* status) {
604     if (U_FAILURE(*status)) return;
605 
606     // call compareUTF8 for all permutations
607     int32_t divisor = source->count / maxTestStrings;
608     StringPiece src, tgt;
609     int32_t cmp = 0;
610     for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
611         if (i % divisor) continue;
612         numTestStringsI++;
613 
614         if (useLen) {
615             src.set(source->dataOf(i), source->lengthOf(i));
616         } else {
617             src.set(source->dataOf(i));
618         }
619         for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
620             if (j % divisor) continue;
621             numTestStringsJ++;
622 
623             if (useLen) {
624                 tgt.set(source->dataOf(i), source->lengthOf(i));
625             } else {
626                 tgt.set(source->dataOf(i));
627             }
628             cmp += coll->compareUTF8(src, tgt, *status);
629         }
630     }
631     // At the end, cmp must be 0
632     if (cmp != 0) {
633         *status = U_INTERNAL_PROGRAM_ERROR;
634     }
635 }
636 
getOperationsPerIteration()637 long CppCompareUTF8::getOperationsPerIteration()
638 {
639     return maxTestStrings * maxTestStrings;
640 }
641 
642 
643 //
644 // Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index
645 //
646 class CppCompareUTF8_2 : public UPerfFunction
647 {
648 public:
649     CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen);
650     ~CppCompareUTF8_2();
651     virtual void call(UErrorCode* status);
652     virtual long getOperationsPerIteration();
653 
654 private:
655     const Collator *coll;
656     const CA_char *source;
657     const CA_char *target;
658     UBool useLen;
659 };
660 
CppCompareUTF8_2(const Collator * coll,const CA_char * source,const CA_char * target,UBool useLen)661 CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen)
662     :   coll(coll),
663         source(source),
664         target(target),
665         useLen(useLen)
666 {
667 }
668 
~CppCompareUTF8_2()669 CppCompareUTF8_2::~CppCompareUTF8_2()
670 {
671 }
672 
call(UErrorCode * status)673 void CppCompareUTF8_2::call(UErrorCode* status) {
674     if (U_FAILURE(*status)) return;
675 
676     // call strcoll for two strings at the same index
677     StringPiece src, tgt;
678     if (source->count < target->count) {
679         *status = U_ILLEGAL_ARGUMENT_ERROR;
680     } else {
681         for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
682             if (useLen) {
683                 src.set(source->dataOf(i), source->lengthOf(i));
684                 tgt.set(target->dataOf(i), target->lengthOf(i));
685             } else {
686                 src.set(source->dataOf(i));
687                 tgt.set(target->dataOf(i));
688             }
689             coll->compareUTF8(src, tgt, *status);
690         }
691     }
692 }
693 
getOperationsPerIteration()694 long CppCompareUTF8_2::getOperationsPerIteration()
695 {
696     return source->count;
697 }
698 
699 
700 //
701 // Test case taking a single test data array, calling Collator::getCollationKey for each
702 //
703 class CppGetCollationKey : public UPerfFunction
704 {
705 public:
706     CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen);
707     ~CppGetCollationKey();
708     virtual void call(UErrorCode* status);
709     virtual long getOperationsPerIteration();
710 
711 private:
712     const Collator *coll;
713     const CA_uchar *source;
714     UBool useLen;
715 };
716 
CppGetCollationKey(const Collator * coll,const CA_uchar * source,UBool useLen)717 CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen)
718     :   coll(coll),
719         source(source),
720         useLen(useLen)
721 {
722 }
723 
~CppGetCollationKey()724 CppGetCollationKey::~CppGetCollationKey()
725 {
726 }
727 
call(UErrorCode * status)728 void CppGetCollationKey::call(UErrorCode* status)
729 {
730     if (U_FAILURE(*status)) return;
731 
732     CollationKey key;
733     for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
734         coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status);
735     }
736 }
737 
getOperationsPerIteration()738 long CppGetCollationKey::getOperationsPerIteration() {
739     return source->count;
740 }
741 
742 namespace {
743 
744 struct CollatorAndCounter {
CollatorAndCounter__anon918c61850111::CollatorAndCounter745     CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {}
CollatorAndCounter__anon918c61850111::CollatorAndCounter746     CollatorAndCounter(const Collator& coll, const UCollator *ucoll)
747             : coll(coll), ucoll(ucoll), counter(0) {}
748     const Collator& coll;
749     const UCollator *ucoll;
750     int32_t counter;
751 };
752 
753 int32_t U_CALLCONV
UniStrCollatorComparator(const void * context,const void * left,const void * right)754 UniStrCollatorComparator(const void* context, const void* left, const void* right) {
755     CollatorAndCounter& cc = *(CollatorAndCounter*)context;
756     const UnicodeString& leftString = **(const UnicodeString**)left;
757     const UnicodeString& rightString = **(const UnicodeString**)right;
758     UErrorCode errorCode = U_ZERO_ERROR;
759     ++cc.counter;
760     return cc.coll.compare(leftString, rightString, errorCode);
761 }
762 
763 }  // namespace
764 
765 class CollPerfFunction : public UPerfFunction {
766 public:
CollPerfFunction(const Collator & coll,const UCollator * ucoll)767     CollPerfFunction(const Collator& coll, const UCollator *ucoll)
768             : coll(coll), ucoll(ucoll), ops(0) {}
769     virtual ~CollPerfFunction();
770     /** Calls call() to set the ops field, and returns that. */
771     virtual long getOperationsPerIteration();
772 
773 protected:
774     const Collator& coll;
775     const UCollator *ucoll;
776     int32_t ops;
777 };
778 
~CollPerfFunction()779 CollPerfFunction::~CollPerfFunction() {}
780 
getOperationsPerIteration()781 long CollPerfFunction::getOperationsPerIteration() {
782     UErrorCode errorCode = U_ZERO_ERROR;
783     call(&errorCode);
784     return U_SUCCESS(errorCode) ? ops : 0;
785 }
786 
787 class UniStrCollPerfFunction : public CollPerfFunction {
788 public:
UniStrCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)789     UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
790             : CollPerfFunction(coll, ucoll), d16(data16),
791               source(new UnicodeString*[d16->count]) {
792         for (int32_t i = 0; i < d16->count; ++i) {
793             source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i));
794         }
795     }
796     virtual ~UniStrCollPerfFunction();
797 
798 protected:
799     const CA_uchar* d16;
800     UnicodeString** source;
801 };
802 
~UniStrCollPerfFunction()803 UniStrCollPerfFunction::~UniStrCollPerfFunction() {
804     for (int32_t i = 0; i < d16->count; ++i) {
805         delete source[i];
806     }
807     delete[] source;
808 }
809 
810 //
811 // Test case sorting an array of UnicodeString pointers.
812 //
813 class UniStrSort : public UniStrCollPerfFunction {
814 public:
UniStrSort(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)815     UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
816             : UniStrCollPerfFunction(coll, ucoll, data16),
817               dest(new UnicodeString*[d16->count]) {}
818     virtual ~UniStrSort();
819     virtual void call(UErrorCode* status);
820 
821 private:
822     UnicodeString** dest;  // aliases only
823 };
824 
~UniStrSort()825 UniStrSort::~UniStrSort() {
826     delete[] dest;
827 }
828 
call(UErrorCode * status)829 void UniStrSort::call(UErrorCode* status) {
830     if (U_FAILURE(*status)) return;
831 
832     CollatorAndCounter cc(coll);
833     int32_t count = d16->count;
834     memcpy(dest, source, count * sizeof(UnicodeString *));
835     uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *),
836                    UniStrCollatorComparator, &cc, TRUE, status);
837     ops = cc.counter;
838 }
839 
840 namespace {
841 
842 int32_t U_CALLCONV
StringPieceCollatorComparator(const void * context,const void * left,const void * right)843 StringPieceCollatorComparator(const void* context, const void* left, const void* right) {
844     CollatorAndCounter& cc = *(CollatorAndCounter*)context;
845     const StringPiece& leftString = *(const StringPiece*)left;
846     const StringPiece& rightString = *(const StringPiece*)right;
847     UErrorCode errorCode = U_ZERO_ERROR;
848     ++cc.counter;
849     return cc.coll.compareUTF8(leftString, rightString, errorCode);
850 }
851 
852 int32_t U_CALLCONV
StringPieceUCollatorComparator(const void * context,const void * left,const void * right)853 StringPieceUCollatorComparator(const void* context, const void* left, const void* right) {
854     CollatorAndCounter& cc = *(CollatorAndCounter*)context;
855     const StringPiece& leftString = *(const StringPiece*)left;
856     const StringPiece& rightString = *(const StringPiece*)right;
857     UErrorCode errorCode = U_ZERO_ERROR;
858     ++cc.counter;
859     return ucol_strcollUTF8(cc.ucoll,
860                             leftString.data(), leftString.length(),
861                             rightString.data(), rightString.length(), &errorCode);
862 }
863 
864 }  // namespace
865 
866 class StringPieceCollPerfFunction : public CollPerfFunction {
867 public:
StringPieceCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_char * data8)868     StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
869             : CollPerfFunction(coll, ucoll), d8(data8),
870               source(new StringPiece[d8->count]) {
871         for (int32_t i = 0; i < d8->count; ++i) {
872             source[i].set(d8->dataOf(i), d8->lengthOf(i));
873         }
874     }
875     virtual ~StringPieceCollPerfFunction();
876 
877 protected:
878     const CA_char* d8;
879     StringPiece* source;
880 };
881 
~StringPieceCollPerfFunction()882 StringPieceCollPerfFunction::~StringPieceCollPerfFunction() {
883     delete[] source;
884 }
885 
886 class StringPieceSort : public StringPieceCollPerfFunction {
887 public:
StringPieceSort(const Collator & coll,const UCollator * ucoll,const CA_char * data8)888     StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
889             : StringPieceCollPerfFunction(coll, ucoll, data8),
890               dest(new StringPiece[d8->count]) {}
891     virtual ~StringPieceSort();
892 
893 protected:
894     StringPiece* dest;
895 };
896 
~StringPieceSort()897 StringPieceSort::~StringPieceSort() {
898     delete[] dest;
899 }
900 
//
// Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8().
//
// The constructor only forwards its arguments to StringPieceSort;
// the measured work is in call(), defined out of line.
//
class StringPieceSortCpp : public StringPieceSort {
public:
    StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
            : StringPieceSort(coll, ucoll, data8) {}
    virtual ~StringPieceSortCpp();
    virtual void call(UErrorCode* status);
};

// Nothing to release here; the base class destructors delete the arrays.
StringPieceSortCpp::~StringPieceSortCpp() {}
913 
call(UErrorCode * status)914 void StringPieceSortCpp::call(UErrorCode* status) {
915     if (U_FAILURE(*status)) return;
916 
917     CollatorAndCounter cc(coll);
918     int32_t count = d8->count;
919     memcpy(dest, source, count * sizeof(StringPiece));
920     uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
921                    StringPieceCollatorComparator, &cc, TRUE, status);
922     ops = cc.counter;
923 }
924 
//
// Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8().
//
// The constructor only forwards its arguments to StringPieceSort;
// the measured work is in call(), defined out of line.
//
class StringPieceSortC : public StringPieceSort {
public:
    StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
            : StringPieceSort(coll, ucoll, data8) {}
    virtual ~StringPieceSortC();
    virtual void call(UErrorCode* status);
};

// Nothing to release here; the base class destructors delete the arrays.
StringPieceSortC::~StringPieceSortC() {}
937 
call(UErrorCode * status)938 void StringPieceSortC::call(UErrorCode* status) {
939     if (U_FAILURE(*status)) return;
940 
941     CollatorAndCounter cc(coll, ucoll);
942     int32_t count = d8->count;
943     memcpy(dest, source, count * sizeof(StringPiece));
944     uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
945                    StringPieceUCollatorComparator, &cc, TRUE, status);
946     ops = cc.counter;
947 }
948 
//
// Test case performing binary searches in a sorted array of UnicodeString pointers.
//
// The constructor only forwards its arguments to UniStrCollPerfFunction;
// the measured work is in call(), defined out of line.
//
class UniStrBinSearch : public UniStrCollPerfFunction {
public:
    UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
            : UniStrCollPerfFunction(coll, ucoll, data16) {}
    virtual ~UniStrBinSearch();
    virtual void call(UErrorCode* status);
};

// This class adds no members, so there is nothing to release here.
UniStrBinSearch::~UniStrBinSearch() {}
961 
call(UErrorCode * status)962 void UniStrBinSearch::call(UErrorCode* status) {
963     if (U_FAILURE(*status)) return;
964 
965     CollatorAndCounter cc(coll);
966     int32_t count = d16->count;
967     for (int32_t i = 0; i < count; ++i) {
968         (void)uprv_stableBinarySearch((char *)source, count,
969                                       source + i, (int32_t)sizeof(UnicodeString *),
970                                       UniStrCollatorComparator, &cc);
971     }
972     ops = cc.counter;
973 }
974 
// Intermediate base for the StringPiece binary-search tests.
// Adds no members; it only forwards the constructor arguments
// to StringPieceCollPerfFunction.
class StringPieceBinSearch : public StringPieceCollPerfFunction {
public:
    StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
            : StringPieceCollPerfFunction(coll, ucoll, data8) {}
    virtual ~StringPieceBinSearch();
};

// Nothing to release here; the base class destructor deletes source.
StringPieceBinSearch::~StringPieceBinSearch() {}
983 
//
// Test case performing binary searches in a sorted array of UTF-8 StringPiece's
// with Collator::compareUTF8().
//
// The constructor only forwards its arguments to StringPieceBinSearch;
// the measured work is in call(), defined out of line.
//
class StringPieceBinSearchCpp : public StringPieceBinSearch {
public:
    StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
            : StringPieceBinSearch(coll, ucoll, data8) {}
    virtual ~StringPieceBinSearchCpp();
    virtual void call(UErrorCode* status);
};

// Nothing to release here; the base class destructor deletes source.
StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {}
997 
call(UErrorCode * status)998 void StringPieceBinSearchCpp::call(UErrorCode* status) {
999     if (U_FAILURE(*status)) return;
1000 
1001     CollatorAndCounter cc(coll);
1002     int32_t count = d8->count;
1003     for (int32_t i = 0; i < count; ++i) {
1004         (void)uprv_stableBinarySearch((char *)source, count,
1005                                       source + i, (int32_t)sizeof(StringPiece),
1006                                       StringPieceCollatorComparator, &cc);
1007     }
1008     ops = cc.counter;
1009 }
1010 
//
// Test case performing binary searches in a sorted array of UTF-8 StringPiece's
// with ucol_strcollUTF8().
//
// The constructor only forwards its arguments to StringPieceBinSearch;
// the measured work is in call(), defined out of line.
//
class StringPieceBinSearchC : public StringPieceBinSearch {
public:
    StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
            : StringPieceBinSearch(coll, ucoll, data8) {}
    virtual ~StringPieceBinSearchC();
    virtual void call(UErrorCode* status);
};

// Nothing to release here; the base class destructor deletes source.
StringPieceBinSearchC::~StringPieceBinSearchC() {}
1024 
call(UErrorCode * status)1025 void StringPieceBinSearchC::call(UErrorCode* status) {
1026     if (U_FAILURE(*status)) return;
1027 
1028     CollatorAndCounter cc(coll, ucoll);
1029     int32_t count = d8->count;
1030     for (int32_t i = 0; i < count; ++i) {
1031         (void)uprv_stableBinarySearch((char *)source, count,
1032                                       source + i, (int32_t)sizeof(StringPiece),
1033                                       StringPieceUCollatorComparator, &cc);
1034     }
1035     ops = cc.counter;
1036 }
1037 
1038 
// The performance test driver: opens one collator through the C API and one
// through the C++ API for the requested locale, prepares the test data sets
// on demand, and creates one UPerfFunction per named test case.
class CollPerf2Test : public UPerfTest
{
public:
    CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status);
    ~CollPerf2Test();
    // Maps a test index to its factory method; see the TESTCASE_AUTO list
    // in the definition.
    virtual UPerfFunction* runIndexedTest(
        int32_t index, UBool exec, const char *&name, char *par = NULL);

private:
    // Collators for the same locale: C API handle and C++ object.
    // Both are released in the destructor.
    UCollator* coll;
    Collator* collObj;

    // Initialized to 0 by the constructor.
    int32_t count;

    // Cache slots for the data sets returned by the get...() functions below.
    // All start out NULL and are deleted in the destructor.
    CA_uchar* data16;
    CA_char* data8;

    CA_uchar* modData16;
    CA_char* modData8;

    CA_uchar* sortedData16;
    CA_char* sortedData8;

    CA_uchar* randomData16;
    CA_char* randomData8;

    // Test strings read from the input file, as UTF-16 and converted to UTF-8.
    const CA_uchar* getData16(UErrorCode &status);
    const CA_char* getData8(UErrorCode &status);

    // Copies of the test strings with the last character of each replaced.
    const CA_uchar* getModData16(UErrorCode &status);
    const CA_char* getModData8(UErrorCode &status);

    // The test strings sorted with collObj.
    const CA_uchar* getSortedData16(UErrorCode &status);
    const CA_char* getSortedData8(UErrorCode &status);

    // The test strings ordered by their hash codes.
    const CA_uchar* getRandomData16(UErrorCode &status);
    const CA_char* getRandomData8(UErrorCode &status);

    // Helpers that build a new array from d16; the caller takes ownership
    // of the result. Both return NULL on failure.
    static CA_uchar* sortData16(
            const CA_uchar* d16,
            UComparator *cmp, const void *context,
            UErrorCode &status);
    static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status);

    // Factory methods, one per test case. Each returns a new UPerfFunction,
    // or NULL on failure.
    // NOTE(review): the two ..._4x2 methods declared below have no definition
    // in the visible part of this file and are not registered in
    // runIndexedTest() - confirm whether they are leftovers.
    UPerfFunction* TestStrcoll();
    UPerfFunction* TestStrcollNull();
    UPerfFunction* TestStrcollSimilar();

    UPerfFunction* TestStrcollUTF8();
    UPerfFunction* TestStrcollUTF8Null();
    UPerfFunction* TestStrcollUTF8Similar();

    UPerfFunction* TestGetSortKey();
    UPerfFunction* TestGetSortKeyNull();

    UPerfFunction* TestNextSortKeyPart_4All();
    UPerfFunction* TestNextSortKeyPart_4x2();
    UPerfFunction* TestNextSortKeyPart_4x4();
    UPerfFunction* TestNextSortKeyPart_4x8();
    UPerfFunction* TestNextSortKeyPart_32All();
    UPerfFunction* TestNextSortKeyPart_32x2();

    UPerfFunction* TestNextSortKeyPartUTF8_4All();
    UPerfFunction* TestNextSortKeyPartUTF8_4x2();
    UPerfFunction* TestNextSortKeyPartUTF8_4x4();
    UPerfFunction* TestNextSortKeyPartUTF8_4x8();
    UPerfFunction* TestNextSortKeyPartUTF8_32All();
    UPerfFunction* TestNextSortKeyPartUTF8_32x2();

    UPerfFunction* TestCppCompare();
    UPerfFunction* TestCppCompareNull();
    UPerfFunction* TestCppCompareSimilar();

    UPerfFunction* TestCppCompareUTF8();
    UPerfFunction* TestCppCompareUTF8Null();
    UPerfFunction* TestCppCompareUTF8Similar();

    UPerfFunction* TestCppGetCollationKey();
    UPerfFunction* TestCppGetCollationKeyNull();

    UPerfFunction* TestUniStrSort();
    UPerfFunction* TestStringPieceSortCpp();
    UPerfFunction* TestStringPieceSortC();

    UPerfFunction* TestUniStrBinSearch();
    UPerfFunction* TestStringPieceBinSearchCpp();
    UPerfFunction* TestStringPieceBinSearchC();
};
1126 
CollPerf2Test(int32_t argc,const char * argv[],UErrorCode & status)1127 CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) :
1128     UPerfTest(argc, argv, status),
1129     coll(NULL),
1130     collObj(NULL),
1131     count(0),
1132     data16(NULL),
1133     data8(NULL),
1134     modData16(NULL),
1135     modData8(NULL),
1136     sortedData16(NULL),
1137     sortedData8(NULL),
1138     randomData16(NULL),
1139     randomData8(NULL)
1140 {
1141     if (U_FAILURE(status)) {
1142         return;
1143     }
1144 
1145     if (locale == NULL){
1146         locale = "root";
1147     }
1148 
1149     // Set up an ICU collator.
1150     // Starting with ICU 54 (ticket #8260), this supports standard collation locale keywords.
1151     coll = ucol_open(locale, &status);
1152     collObj = Collator::createInstance(locale, status);
1153 }
1154 
// Closes both collators and frees every cached data set.
// Any of the pointers may still be NULL here (the constructor initializes
// them all to NULL); delete on a NULL pointer is a no-op.
CollPerf2Test::~CollPerf2Test()
{
    // NOTE(review): coll is NULL if the constructor failed early; this relies
    // on ucol_close() accepting NULL - confirm.
    ucol_close(coll);
    delete collObj;

    delete data16;
    delete data8;
    delete modData16;
    delete modData8;
    delete sortedData16;
    delete sortedData8;
    delete randomData16;
    delete randomData8;
}
1169 
1170 #define MAX_NUM_DATA 10000
1171 
getData16(UErrorCode & status)1172 const CA_uchar* CollPerf2Test::getData16(UErrorCode &status)
1173 {
1174     if (U_FAILURE(status)) return NULL;
1175     if (data16) return data16;
1176 
1177     CA_uchar* d16 = new CA_uchar();
1178     const UChar *line = NULL;
1179     int32_t len = 0;
1180     int32_t numData = 0;
1181 
1182     for (;;) {
1183         line = ucbuf_readline(ucharBuf, &len, &status);
1184         if (line == NULL || U_FAILURE(status)) break;
1185 
1186         // Refer to the source code of ucbuf_readline()
1187         // 1. 'len' includes the line terminal symbols
1188         // 2. The length of the line terminal symbols is only one character
1189         // 3. The Windows CR LF line terminal symbols will be converted to CR
1190 
1191         if (len == 1 || line[0] == 0x23 /* '#' */) {
1192             continue; // skip empty/comment line
1193         } else {
1194             d16->append_one(len);
1195             UChar *p = d16->last();
1196             u_memcpy(p, line, len - 1);  // exclude the CR
1197             p[len - 1] = 0;  // NUL-terminate
1198 
1199             numData++;
1200             if (numData >= MAX_NUM_DATA) break;
1201         }
1202     }
1203 
1204     if (U_SUCCESS(status)) {
1205         data16 = d16;
1206     } else {
1207         delete d16;
1208     }
1209 
1210     return data16;
1211 }
1212 
getData8(UErrorCode & status)1213 const CA_char* CollPerf2Test::getData8(UErrorCode &status)
1214 {
1215     if (U_FAILURE(status)) return NULL;
1216     if (data8) return data8;
1217     return data8 = getData8FromData16(getData16(status), status);
1218 }
1219 
getModData16(UErrorCode & status)1220 const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status)
1221 {
1222     if (U_FAILURE(status)) return NULL;
1223     if (modData16) return modData16;
1224 
1225     const CA_uchar* d16 = getData16(status);
1226     if (U_FAILURE(status)) return NULL;
1227 
1228     CA_uchar* modData16 = new CA_uchar();
1229 
1230     for (int32_t i = 0; i < d16->count; i++) {
1231         const UChar *s = d16->dataOf(i);
1232         int32_t len = d16->lengthOf(i) + 1; // including NULL terminator
1233 
1234         modData16->append_one(len);
1235         u_memcpy(modData16->last(), s, len);
1236 
1237         // replacing the last character with a different character
1238         UChar *lastChar = &modData16->last()[len -2];
1239         for (int32_t j = i + 1; j != i; j++) {
1240             if (j >= d16->count) {
1241                 j = 0;
1242             }
1243             const UChar *s1 = d16->dataOf(j);
1244             UChar lastChar1 = s1[d16->lengthOf(j) - 1];
1245             if (*lastChar != lastChar1) {
1246                 *lastChar = lastChar1;
1247                 break;
1248             }
1249         }
1250     }
1251 
1252     return modData16;
1253 }
1254 
getModData8(UErrorCode & status)1255 const CA_char* CollPerf2Test::getModData8(UErrorCode &status)
1256 {
1257     if (U_FAILURE(status)) return NULL;
1258     if (modData8) return modData8;
1259     return modData8 = getData8FromData16(getModData16(status), status);
1260 }
1261 
1262 namespace {
1263 
// Sort context for U16CollatorComparator: the string array whose indexes are
// being sorted, plus the collator used to compare the strings.
// Neither is owned by this struct.
struct ArrayAndColl {
    ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {}
    const CA_uchar* d16;
    const Collator& coll;
};
1269 
1270 int32_t U_CALLCONV
U16CollatorComparator(const void * context,const void * left,const void * right)1271 U16CollatorComparator(const void* context, const void* left, const void* right) {
1272     const ArrayAndColl& ac = *(const ArrayAndColl*)context;
1273     const CA_uchar* d16 = ac.d16;
1274     int32_t leftIndex = *(const int32_t*)left;
1275     int32_t rightIndex = *(const int32_t*)right;
1276     UErrorCode errorCode = U_ZERO_ERROR;
1277     return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex),
1278                            d16->dataOf(rightIndex), d16->lengthOf(rightIndex),
1279                            errorCode);
1280 }
1281 
1282 int32_t U_CALLCONV
U16HashComparator(const void * context,const void * left,const void * right)1283 U16HashComparator(const void* context, const void* left, const void* right) {
1284     const CA_uchar* d16 = (const CA_uchar*)context;
1285     int32_t leftIndex = *(const int32_t*)left;
1286     int32_t rightIndex = *(const int32_t*)right;
1287     int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex));
1288     int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex));
1289     return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1;
1290 }
1291 
1292 }  // namespace
1293 
getSortedData16(UErrorCode & status)1294 const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) {
1295     if (U_FAILURE(status)) return NULL;
1296     if (sortedData16) return sortedData16;
1297 
1298     ArrayAndColl ac(getData16(status), *collObj);
1299     return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status);
1300 }
1301 
getSortedData8(UErrorCode & status)1302 const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) {
1303     if (U_FAILURE(status)) return NULL;
1304     if (sortedData8) return sortedData8;
1305     return sortedData8 = getData8FromData16(getSortedData16(status), status);
1306 }
1307 
getRandomData16(UErrorCode & status)1308 const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) {
1309     if (U_FAILURE(status)) return NULL;
1310     if (randomData16) return randomData16;
1311 
1312     // Sort the strings by their hash codes, which should be a reasonably pseudo-random order.
1313     const CA_uchar* d16 = getData16(status);
1314     return randomData16 = sortData16(d16, U16HashComparator, d16, status);
1315 }
1316 
getRandomData8(UErrorCode & status)1317 const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) {
1318     if (U_FAILURE(status)) return NULL;
1319     if (randomData8) return randomData8;
1320     return randomData8 = getData8FromData16(getRandomData16(status), status);
1321 }
1322 
// Returns a new array holding the strings of d16 in the order defined by cmp.
// The comparator receives `context` plus pointers to two int32_t indexes into
// d16. The caller owns the result. Returns NULL if status indicates a failure
// on entry or after sorting. d16 is only dereferenced when status is good.
CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16,
                                    UComparator *cmp, const void *context,
                                    UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    // Sort a permutation of the indexes rather than the strings themselves.
    const int32_t numStrings = d16->count;
    LocalArray<int32_t> order(new int32_t[numStrings]);
    for (int32_t pos = 0; pos < numStrings; ++pos) {
        order[pos] = pos;
    }
    uprv_sortArray(order.getAlias(), numStrings, (int32_t)sizeof(int32_t),
                   cmp, context, TRUE, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    // Copy the strings in sorted order into a new array.
    LocalPointer<CA_uchar> sorted(new CA_uchar());
    for (int32_t pos = 0; pos < numStrings; ++pos) {
        const int32_t from = order[pos];
        const int32_t capacity = d16->lengthOf(from) + 1;  // including NUL terminator
        sorted->append_one(capacity);
        u_memcpy(sorted->last(), d16->dataOf(from), capacity);
    }

    // Nothing after the sort modifies status, so it is still a success here.
    return sorted.orphan();
}
1352 
getData8FromData16(const CA_uchar * d16,UErrorCode & status)1353 CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) {
1354     if (U_FAILURE(status)) return NULL;
1355 
1356     // UTF-16 -> UTF-8 conversion
1357     LocalPointer<CA_char> d8(new CA_char());
1358     for (int32_t i = 0; i < d16->count; i++) {
1359         const UChar *s16 = d16->dataOf(i);
1360         int32_t length16 = d16->lengthOf(i);
1361 
1362         // get length in UTF-8
1363         int32_t length8;
1364         u_strToUTF8(NULL, 0, &length8, s16, length16, &status);
1365         if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
1366             status = U_ZERO_ERROR;
1367         } else {
1368             break;
1369         }
1370         int32_t capacity8 = length8 + 1;  // plus terminal NULL
1371         d8->append_one(capacity8);
1372 
1373         // convert to UTF-8
1374         u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status);
1375         if (U_FAILURE(status)) break;
1376     }
1377 
1378     if (U_SUCCESS(status)) {
1379         return d8.orphan();
1380     } else {
1381         return NULL;
1382     }
1383 }
1384 
// Test dispatcher: lists every test case of this program via the
// TESTCASE_AUTO macros. The par argument is unused.
// NOTE(review): presumably the macros number the tests in listing order, so
// reordering or inserting entries would change test indexes - confirm.
// NOTE(review): TestNextSortKeyPart_4x2 and TestNextSortKeyPartUTF8_4x2 are
// declared in the class but not listed here - confirm this is intentional.
UPerfFunction*
CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/)
{
    (void)par;
    TESTCASE_AUTO_BEGIN;

    // ucol_strcoll() style comparisons, UTF-16 then UTF-8
    TESTCASE_AUTO(TestStrcoll);
    TESTCASE_AUTO(TestStrcollNull);
    TESTCASE_AUTO(TestStrcollSimilar);

    TESTCASE_AUTO(TestStrcollUTF8);
    TESTCASE_AUTO(TestStrcollUTF8Null);
    TESTCASE_AUTO(TestStrcollUTF8Similar);

    // sort keys, whole and in parts
    TESTCASE_AUTO(TestGetSortKey);
    TESTCASE_AUTO(TestGetSortKeyNull);

    TESTCASE_AUTO(TestNextSortKeyPart_4All);
    TESTCASE_AUTO(TestNextSortKeyPart_4x4);
    TESTCASE_AUTO(TestNextSortKeyPart_4x8);
    TESTCASE_AUTO(TestNextSortKeyPart_32All);
    TESTCASE_AUTO(TestNextSortKeyPart_32x2);

    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2);

    // C++ API comparisons and collation keys
    TESTCASE_AUTO(TestCppCompare);
    TESTCASE_AUTO(TestCppCompareNull);
    TESTCASE_AUTO(TestCppCompareSimilar);

    TESTCASE_AUTO(TestCppCompareUTF8);
    TESTCASE_AUTO(TestCppCompareUTF8Null);
    TESTCASE_AUTO(TestCppCompareUTF8Similar);

    TESTCASE_AUTO(TestCppGetCollationKey);
    TESTCASE_AUTO(TestCppGetCollationKeyNull);

    // sorting
    TESTCASE_AUTO(TestUniStrSort);
    TESTCASE_AUTO(TestStringPieceSortCpp);
    TESTCASE_AUTO(TestStringPieceSortC);

    // binary search
    TESTCASE_AUTO(TestUniStrBinSearch);
    TESTCASE_AUTO(TestStringPieceBinSearchCpp);
    TESTCASE_AUTO(TestStringPieceBinSearchC);

    TESTCASE_AUTO_END;
    return NULL;
}
1436 
1437 
1438 
TestStrcoll()1439 UPerfFunction* CollPerf2Test::TestStrcoll()
1440 {
1441     UErrorCode status = U_ZERO_ERROR;
1442     Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */);
1443     if (U_FAILURE(status)) {
1444         delete testCase;
1445         return NULL;
1446     }
1447     return testCase;
1448 }
1449 
TestStrcollNull()1450 UPerfFunction* CollPerf2Test::TestStrcollNull()
1451 {
1452     UErrorCode status = U_ZERO_ERROR;
1453     Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */);
1454     if (U_FAILURE(status)) {
1455         delete testCase;
1456         return NULL;
1457     }
1458     return testCase;
1459 }
1460 
TestStrcollSimilar()1461 UPerfFunction* CollPerf2Test::TestStrcollSimilar()
1462 {
1463     UErrorCode status = U_ZERO_ERROR;
1464     Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */);
1465     if (U_FAILURE(status)) {
1466         delete testCase;
1467         return NULL;
1468     }
1469     return testCase;
1470 }
1471 
TestStrcollUTF8()1472 UPerfFunction* CollPerf2Test::TestStrcollUTF8()
1473 {
1474     UErrorCode status = U_ZERO_ERROR;
1475     StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */);
1476     if (U_FAILURE(status)) {
1477         delete testCase;
1478         return NULL;
1479     }
1480     return testCase;
1481 }
1482 
TestStrcollUTF8Null()1483 UPerfFunction* CollPerf2Test::TestStrcollUTF8Null()
1484 {
1485     UErrorCode status = U_ZERO_ERROR;
1486     StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */);
1487     if (U_FAILURE(status)) {
1488         delete testCase;
1489         return NULL;
1490     }
1491     return testCase;
1492 }
1493 
TestStrcollUTF8Similar()1494 UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar()
1495 {
1496     UErrorCode status = U_ZERO_ERROR;
1497     StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */);
1498     if (U_FAILURE(status)) {
1499         delete testCase;
1500         return NULL;
1501     }
1502     return testCase;
1503 }
1504 
TestGetSortKey()1505 UPerfFunction* CollPerf2Test::TestGetSortKey()
1506 {
1507     UErrorCode status = U_ZERO_ERROR;
1508     GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */);
1509     if (U_FAILURE(status)) {
1510         delete testCase;
1511         return NULL;
1512     }
1513     return testCase;
1514 }
1515 
TestGetSortKeyNull()1516 UPerfFunction* CollPerf2Test::TestGetSortKeyNull()
1517 {
1518     UErrorCode status = U_ZERO_ERROR;
1519     GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */);
1520     if (U_FAILURE(status)) {
1521         delete testCase;
1522         return NULL;
1523     }
1524     return testCase;
1525 }
1526 
TestNextSortKeyPart_4All()1527 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All()
1528 {
1529     UErrorCode status = U_ZERO_ERROR;
1530     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */);
1531     if (U_FAILURE(status)) {
1532         delete testCase;
1533         return NULL;
1534     }
1535     return testCase;
1536 }
1537 
TestNextSortKeyPart_4x4()1538 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4()
1539 {
1540     UErrorCode status = U_ZERO_ERROR;
1541     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */);
1542     if (U_FAILURE(status)) {
1543         delete testCase;
1544         return NULL;
1545     }
1546     return testCase;
1547 }
1548 
TestNextSortKeyPart_4x8()1549 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8()
1550 {
1551     UErrorCode status = U_ZERO_ERROR;
1552     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */);
1553     if (U_FAILURE(status)) {
1554         delete testCase;
1555         return NULL;
1556     }
1557     return testCase;
1558 }
1559 
TestNextSortKeyPart_32All()1560 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All()
1561 {
1562     UErrorCode status = U_ZERO_ERROR;
1563     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */);
1564     if (U_FAILURE(status)) {
1565         delete testCase;
1566         return NULL;
1567     }
1568     return testCase;
1569 }
1570 
TestNextSortKeyPart_32x2()1571 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2()
1572 {
1573     UErrorCode status = U_ZERO_ERROR;
1574     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */);
1575     if (U_FAILURE(status)) {
1576         delete testCase;
1577         return NULL;
1578     }
1579     return testCase;
1580 }
1581 
TestNextSortKeyPartUTF8_4All()1582 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All()
1583 {
1584     UErrorCode status = U_ZERO_ERROR;
1585     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */);
1586     if (U_FAILURE(status)) {
1587         delete testCase;
1588         return NULL;
1589     }
1590     return testCase;
1591 }
1592 
TestNextSortKeyPartUTF8_4x4()1593 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4()
1594 {
1595     UErrorCode status = U_ZERO_ERROR;
1596     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */);
1597     if (U_FAILURE(status)) {
1598         delete testCase;
1599         return NULL;
1600     }
1601     return testCase;
1602 }
1603 
TestNextSortKeyPartUTF8_4x8()1604 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8()
1605 {
1606     UErrorCode status = U_ZERO_ERROR;
1607     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */);
1608     if (U_FAILURE(status)) {
1609         delete testCase;
1610         return NULL;
1611     }
1612     return testCase;
1613 }
1614 
TestNextSortKeyPartUTF8_32All()1615 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All()
1616 {
1617     UErrorCode status = U_ZERO_ERROR;
1618     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */);
1619     if (U_FAILURE(status)) {
1620         delete testCase;
1621         return NULL;
1622     }
1623     return testCase;
1624 }
1625 
TestNextSortKeyPartUTF8_32x2()1626 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2()
1627 {
1628     UErrorCode status = U_ZERO_ERROR;
1629     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */);
1630     if (U_FAILURE(status)) {
1631         delete testCase;
1632         return NULL;
1633     }
1634     return testCase;
1635 }
1636 
TestCppCompare()1637 UPerfFunction* CollPerf2Test::TestCppCompare()
1638 {
1639     UErrorCode status = U_ZERO_ERROR;
1640     CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */);
1641     if (U_FAILURE(status)) {
1642         delete testCase;
1643         return NULL;
1644     }
1645     return testCase;
1646 }
1647 
TestCppCompareNull()1648 UPerfFunction* CollPerf2Test::TestCppCompareNull()
1649 {
1650     UErrorCode status = U_ZERO_ERROR;
1651     CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */);
1652     if (U_FAILURE(status)) {
1653         delete testCase;
1654         return NULL;
1655     }
1656     return testCase;
1657 }
1658 
TestCppCompareSimilar()1659 UPerfFunction* CollPerf2Test::TestCppCompareSimilar()
1660 {
1661     UErrorCode status = U_ZERO_ERROR;
1662     CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */);
1663     if (U_FAILURE(status)) {
1664         delete testCase;
1665         return NULL;
1666     }
1667     return testCase;
1668 }
1669 
TestCppCompareUTF8()1670 UPerfFunction* CollPerf2Test::TestCppCompareUTF8()
1671 {
1672     UErrorCode status = U_ZERO_ERROR;
1673     CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */);
1674     if (U_FAILURE(status)) {
1675         delete testCase;
1676         return NULL;
1677     }
1678     return testCase;
1679 }
1680 
TestCppCompareUTF8Null()1681 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null()
1682 {
1683     UErrorCode status = U_ZERO_ERROR;
1684     CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */);
1685     if (U_FAILURE(status)) {
1686         delete testCase;
1687         return NULL;
1688     }
1689     return testCase;
1690 }
1691 
TestCppCompareUTF8Similar()1692 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar()
1693 {
1694     UErrorCode status = U_ZERO_ERROR;
1695     CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */);
1696     if (U_FAILURE(status)) {
1697         delete testCase;
1698         return NULL;
1699     }
1700     return testCase;
1701 }
1702 
TestCppGetCollationKey()1703 UPerfFunction* CollPerf2Test::TestCppGetCollationKey()
1704 {
1705     UErrorCode status = U_ZERO_ERROR;
1706     CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */);
1707     if (U_FAILURE(status)) {
1708         delete testCase;
1709         return NULL;
1710     }
1711     return testCase;
1712 }
1713 
TestCppGetCollationKeyNull()1714 UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull()
1715 {
1716     UErrorCode status = U_ZERO_ERROR;
1717     CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */);
1718     if (U_FAILURE(status)) {
1719         delete testCase;
1720         return NULL;
1721     }
1722     return testCase;
1723 }
1724 
TestUniStrSort()1725 UPerfFunction* CollPerf2Test::TestUniStrSort() {
1726     UErrorCode status = U_ZERO_ERROR;
1727     UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status));
1728     if (U_FAILURE(status)) {
1729         delete testCase;
1730         return NULL;
1731     }
1732     return testCase;
1733 }
1734 
TestStringPieceSortCpp()1735 UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() {
1736     UErrorCode status = U_ZERO_ERROR;
1737     UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status));
1738     if (U_FAILURE(status)) {
1739         delete testCase;
1740         return NULL;
1741     }
1742     return testCase;
1743 }
1744 
TestStringPieceSortC()1745 UPerfFunction* CollPerf2Test::TestStringPieceSortC() {
1746     UErrorCode status = U_ZERO_ERROR;
1747     UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status));
1748     if (U_FAILURE(status)) {
1749         delete testCase;
1750         return NULL;
1751     }
1752     return testCase;
1753 }
1754 
TestUniStrBinSearch()1755 UPerfFunction* CollPerf2Test::TestUniStrBinSearch() {
1756     UErrorCode status = U_ZERO_ERROR;
1757     UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status));
1758     if (U_FAILURE(status)) {
1759         delete testCase;
1760         return NULL;
1761     }
1762     return testCase;
1763 }
1764 
TestStringPieceBinSearchCpp()1765 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() {
1766     UErrorCode status = U_ZERO_ERROR;
1767     UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status));
1768     if (U_FAILURE(status)) {
1769         delete testCase;
1770         return NULL;
1771     }
1772     return testCase;
1773 }
1774 
TestStringPieceBinSearchC()1775 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() {
1776     UErrorCode status = U_ZERO_ERROR;
1777     UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status));
1778     if (U_FAILURE(status)) {
1779         delete testCase;
1780         return NULL;
1781     }
1782     return testCase;
1783 }
1784 
1785 
main(int argc,const char * argv[])1786 int main(int argc, const char *argv[])
1787 {
1788     UErrorCode status = U_ZERO_ERROR;
1789     CollPerf2Test test(argc, argv, status);
1790 
1791     if (U_FAILURE(status)){
1792         printf("The error is %s\n", u_errorName(status));
1793         //TODO: print usage here
1794         return status;
1795     }
1796 
1797     if (test.run() == FALSE){
1798         fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n");
1799         return -1;
1800     }
1801     return 0;
1802 }
1803