1 /*
2 ***********************************************************************
3 * © 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html
5 ***********************************************************************
6 ***********************************************************************
7 * Copyright (c) 2013-2014, International Business Machines
8 * Corporation and others.  All Rights Reserved.
9 ***********************************************************************
10 */
11 
12 #include <string.h>
13 #include "unicode/localpointer.h"
14 #include "unicode/uperf.h"
15 #include "unicode/ucol.h"
16 #include "unicode/coll.h"
17 #include "unicode/uiter.h"
18 #include "unicode/ustring.h"
19 #include "unicode/sortkey.h"
20 #include "uarrsort.h"
21 #include "uoptions.h"
22 #include "ustr_imp.h"
23 
/*
 * COMPACT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * keeps a list of strings of UNIT code units back to back in one heap buffer.
 *
 *   count          number of strings appended so far
 *   index[i]       offset (in UNITs) of string i inside data;
 *                  index[count] is the total number of UNITs reserved
 *   data           the shared buffer
 *
 * append_one(theLen) reserves theLen more UNITs (the caller counts the
 * terminator in theLen) and last() returns where the caller should write them.
 * lengthOf(i) reports the reserved length minus one, i.e. without terminator.
 *
 * The copy constructor and copy assignment are declared but not defined in
 * this macro. Allocation failure calls abort() instead of dereferencing NULL.
 */
#define COMPACT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    CompactArrays() : count(0), index(NULL), data(NULL) { \
        index = (int32_t *) malloc(sizeof(int32_t)); \
        if (index == NULL) { abort(); } \
        index[0] = 0; \
    } \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
        /* grow through temporaries so a failed realloc cannot clobber the live pointer */ \
        int32_t * grownIndex = (int32_t *) realloc(index, sizeof(int32_t) * (count + 2)); \
        if (grownIndex == NULL) { abort(); } \
        index = grownIndex; \
        int32_t newTotal = index[count] + theLen; \
        UNIT * grownData = (UNIT *) realloc(data, sizeof(UNIT) * newTotal); \
        /* a zero-sized request may legitimately come back as NULL */ \
        if (grownData == NULL && newTotal > 0) { abort(); } \
        data = grownData; \
        count++; \
        index[count] = newTotal; \
    } \
    UNIT * last(){return data + index[count - 1];} \
    const UNIT * dataOf(int32_t i) const {return data + index[i];} \
    int32_t lengthOf(int32_t i) const {return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/  \
};
47 
48 COMPACT_ARRAY(CA_uchar, UChar)
49 COMPACT_ARRAY(CA_char, char)
50 
51 #define MAX_TEST_STRINGS_FOR_PERMUTING 1000
52 
53 // C API test cases
54 
55 //
56 // Test case taking a single test data array, calling ucol_strcoll by permuting the test data
57 //
58 class Strcoll : public UPerfFunction
59 {
60 public:
61     Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen);
62     ~Strcoll();
63     virtual void call(UErrorCode* status);
64     virtual long getOperationsPerIteration();
65 
66 private:
67     const UCollator *coll;
68     const CA_uchar *source;
69     UBool useLen;
70     int32_t maxTestStrings;
71 };
72 
Strcoll(const UCollator * coll,const CA_uchar * source,UBool useLen)73 Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen)
74     :   coll(coll),
75         source(source),
76         useLen(useLen)
77 {
78     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
79 }
80 
~Strcoll()81 Strcoll::~Strcoll()
82 {
83 }
84 
call(UErrorCode * status)85 void Strcoll::call(UErrorCode* status)
86 {
87     if (U_FAILURE(*status)) return;
88 
89     // call strcoll for permutation
90     int32_t divisor = source->count / maxTestStrings;
91     int32_t srcLen, tgtLen;
92     int32_t cmp = 0;
93     for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
94         if (i % divisor) continue;
95         numTestStringsI++;
96         srcLen = useLen ? source->lengthOf(i) : -1;
97         for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
98             if (j % divisor) continue;
99             numTestStringsJ++;
100             tgtLen = useLen ? source->lengthOf(j) : -1;
101             cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
102         }
103     }
104     // At the end, cmp must be 0
105     if (cmp != 0) {
106         *status = U_INTERNAL_PROGRAM_ERROR;
107     }
108 }
109 
getOperationsPerIteration()110 long Strcoll::getOperationsPerIteration()
111 {
112     return maxTestStrings * maxTestStrings;
113 }
114 
115 //
116 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
117 //
118 class Strcoll_2 : public UPerfFunction
119 {
120 public:
121     Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
122     ~Strcoll_2();
123     virtual void call(UErrorCode* status);
124     virtual long getOperationsPerIteration();
125 
126 private:
127     const UCollator *coll;
128     const CA_uchar *source;
129     const CA_uchar *target;
130     UBool useLen;
131 };
132 
Strcoll_2(const UCollator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)133 Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
134     :   coll(coll),
135         source(source),
136         target(target),
137         useLen(useLen)
138 {
139 }
140 
~Strcoll_2()141 Strcoll_2::~Strcoll_2()
142 {
143 }
144 
call(UErrorCode * status)145 void Strcoll_2::call(UErrorCode* status)
146 {
147     if (U_FAILURE(*status)) return;
148 
149     // call strcoll for two strings at the same index
150     if (source->count < target->count) {
151         *status = U_ILLEGAL_ARGUMENT_ERROR;
152     } else {
153         for (int32_t i = 0; i < source->count; i++) {
154             int32_t srcLen = useLen ? source->lengthOf(i) : -1;
155             int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
156             ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
157         }
158     }
159 }
160 
getOperationsPerIteration()161 long Strcoll_2::getOperationsPerIteration()
162 {
163     return source->count;
164 }
165 
166 
167 //
168 // Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data
169 //
170 class StrcollUTF8 : public UPerfFunction
171 {
172 public:
173     StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen);
174     ~StrcollUTF8();
175     virtual void call(UErrorCode* status);
176     virtual long getOperationsPerIteration();
177 
178 private:
179     const UCollator *coll;
180     const CA_char *source;
181     UBool useLen;
182     int32_t maxTestStrings;
183 };
184 
StrcollUTF8(const UCollator * coll,const CA_char * source,UBool useLen)185 StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen)
186     :   coll(coll),
187         source(source),
188         useLen(useLen)
189 {
190     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
191 }
192 
~StrcollUTF8()193 StrcollUTF8::~StrcollUTF8()
194 {
195 }
196 
call(UErrorCode * status)197 void StrcollUTF8::call(UErrorCode* status)
198 {
199     if (U_FAILURE(*status)) return;
200 
201     // call strcollUTF8 for permutation
202     int32_t divisor = source->count / maxTestStrings;
203     int32_t srcLen, tgtLen;
204     int32_t cmp = 0;
205     for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
206         if (i % divisor) continue;
207         numTestStringsI++;
208         srcLen = useLen ? source->lengthOf(i) : -1;
209         for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
210             if (j % divisor) continue;
211             numTestStringsJ++;
212             tgtLen = useLen ? source->lengthOf(j) : -1;
213             cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status);
214         }
215     }
216     // At the end, cmp must be 0
217     if (cmp != 0) {
218         *status = U_INTERNAL_PROGRAM_ERROR;
219     }
220 }
221 
getOperationsPerIteration()222 long StrcollUTF8::getOperationsPerIteration()
223 {
224     return maxTestStrings * maxTestStrings;
225 }
226 
227 //
228 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
229 //
230 class StrcollUTF8_2 : public UPerfFunction
231 {
232 public:
233     StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen);
234     ~StrcollUTF8_2();
235     virtual void call(UErrorCode* status);
236     virtual long getOperationsPerIteration();
237 
238 private:
239     const UCollator *coll;
240     const CA_char *source;
241     const CA_char *target;
242     UBool useLen;
243 };
244 
StrcollUTF8_2(const UCollator * coll,const CA_char * source,const CA_char * target,UBool useLen)245 StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen)
246     :   coll(coll),
247         source(source),
248         target(target),
249         useLen(useLen)
250 {
251 }
252 
~StrcollUTF8_2()253 StrcollUTF8_2::~StrcollUTF8_2()
254 {
255 }
256 
call(UErrorCode * status)257 void StrcollUTF8_2::call(UErrorCode* status)
258 {
259     if (U_FAILURE(*status)) return;
260 
261     // call strcoll for two strings at the same index
262     if (source->count < target->count) {
263         *status = U_ILLEGAL_ARGUMENT_ERROR;
264     } else {
265         for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
266             int32_t srcLen = useLen ? source->lengthOf(i) : -1;
267             int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
268             ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status);
269         }
270     }
271 }
272 
getOperationsPerIteration()273 long StrcollUTF8_2::getOperationsPerIteration()
274 {
275     return source->count;
276 }
277 
278 //
279 // Test case taking a single test data array, calling ucol_getSortKey for each
280 //
281 class GetSortKey : public UPerfFunction
282 {
283 public:
284     GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen);
285     ~GetSortKey();
286     virtual void call(UErrorCode* status);
287     virtual long getOperationsPerIteration();
288 
289 private:
290     const UCollator *coll;
291     const CA_uchar *source;
292     UBool useLen;
293 };
294 
GetSortKey(const UCollator * coll,const CA_uchar * source,UBool useLen)295 GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen)
296     :   coll(coll),
297         source(source),
298         useLen(useLen)
299 {
300 }
301 
~GetSortKey()302 GetSortKey::~GetSortKey()
303 {
304 }
305 
306 #define KEY_BUF_SIZE 512
307 
call(UErrorCode * status)308 void GetSortKey::call(UErrorCode* status)
309 {
310     if (U_FAILURE(*status)) return;
311 
312     uint8_t key[KEY_BUF_SIZE];
313     int32_t len;
314 
315     if (useLen) {
316         for (int32_t i = 0; i < source->count; i++) {
317             len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE);
318         }
319     } else {
320         for (int32_t i = 0; i < source->count; i++) {
321             len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE);
322         }
323     }
324 }
325 
getOperationsPerIteration()326 long GetSortKey::getOperationsPerIteration()
327 {
328     return source->count;
329 }
330 
331 //
332 // Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the
333 // given buffer size
334 //
335 class NextSortKeyPart : public UPerfFunction
336 {
337 public:
338     NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1);
339     ~NextSortKeyPart();
340     virtual void call(UErrorCode* status);
341     virtual long getOperationsPerIteration();
342     virtual long getEventsPerIteration();
343 
344 private:
345     const UCollator *coll;
346     const CA_uchar *source;
347     int32_t bufSize;
348     int32_t maxIteration;
349     long events;
350 };
351 
352 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPart(const UCollator * coll,const CA_uchar * source,int32_t bufSize,int32_t maxIteration)353 NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
354     :   coll(coll),
355         source(source),
356         bufSize(bufSize),
357         maxIteration(maxIteration),
358         events(0)
359 {
360 }
361 
~NextSortKeyPart()362 NextSortKeyPart::~NextSortKeyPart()
363 {
364 }
365 
call(UErrorCode * status)366 void NextSortKeyPart::call(UErrorCode* status)
367 {
368     if (U_FAILURE(*status)) return;
369 
370     uint8_t *part = (uint8_t *)malloc(bufSize);
371     uint32_t state[2];
372     UCharIterator iter;
373 
374     events = 0;
375     for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
376         uiter_setString(&iter, source->dataOf(i), source->lengthOf(i));
377         state[0] = 0;
378         state[1] = 0;
379         int32_t partLen = bufSize;
380         for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
381             partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
382             events++;
383         }
384     }
385     free(part);
386 }
387 
getOperationsPerIteration()388 long NextSortKeyPart::getOperationsPerIteration()
389 {
390     return source->count;
391 }
392 
getEventsPerIteration()393 long NextSortKeyPart::getEventsPerIteration()
394 {
395     return events;
396 }
397 
398 //
399 // Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the
400 // given buffer size
401 //
402 class NextSortKeyPartUTF8 : public UPerfFunction
403 {
404 public:
405     NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1);
406     ~NextSortKeyPartUTF8();
407     virtual void call(UErrorCode* status);
408     virtual long getOperationsPerIteration();
409     virtual long getEventsPerIteration();
410 
411 private:
412     const UCollator *coll;
413     const CA_char *source;
414     int32_t bufSize;
415     int32_t maxIteration;
416     long events;
417 };
418 
419 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPartUTF8(const UCollator * coll,const CA_char * source,int32_t bufSize,int32_t maxIteration)420 NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
421     :   coll(coll),
422         source(source),
423         bufSize(bufSize),
424         maxIteration(maxIteration),
425         events(0)
426 {
427 }
428 
~NextSortKeyPartUTF8()429 NextSortKeyPartUTF8::~NextSortKeyPartUTF8()
430 {
431 }
432 
call(UErrorCode * status)433 void NextSortKeyPartUTF8::call(UErrorCode* status)
434 {
435     if (U_FAILURE(*status)) return;
436 
437     uint8_t *part = (uint8_t *)malloc(bufSize);
438     uint32_t state[2];
439     UCharIterator iter;
440 
441     events = 0;
442     for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
443         uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i));
444         state[0] = 0;
445         state[1] = 0;
446         int32_t partLen = bufSize;
447         for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
448             partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
449             events++;
450         }
451     }
452     free(part);
453 }
454 
getOperationsPerIteration()455 long NextSortKeyPartUTF8::getOperationsPerIteration()
456 {
457     return source->count;
458 }
459 
getEventsPerIteration()460 long NextSortKeyPartUTF8::getEventsPerIteration()
461 {
462     return events;
463 }
464 
465 // CPP API test cases
466 
467 //
468 // Test case taking a single test data array, calling Collator::compare by permuting the test data
469 //
470 class CppCompare : public UPerfFunction
471 {
472 public:
473     CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen);
474     ~CppCompare();
475     virtual void call(UErrorCode* status);
476     virtual long getOperationsPerIteration();
477 
478 private:
479     const Collator *coll;
480     const CA_uchar *source;
481     UBool useLen;
482     int32_t maxTestStrings;
483 };
484 
CppCompare(const Collator * coll,const CA_uchar * source,UBool useLen)485 CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen)
486     :   coll(coll),
487         source(source),
488         useLen(useLen)
489 {
490     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
491 }
492 
~CppCompare()493 CppCompare::~CppCompare()
494 {
495 }
496 
call(UErrorCode * status)497 void CppCompare::call(UErrorCode* status) {
498     if (U_FAILURE(*status)) return;
499 
500     // call compare for permutation of test data
501     int32_t divisor = source->count / maxTestStrings;
502     int32_t srcLen, tgtLen;
503     int32_t cmp = 0;
504     for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
505         if (i % divisor) continue;
506         numTestStringsI++;
507         srcLen = useLen ? source->lengthOf(i) : -1;
508         for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
509             if (j % divisor) continue;
510             numTestStringsJ++;
511             tgtLen = useLen ? source->lengthOf(j) : -1;
512             cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
513         }
514     }
515     // At the end, cmp must be 0
516     if (cmp != 0) {
517         *status = U_INTERNAL_PROGRAM_ERROR;
518     }
519 }
520 
getOperationsPerIteration()521 long CppCompare::getOperationsPerIteration()
522 {
523     return maxTestStrings * maxTestStrings;
524 }
525 
526 //
527 // Test case taking two test data arrays, calling Collator::compare for strings at a same index
528 //
529 class CppCompare_2 : public UPerfFunction
530 {
531 public:
532     CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
533     ~CppCompare_2();
534     virtual void call(UErrorCode* status);
535     virtual long getOperationsPerIteration();
536 
537 private:
538     const Collator *coll;
539     const CA_uchar *source;
540     const CA_uchar *target;
541     UBool useLen;
542 };
543 
CppCompare_2(const Collator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)544 CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
545     :   coll(coll),
546         source(source),
547         target(target),
548         useLen(useLen)
549 {
550 }
551 
~CppCompare_2()552 CppCompare_2::~CppCompare_2()
553 {
554 }
555 
call(UErrorCode * status)556 void CppCompare_2::call(UErrorCode* status) {
557     if (U_FAILURE(*status)) return;
558 
559     // call strcoll for two strings at the same index
560     if (source->count < target->count) {
561         *status = U_ILLEGAL_ARGUMENT_ERROR;
562     } else {
563         for (int32_t i = 0; i < source->count; i++) {
564             int32_t srcLen = useLen ? source->lengthOf(i) : -1;
565             int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
566             coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
567         }
568     }
569 }
570 
getOperationsPerIteration()571 long CppCompare_2::getOperationsPerIteration()
572 {
573     return source->count;
574 }
575 
576 
577 //
578 // Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data
579 //
580 class CppCompareUTF8 : public UPerfFunction
581 {
582 public:
583     CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen);
584     ~CppCompareUTF8();
585     virtual void call(UErrorCode* status);
586     virtual long getOperationsPerIteration();
587 
588 private:
589     const Collator *coll;
590     const CA_char *source;
591     UBool useLen;
592     int32_t maxTestStrings;
593 };
594 
CppCompareUTF8(const Collator * coll,const CA_char * source,UBool useLen)595 CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen)
596     :   coll(coll),
597         source(source),
598         useLen(useLen)
599 {
600     maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
601 }
602 
~CppCompareUTF8()603 CppCompareUTF8::~CppCompareUTF8()
604 {
605 }
606 
call(UErrorCode * status)607 void CppCompareUTF8::call(UErrorCode* status) {
608     if (U_FAILURE(*status)) return;
609 
610     // call compareUTF8 for all permutations
611     int32_t divisor = source->count / maxTestStrings;
612     StringPiece src, tgt;
613     int32_t cmp = 0;
614     for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
615         if (i % divisor) continue;
616         numTestStringsI++;
617 
618         if (useLen) {
619             src.set(source->dataOf(i), source->lengthOf(i));
620         } else {
621             src.set(source->dataOf(i));
622         }
623         for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
624             if (j % divisor) continue;
625             numTestStringsJ++;
626 
627             if (useLen) {
628                 tgt.set(source->dataOf(i), source->lengthOf(i));
629             } else {
630                 tgt.set(source->dataOf(i));
631             }
632             cmp += coll->compareUTF8(src, tgt, *status);
633         }
634     }
635     // At the end, cmp must be 0
636     if (cmp != 0) {
637         *status = U_INTERNAL_PROGRAM_ERROR;
638     }
639 }
640 
getOperationsPerIteration()641 long CppCompareUTF8::getOperationsPerIteration()
642 {
643     return maxTestStrings * maxTestStrings;
644 }
645 
646 
647 //
648 // Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index
649 //
650 class CppCompareUTF8_2 : public UPerfFunction
651 {
652 public:
653     CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen);
654     ~CppCompareUTF8_2();
655     virtual void call(UErrorCode* status);
656     virtual long getOperationsPerIteration();
657 
658 private:
659     const Collator *coll;
660     const CA_char *source;
661     const CA_char *target;
662     UBool useLen;
663 };
664 
CppCompareUTF8_2(const Collator * coll,const CA_char * source,const CA_char * target,UBool useLen)665 CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen)
666     :   coll(coll),
667         source(source),
668         target(target),
669         useLen(useLen)
670 {
671 }
672 
~CppCompareUTF8_2()673 CppCompareUTF8_2::~CppCompareUTF8_2()
674 {
675 }
676 
call(UErrorCode * status)677 void CppCompareUTF8_2::call(UErrorCode* status) {
678     if (U_FAILURE(*status)) return;
679 
680     // call strcoll for two strings at the same index
681     StringPiece src, tgt;
682     if (source->count < target->count) {
683         *status = U_ILLEGAL_ARGUMENT_ERROR;
684     } else {
685         for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
686             if (useLen) {
687                 src.set(source->dataOf(i), source->lengthOf(i));
688                 tgt.set(target->dataOf(i), target->lengthOf(i));
689             } else {
690                 src.set(source->dataOf(i));
691                 tgt.set(target->dataOf(i));
692             }
693             coll->compareUTF8(src, tgt, *status);
694         }
695     }
696 }
697 
getOperationsPerIteration()698 long CppCompareUTF8_2::getOperationsPerIteration()
699 {
700     return source->count;
701 }
702 
703 
704 //
705 // Test case taking a single test data array, calling Collator::getCollationKey for each
706 //
707 class CppGetCollationKey : public UPerfFunction
708 {
709 public:
710     CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen);
711     ~CppGetCollationKey();
712     virtual void call(UErrorCode* status);
713     virtual long getOperationsPerIteration();
714 
715 private:
716     const Collator *coll;
717     const CA_uchar *source;
718     UBool useLen;
719 };
720 
CppGetCollationKey(const Collator * coll,const CA_uchar * source,UBool useLen)721 CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen)
722     :   coll(coll),
723         source(source),
724         useLen(useLen)
725 {
726 }
727 
~CppGetCollationKey()728 CppGetCollationKey::~CppGetCollationKey()
729 {
730 }
731 
call(UErrorCode * status)732 void CppGetCollationKey::call(UErrorCode* status)
733 {
734     if (U_FAILURE(*status)) return;
735 
736     CollationKey key;
737     for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
738         coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status);
739     }
740 }
741 
getOperationsPerIteration()742 long CppGetCollationKey::getOperationsPerIteration() {
743     return source->count;
744 }
745 
746 namespace {
747 
748 struct CollatorAndCounter {
CollatorAndCounter__anon918c61850111::CollatorAndCounter749     CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {}
CollatorAndCounter__anon918c61850111::CollatorAndCounter750     CollatorAndCounter(const Collator& coll, const UCollator *ucoll)
751             : coll(coll), ucoll(ucoll), counter(0) {}
752     const Collator& coll;
753     const UCollator *ucoll;
754     int32_t counter;
755 };
756 
757 int32_t U_CALLCONV
UniStrCollatorComparator(const void * context,const void * left,const void * right)758 UniStrCollatorComparator(const void* context, const void* left, const void* right) {
759     CollatorAndCounter& cc = *(CollatorAndCounter*)context;
760     const UnicodeString& leftString = **(const UnicodeString**)left;
761     const UnicodeString& rightString = **(const UnicodeString**)right;
762     UErrorCode errorCode = U_ZERO_ERROR;
763     ++cc.counter;
764     return cc.coll.compare(leftString, rightString, errorCode);
765 }
766 
767 }  // namespace
768 
769 class CollPerfFunction : public UPerfFunction {
770 public:
CollPerfFunction(const Collator & coll,const UCollator * ucoll)771     CollPerfFunction(const Collator& coll, const UCollator *ucoll)
772             : coll(coll), ucoll(ucoll), ops(0) {}
773     virtual ~CollPerfFunction();
774     /** Calls call() to set the ops field, and returns that. */
775     virtual long getOperationsPerIteration();
776 
777 protected:
778     const Collator& coll;
779     const UCollator *ucoll;
780     int32_t ops;
781 };
782 
~CollPerfFunction()783 CollPerfFunction::~CollPerfFunction() {}
784 
getOperationsPerIteration()785 long CollPerfFunction::getOperationsPerIteration() {
786     UErrorCode errorCode = U_ZERO_ERROR;
787     call(&errorCode);
788     return U_SUCCESS(errorCode) ? ops : 0;
789 }
790 
791 class UniStrCollPerfFunction : public CollPerfFunction {
792 public:
UniStrCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)793     UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
794             : CollPerfFunction(coll, ucoll), d16(data16),
795               source(new UnicodeString*[d16->count]) {
796         for (int32_t i = 0; i < d16->count; ++i) {
797             source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i));
798         }
799     }
800     virtual ~UniStrCollPerfFunction();
801 
802 protected:
803     const CA_uchar* d16;
804     UnicodeString** source;
805 };
806 
~UniStrCollPerfFunction()807 UniStrCollPerfFunction::~UniStrCollPerfFunction() {
808     for (int32_t i = 0; i < d16->count; ++i) {
809         delete source[i];
810     }
811     delete[] source;
812 }
813 
814 //
815 // Test case sorting an array of UnicodeString pointers.
816 //
817 class UniStrSort : public UniStrCollPerfFunction {
818 public:
UniStrSort(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)819     UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
820             : UniStrCollPerfFunction(coll, ucoll, data16),
821               dest(new UnicodeString*[d16->count]) {}
822     virtual ~UniStrSort();
823     virtual void call(UErrorCode* status);
824 
825 private:
826     UnicodeString** dest;  // aliases only
827 };
828 
~UniStrSort()829 UniStrSort::~UniStrSort() {
830     delete[] dest;
831 }
832 
call(UErrorCode * status)833 void UniStrSort::call(UErrorCode* status) {
834     if (U_FAILURE(*status)) return;
835 
836     CollatorAndCounter cc(coll);
837     int32_t count = d16->count;
838     memcpy(dest, source, count * sizeof(UnicodeString *));
839     uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *),
840                    UniStrCollatorComparator, &cc, TRUE, status);
841     ops = cc.counter;
842 }
843 
844 namespace {
845 
846 int32_t U_CALLCONV
StringPieceCollatorComparator(const void * context,const void * left,const void * right)847 StringPieceCollatorComparator(const void* context, const void* left, const void* right) {
848     CollatorAndCounter& cc = *(CollatorAndCounter*)context;
849     const StringPiece& leftString = *(const StringPiece*)left;
850     const StringPiece& rightString = *(const StringPiece*)right;
851     UErrorCode errorCode = U_ZERO_ERROR;
852     ++cc.counter;
853     return cc.coll.compareUTF8(leftString, rightString, errorCode);
854 }
855 
856 int32_t U_CALLCONV
StringPieceUCollatorComparator(const void * context,const void * left,const void * right)857 StringPieceUCollatorComparator(const void* context, const void* left, const void* right) {
858     CollatorAndCounter& cc = *(CollatorAndCounter*)context;
859     const StringPiece& leftString = *(const StringPiece*)left;
860     const StringPiece& rightString = *(const StringPiece*)right;
861     UErrorCode errorCode = U_ZERO_ERROR;
862     ++cc.counter;
863     return ucol_strcollUTF8(cc.ucoll,
864                             leftString.data(), leftString.length(),
865                             rightString.data(), rightString.length(), &errorCode);
866 }
867 
868 }  // namespace
869 
870 class StringPieceCollPerfFunction : public CollPerfFunction {
871 public:
StringPieceCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_char * data8)872     StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
873             : CollPerfFunction(coll, ucoll), d8(data8),
874               source(new StringPiece[d8->count]) {
875         for (int32_t i = 0; i < d8->count; ++i) {
876             source[i].set(d8->dataOf(i), d8->lengthOf(i));
877         }
878     }
879     virtual ~StringPieceCollPerfFunction();
880 
881 protected:
882     const CA_char* d8;
883     StringPiece* source;
884 };
885 
~StringPieceCollPerfFunction()886 StringPieceCollPerfFunction::~StringPieceCollPerfFunction() {
887     delete[] source;
888 }
889 
890 class StringPieceSort : public StringPieceCollPerfFunction {
891 public:
StringPieceSort(const Collator & coll,const UCollator * ucoll,const CA_char * data8)892     StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
893             : StringPieceCollPerfFunction(coll, ucoll, data8),
894               dest(new StringPiece[d8->count]) {}
895     virtual ~StringPieceSort();
896 
897 protected:
898     StringPiece* dest;
899 };
900 
~StringPieceSort()901 StringPieceSort::~StringPieceSort() {
902     delete[] dest;
903 }
904 
905 //
906 // Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8().
907 //
908 class StringPieceSortCpp : public StringPieceSort {
909 public:
StringPieceSortCpp(const Collator & coll,const UCollator * ucoll,const CA_char * data8)910     StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
911             : StringPieceSort(coll, ucoll, data8) {}
912     virtual ~StringPieceSortCpp();
913     virtual void call(UErrorCode* status);
914 };
915 
~StringPieceSortCpp()916 StringPieceSortCpp::~StringPieceSortCpp() {}
917 
call(UErrorCode * status)918 void StringPieceSortCpp::call(UErrorCode* status) {
919     if (U_FAILURE(*status)) return;
920 
921     CollatorAndCounter cc(coll);
922     int32_t count = d8->count;
923     memcpy(dest, source, count * sizeof(StringPiece));
924     uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
925                    StringPieceCollatorComparator, &cc, TRUE, status);
926     ops = cc.counter;
927 }
928 
929 //
930 // Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8().
931 //
932 class StringPieceSortC : public StringPieceSort {
933 public:
StringPieceSortC(const Collator & coll,const UCollator * ucoll,const CA_char * data8)934     StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
935             : StringPieceSort(coll, ucoll, data8) {}
936     virtual ~StringPieceSortC();
937     virtual void call(UErrorCode* status);
938 };
939 
~StringPieceSortC()940 StringPieceSortC::~StringPieceSortC() {}
941 
call(UErrorCode * status)942 void StringPieceSortC::call(UErrorCode* status) {
943     if (U_FAILURE(*status)) return;
944 
945     CollatorAndCounter cc(coll, ucoll);
946     int32_t count = d8->count;
947     memcpy(dest, source, count * sizeof(StringPiece));
948     uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
949                    StringPieceUCollatorComparator, &cc, TRUE, status);
950     ops = cc.counter;
951 }
952 
953 //
954 // Test case performing binary searches in a sorted array of UnicodeString pointers.
955 //
956 class UniStrBinSearch : public UniStrCollPerfFunction {
957 public:
UniStrBinSearch(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)958     UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
959             : UniStrCollPerfFunction(coll, ucoll, data16) {}
960     virtual ~UniStrBinSearch();
961     virtual void call(UErrorCode* status);
962 };
963 
~UniStrBinSearch()964 UniStrBinSearch::~UniStrBinSearch() {}
965 
call(UErrorCode * status)966 void UniStrBinSearch::call(UErrorCode* status) {
967     if (U_FAILURE(*status)) return;
968 
969     CollatorAndCounter cc(coll);
970     int32_t count = d16->count;
971     for (int32_t i = 0; i < count; ++i) {
972         (void)uprv_stableBinarySearch((char *)source, count,
973                                       source + i, (int32_t)sizeof(UnicodeString *),
974                                       UniStrCollatorComparator, &cc);
975     }
976     ops = cc.counter;
977 }
978 
979 class StringPieceBinSearch : public StringPieceCollPerfFunction {
980 public:
StringPieceBinSearch(const Collator & coll,const UCollator * ucoll,const CA_char * data8)981     StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
982             : StringPieceCollPerfFunction(coll, ucoll, data8) {}
983     virtual ~StringPieceBinSearch();
984 };
985 
~StringPieceBinSearch()986 StringPieceBinSearch::~StringPieceBinSearch() {}
987 
988 //
989 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's
990 // with Collator::compareUTF8().
991 //
992 class StringPieceBinSearchCpp : public StringPieceBinSearch {
993 public:
StringPieceBinSearchCpp(const Collator & coll,const UCollator * ucoll,const CA_char * data8)994     StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
995             : StringPieceBinSearch(coll, ucoll, data8) {}
996     virtual ~StringPieceBinSearchCpp();
997     virtual void call(UErrorCode* status);
998 };
999 
~StringPieceBinSearchCpp()1000 StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {}
1001 
call(UErrorCode * status)1002 void StringPieceBinSearchCpp::call(UErrorCode* status) {
1003     if (U_FAILURE(*status)) return;
1004 
1005     CollatorAndCounter cc(coll);
1006     int32_t count = d8->count;
1007     for (int32_t i = 0; i < count; ++i) {
1008         (void)uprv_stableBinarySearch((char *)source, count,
1009                                       source + i, (int32_t)sizeof(StringPiece),
1010                                       StringPieceCollatorComparator, &cc);
1011     }
1012     ops = cc.counter;
1013 }
1014 
1015 //
1016 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's
1017 // with ucol_strcollUTF8().
1018 //
1019 class StringPieceBinSearchC : public StringPieceBinSearch {
1020 public:
StringPieceBinSearchC(const Collator & coll,const UCollator * ucoll,const CA_char * data8)1021     StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
1022             : StringPieceBinSearch(coll, ucoll, data8) {}
1023     virtual ~StringPieceBinSearchC();
1024     virtual void call(UErrorCode* status);
1025 };
1026 
~StringPieceBinSearchC()1027 StringPieceBinSearchC::~StringPieceBinSearchC() {}
1028 
call(UErrorCode * status)1029 void StringPieceBinSearchC::call(UErrorCode* status) {
1030     if (U_FAILURE(*status)) return;
1031 
1032     CollatorAndCounter cc(coll, ucoll);
1033     int32_t count = d8->count;
1034     for (int32_t i = 0; i < count; ++i) {
1035         (void)uprv_stableBinarySearch((char *)source, count,
1036                                       source + i, (int32_t)sizeof(StringPiece),
1037                                       StringPieceUCollatorComparator, &cc);
1038     }
1039     ops = cc.counter;
1040 }
1041 
1042 
/**
 * Collation performance test driver.
 * Owns one C collator and one C++ Collator for the requested locale, lazily builds
 * several variants of the input data (original, modified, sorted, pseudo-random;
 * each in UTF-16 and UTF-8), and exposes one factory method per test case.
 */
class CollPerf2Test : public UPerfTest
{
public:
    CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status);
    ~CollPerf2Test();
    // Maps a test index to its name and, when requested, to the perf function to run.
    virtual UPerfFunction* runIndexedTest(
        int32_t index, UBool exec, const char *&name, char *par = NULL);

private:
    UCollator* coll;    // opened with ucol_open() in the constructor, closed in the destructor
    Collator* collObj;  // created with Collator::createInstance(), deleted in the destructor

    int32_t count;      // initialized to 0 by the constructor
    // Cached data sets. Each starts out NULL, is created on first use by the
    // matching getter below, and is deleted by the destructor.
    CA_uchar* data16;
    CA_char* data8;

    CA_uchar* modData16;
    CA_char* modData8;

    CA_uchar* sortedData16;
    CA_char* sortedData8;

    CA_uchar* randomData16;
    CA_char* randomData8;

    // Input lines as read from the test file (UTF-16) and their UTF-8 conversion.
    const CA_uchar* getData16(UErrorCode &status);
    const CA_char* getData8(UErrorCode &status);

    // Copy of the input in which the last character of each string is replaced.
    const CA_uchar* getModData16(UErrorCode &status);
    const CA_char* getModData8(UErrorCode &status);

    // Input sorted with collObj.
    const CA_uchar* getSortedData16(UErrorCode &status);
    const CA_char* getSortedData8(UErrorCode &status);

    // Input ordered by string hash code, used as a pseudo-random order.
    const CA_uchar* getRandomData16(UErrorCode &status);
    const CA_char* getRandomData8(UErrorCode &status);

    // Returns a new array with the strings of d16 ordered by cmp; the caller owns it.
    static CA_uchar* sortData16(
            const CA_uchar* d16,
            UComparator *cmp, const void *context,
            UErrorCode &status);
    // Returns a new array with the strings of d16 converted to UTF-8; the caller owns it.
    static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status);

    // Factory methods, one per test case. Each returns a new perf function,
    // or NULL if the data it needs could not be produced.
    UPerfFunction* TestStrcoll();
    UPerfFunction* TestStrcollNull();
    UPerfFunction* TestStrcollSimilar();

    UPerfFunction* TestStrcollUTF8();
    UPerfFunction* TestStrcollUTF8Null();
    UPerfFunction* TestStrcollUTF8Similar();

    UPerfFunction* TestGetSortKey();
    UPerfFunction* TestGetSortKeyNull();

    // NOTE(review): TestNextSortKeyPart_4x2 and TestNextSortKeyPartUTF8_4x2 are declared
    // here but are not registered in runIndexedTest() -- confirm whether that is intended.
    UPerfFunction* TestNextSortKeyPart_4All();
    UPerfFunction* TestNextSortKeyPart_4x2();
    UPerfFunction* TestNextSortKeyPart_4x4();
    UPerfFunction* TestNextSortKeyPart_4x8();
    UPerfFunction* TestNextSortKeyPart_32All();
    UPerfFunction* TestNextSortKeyPart_32x2();

    UPerfFunction* TestNextSortKeyPartUTF8_4All();
    UPerfFunction* TestNextSortKeyPartUTF8_4x2();
    UPerfFunction* TestNextSortKeyPartUTF8_4x4();
    UPerfFunction* TestNextSortKeyPartUTF8_4x8();
    UPerfFunction* TestNextSortKeyPartUTF8_32All();
    UPerfFunction* TestNextSortKeyPartUTF8_32x2();

    UPerfFunction* TestCppCompare();
    UPerfFunction* TestCppCompareNull();
    UPerfFunction* TestCppCompareSimilar();

    UPerfFunction* TestCppCompareUTF8();
    UPerfFunction* TestCppCompareUTF8Null();
    UPerfFunction* TestCppCompareUTF8Similar();

    UPerfFunction* TestCppGetCollationKey();
    UPerfFunction* TestCppGetCollationKeyNull();

    UPerfFunction* TestUniStrSort();
    UPerfFunction* TestStringPieceSortCpp();
    UPerfFunction* TestStringPieceSortC();

    UPerfFunction* TestUniStrBinSearch();
    UPerfFunction* TestStringPieceBinSearchCpp();
    UPerfFunction* TestStringPieceBinSearchC();
};
1130 
CollPerf2Test(int32_t argc,const char * argv[],UErrorCode & status)1131 CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) :
1132     UPerfTest(argc, argv, status),
1133     coll(NULL),
1134     collObj(NULL),
1135     count(0),
1136     data16(NULL),
1137     data8(NULL),
1138     modData16(NULL),
1139     modData8(NULL),
1140     sortedData16(NULL),
1141     sortedData8(NULL),
1142     randomData16(NULL),
1143     randomData8(NULL)
1144 {
1145     if (U_FAILURE(status)) {
1146         return;
1147     }
1148 
1149     if (locale == NULL){
1150         locale = "root";
1151     }
1152 
1153     // Set up an ICU collator.
1154     // Starting with ICU 54 (ticket #8260), this supports standard collation locale keywords.
1155     coll = ucol_open(locale, &status);
1156     collObj = Collator::createInstance(locale, status);
1157 }
1158 
~CollPerf2Test()1159 CollPerf2Test::~CollPerf2Test()
1160 {
1161     ucol_close(coll);
1162     delete collObj;
1163 
1164     delete data16;
1165     delete data8;
1166     delete modData16;
1167     delete modData8;
1168     delete sortedData16;
1169     delete sortedData8;
1170     delete randomData16;
1171     delete randomData8;
1172 }
1173 
1174 #define MAX_NUM_DATA 10000
1175 
getData16(UErrorCode & status)1176 const CA_uchar* CollPerf2Test::getData16(UErrorCode &status)
1177 {
1178     if (U_FAILURE(status)) return NULL;
1179     if (data16) return data16;
1180 
1181     CA_uchar* d16 = new CA_uchar();
1182     const UChar *line = NULL;
1183     int32_t len = 0;
1184     int32_t numData = 0;
1185 
1186     for (;;) {
1187         line = ucbuf_readline(ucharBuf, &len, &status);
1188         if (line == NULL || U_FAILURE(status)) break;
1189 
1190         // Refer to the source code of ucbuf_readline()
1191         // 1. 'len' includes the line terminal symbols
1192         // 2. The length of the line terminal symbols is only one character
1193         // 3. The Windows CR LF line terminal symbols will be converted to CR
1194 
1195         if (len == 1 || line[0] == 0x23 /* '#' */) {
1196             continue; // skip empty/comment line
1197         } else {
1198             d16->append_one(len);
1199             UChar *p = d16->last();
1200             u_memcpy(p, line, len - 1);  // exclude the CR
1201             p[len - 1] = 0;  // NUL-terminate
1202 
1203             numData++;
1204             if (numData >= MAX_NUM_DATA) break;
1205         }
1206     }
1207 
1208     if (U_SUCCESS(status)) {
1209         data16 = d16;
1210     } else {
1211         delete d16;
1212     }
1213 
1214     return data16;
1215 }
1216 
getData8(UErrorCode & status)1217 const CA_char* CollPerf2Test::getData8(UErrorCode &status)
1218 {
1219     if (U_FAILURE(status)) return NULL;
1220     if (data8) return data8;
1221     return data8 = getData8FromData16(getData16(status), status);
1222 }
1223 
getModData16(UErrorCode & status)1224 const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status)
1225 {
1226     if (U_FAILURE(status)) return NULL;
1227     if (modData16) return modData16;
1228 
1229     const CA_uchar* d16 = getData16(status);
1230     if (U_FAILURE(status)) return NULL;
1231 
1232     CA_uchar* modData16 = new CA_uchar();
1233 
1234     for (int32_t i = 0; i < d16->count; i++) {
1235         const UChar *s = d16->dataOf(i);
1236         int32_t len = d16->lengthOf(i) + 1; // including NULL terminator
1237 
1238         modData16->append_one(len);
1239         u_memcpy(modData16->last(), s, len);
1240 
1241         // replacing the last character with a different character
1242         UChar *lastChar = &modData16->last()[len -2];
1243         for (int32_t j = i + 1; j != i; j++) {
1244             if (j >= d16->count) {
1245                 j = 0;
1246             }
1247             const UChar *s1 = d16->dataOf(j);
1248             UChar lastChar1 = s1[d16->lengthOf(j) - 1];
1249             if (*lastChar != lastChar1) {
1250                 *lastChar = lastChar1;
1251                 break;
1252             }
1253         }
1254     }
1255 
1256     return modData16;
1257 }
1258 
getModData8(UErrorCode & status)1259 const CA_char* CollPerf2Test::getModData8(UErrorCode &status)
1260 {
1261     if (U_FAILURE(status)) return NULL;
1262     if (modData8) return modData8;
1263     return modData8 = getData8FromData16(getModData16(status), status);
1264 }
1265 
1266 namespace {
1267 
1268 struct ArrayAndColl {
ArrayAndColl__anon918c61850311::ArrayAndColl1269     ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {}
1270     const CA_uchar* d16;
1271     const Collator& coll;
1272 };
1273 
1274 int32_t U_CALLCONV
U16CollatorComparator(const void * context,const void * left,const void * right)1275 U16CollatorComparator(const void* context, const void* left, const void* right) {
1276     const ArrayAndColl& ac = *(const ArrayAndColl*)context;
1277     const CA_uchar* d16 = ac.d16;
1278     int32_t leftIndex = *(const int32_t*)left;
1279     int32_t rightIndex = *(const int32_t*)right;
1280     UErrorCode errorCode = U_ZERO_ERROR;
1281     return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex),
1282                            d16->dataOf(rightIndex), d16->lengthOf(rightIndex),
1283                            errorCode);
1284 }
1285 
1286 int32_t U_CALLCONV
U16HashComparator(const void * context,const void * left,const void * right)1287 U16HashComparator(const void* context, const void* left, const void* right) {
1288     const CA_uchar* d16 = (const CA_uchar*)context;
1289     int32_t leftIndex = *(const int32_t*)left;
1290     int32_t rightIndex = *(const int32_t*)right;
1291     int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex));
1292     int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex));
1293     return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1;
1294 }
1295 
1296 }  // namespace
1297 
getSortedData16(UErrorCode & status)1298 const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) {
1299     if (U_FAILURE(status)) return NULL;
1300     if (sortedData16) return sortedData16;
1301 
1302     ArrayAndColl ac(getData16(status), *collObj);
1303     return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status);
1304 }
1305 
getSortedData8(UErrorCode & status)1306 const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) {
1307     if (U_FAILURE(status)) return NULL;
1308     if (sortedData8) return sortedData8;
1309     return sortedData8 = getData8FromData16(getSortedData16(status), status);
1310 }
1311 
getRandomData16(UErrorCode & status)1312 const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) {
1313     if (U_FAILURE(status)) return NULL;
1314     if (randomData16) return randomData16;
1315 
1316     // Sort the strings by their hash codes, which should be a reasonably pseudo-random order.
1317     const CA_uchar* d16 = getData16(status);
1318     return randomData16 = sortData16(d16, U16HashComparator, d16, status);
1319 }
1320 
getRandomData8(UErrorCode & status)1321 const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) {
1322     if (U_FAILURE(status)) return NULL;
1323     if (randomData8) return randomData8;
1324     return randomData8 = getData8FromData16(getRandomData16(status), status);
1325 }
1326 
/**
 * Returns a newly allocated copy of d16 whose strings are ordered by cmp.
 * The strings are not moved while sorting: an array of int32_t indexes is sorted
 * (cmp receives pointers to indexes plus the given context), and the strings are
 * then copied in that order.
 * The caller owns the result. Returns NULL if status is or becomes a failure.
 */
CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16,
                                    UComparator *cmp, const void *context,
                                    UErrorCode &status) {
    if (U_FAILURE(status)) return NULL;

    const int32_t numStrings = d16->count;

    // Start from the identity permutation and let the comparator reorder it.
    LocalArray<int32_t> indexes(new int32_t[numStrings]);
    int32_t *order = indexes.getAlias();
    for (int32_t pos = 0; pos < numStrings; ++pos) {
        order[pos] = pos;
    }
    uprv_sortArray(order, numStrings, (int32_t)sizeof(int32_t), cmp, context, TRUE, &status);
    if (U_FAILURE(status)) return NULL;

    // Copy the strings in sorted order into a new array.
    LocalPointer<CA_uchar> sorted(new CA_uchar());
    for (int32_t pos = 0; pos < numStrings; ++pos) {
        const int32_t from = order[pos];
        const int32_t capacity = d16->lengthOf(from) + 1;  // including NULL terminator
        sorted->append_one(capacity);
        u_memcpy(sorted->last(), d16->dataOf(from), capacity);
    }

    return U_SUCCESS(status) ? sorted.orphan() : NULL;
}
1356 
getData8FromData16(const CA_uchar * d16,UErrorCode & status)1357 CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) {
1358     if (U_FAILURE(status)) return NULL;
1359 
1360     // UTF-16 -> UTF-8 conversion
1361     LocalPointer<CA_char> d8(new CA_char());
1362     for (int32_t i = 0; i < d16->count; i++) {
1363         const UChar *s16 = d16->dataOf(i);
1364         int32_t length16 = d16->lengthOf(i);
1365 
1366         // get length in UTF-8
1367         int32_t length8;
1368         u_strToUTF8(NULL, 0, &length8, s16, length16, &status);
1369         if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
1370             status = U_ZERO_ERROR;
1371         } else {
1372             break;
1373         }
1374         int32_t capacity8 = length8 + 1;  // plus terminal NULL
1375         d8->append_one(capacity8);
1376 
1377         // convert to UTF-8
1378         u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status);
1379         if (U_FAILURE(status)) break;
1380     }
1381 
1382     if (U_SUCCESS(status)) {
1383         return d8.orphan();
1384     } else {
1385         return NULL;
1386     }
1387 }
1388 
/**
 * Test dispatcher: lists every test case of this class in a fixed order.
 * The TESTCASE_AUTO* macros are defined outside this file; presumably each entry
 * is numbered by its position, so entries should only be appended or reordered
 * deliberately -- TODO confirm against the macro definitions.
 * NOTE(review): TestNextSortKeyPart_4x2 and TestNextSortKeyPartUTF8_4x2 are declared
 * in the class but not listed here.
 * Falls through to return NULL when the macros do not return a perf function.
 */
UPerfFunction*
CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/)
{
    (void)par;  // unused; silences the compiler warning
    TESTCASE_AUTO_BEGIN;

    // C API, UTF-16 comparison
    TESTCASE_AUTO(TestStrcoll);
    TESTCASE_AUTO(TestStrcollNull);
    TESTCASE_AUTO(TestStrcollSimilar);

    // C API, UTF-8 comparison
    TESTCASE_AUTO(TestStrcollUTF8);
    TESTCASE_AUTO(TestStrcollUTF8Null);
    TESTCASE_AUTO(TestStrcollUTF8Similar);

    // C API, sort keys
    TESTCASE_AUTO(TestGetSortKey);
    TESTCASE_AUTO(TestGetSortKeyNull);

    // C API, incremental sort keys from UTF-16
    TESTCASE_AUTO(TestNextSortKeyPart_4All);
    TESTCASE_AUTO(TestNextSortKeyPart_4x4);
    TESTCASE_AUTO(TestNextSortKeyPart_4x8);
    TESTCASE_AUTO(TestNextSortKeyPart_32All);
    TESTCASE_AUTO(TestNextSortKeyPart_32x2);

    // C API, incremental sort keys from UTF-8
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2);

    // C++ API, UTF-16 comparison
    TESTCASE_AUTO(TestCppCompare);
    TESTCASE_AUTO(TestCppCompareNull);
    TESTCASE_AUTO(TestCppCompareSimilar);

    // C++ API, UTF-8 comparison
    TESTCASE_AUTO(TestCppCompareUTF8);
    TESTCASE_AUTO(TestCppCompareUTF8Null);
    TESTCASE_AUTO(TestCppCompareUTF8Similar);

    // C++ API, collation keys
    TESTCASE_AUTO(TestCppGetCollationKey);
    TESTCASE_AUTO(TestCppGetCollationKeyNull);

    // Sorting
    TESTCASE_AUTO(TestUniStrSort);
    TESTCASE_AUTO(TestStringPieceSortCpp);
    TESTCASE_AUTO(TestStringPieceSortC);

    // Binary search
    TESTCASE_AUTO(TestUniStrBinSearch);
    TESTCASE_AUTO(TestStringPieceBinSearchCpp);
    TESTCASE_AUTO(TestStringPieceBinSearchC);

    TESTCASE_AUTO_END;
    return NULL;
}
1440 
1441 
1442 
TestStrcoll()1443 UPerfFunction* CollPerf2Test::TestStrcoll()
1444 {
1445     UErrorCode status = U_ZERO_ERROR;
1446     Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */);
1447     if (U_FAILURE(status)) {
1448         delete testCase;
1449         return NULL;
1450     }
1451     return testCase;
1452 }
1453 
TestStrcollNull()1454 UPerfFunction* CollPerf2Test::TestStrcollNull()
1455 {
1456     UErrorCode status = U_ZERO_ERROR;
1457     Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */);
1458     if (U_FAILURE(status)) {
1459         delete testCase;
1460         return NULL;
1461     }
1462     return testCase;
1463 }
1464 
TestStrcollSimilar()1465 UPerfFunction* CollPerf2Test::TestStrcollSimilar()
1466 {
1467     UErrorCode status = U_ZERO_ERROR;
1468     Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */);
1469     if (U_FAILURE(status)) {
1470         delete testCase;
1471         return NULL;
1472     }
1473     return testCase;
1474 }
1475 
TestStrcollUTF8()1476 UPerfFunction* CollPerf2Test::TestStrcollUTF8()
1477 {
1478     UErrorCode status = U_ZERO_ERROR;
1479     StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */);
1480     if (U_FAILURE(status)) {
1481         delete testCase;
1482         return NULL;
1483     }
1484     return testCase;
1485 }
1486 
TestStrcollUTF8Null()1487 UPerfFunction* CollPerf2Test::TestStrcollUTF8Null()
1488 {
1489     UErrorCode status = U_ZERO_ERROR;
1490     StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */);
1491     if (U_FAILURE(status)) {
1492         delete testCase;
1493         return NULL;
1494     }
1495     return testCase;
1496 }
1497 
TestStrcollUTF8Similar()1498 UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar()
1499 {
1500     UErrorCode status = U_ZERO_ERROR;
1501     StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */);
1502     if (U_FAILURE(status)) {
1503         delete testCase;
1504         return NULL;
1505     }
1506     return testCase;
1507 }
1508 
TestGetSortKey()1509 UPerfFunction* CollPerf2Test::TestGetSortKey()
1510 {
1511     UErrorCode status = U_ZERO_ERROR;
1512     GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */);
1513     if (U_FAILURE(status)) {
1514         delete testCase;
1515         return NULL;
1516     }
1517     return testCase;
1518 }
1519 
TestGetSortKeyNull()1520 UPerfFunction* CollPerf2Test::TestGetSortKeyNull()
1521 {
1522     UErrorCode status = U_ZERO_ERROR;
1523     GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */);
1524     if (U_FAILURE(status)) {
1525         delete testCase;
1526         return NULL;
1527     }
1528     return testCase;
1529 }
1530 
TestNextSortKeyPart_4All()1531 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All()
1532 {
1533     UErrorCode status = U_ZERO_ERROR;
1534     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */);
1535     if (U_FAILURE(status)) {
1536         delete testCase;
1537         return NULL;
1538     }
1539     return testCase;
1540 }
1541 
TestNextSortKeyPart_4x4()1542 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4()
1543 {
1544     UErrorCode status = U_ZERO_ERROR;
1545     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */);
1546     if (U_FAILURE(status)) {
1547         delete testCase;
1548         return NULL;
1549     }
1550     return testCase;
1551 }
1552 
TestNextSortKeyPart_4x8()1553 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8()
1554 {
1555     UErrorCode status = U_ZERO_ERROR;
1556     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */);
1557     if (U_FAILURE(status)) {
1558         delete testCase;
1559         return NULL;
1560     }
1561     return testCase;
1562 }
1563 
TestNextSortKeyPart_32All()1564 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All()
1565 {
1566     UErrorCode status = U_ZERO_ERROR;
1567     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */);
1568     if (U_FAILURE(status)) {
1569         delete testCase;
1570         return NULL;
1571     }
1572     return testCase;
1573 }
1574 
TestNextSortKeyPart_32x2()1575 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2()
1576 {
1577     UErrorCode status = U_ZERO_ERROR;
1578     NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */);
1579     if (U_FAILURE(status)) {
1580         delete testCase;
1581         return NULL;
1582     }
1583     return testCase;
1584 }
1585 
TestNextSortKeyPartUTF8_4All()1586 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All()
1587 {
1588     UErrorCode status = U_ZERO_ERROR;
1589     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */);
1590     if (U_FAILURE(status)) {
1591         delete testCase;
1592         return NULL;
1593     }
1594     return testCase;
1595 }
1596 
TestNextSortKeyPartUTF8_4x4()1597 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4()
1598 {
1599     UErrorCode status = U_ZERO_ERROR;
1600     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */);
1601     if (U_FAILURE(status)) {
1602         delete testCase;
1603         return NULL;
1604     }
1605     return testCase;
1606 }
1607 
TestNextSortKeyPartUTF8_4x8()1608 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8()
1609 {
1610     UErrorCode status = U_ZERO_ERROR;
1611     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */);
1612     if (U_FAILURE(status)) {
1613         delete testCase;
1614         return NULL;
1615     }
1616     return testCase;
1617 }
1618 
TestNextSortKeyPartUTF8_32All()1619 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All()
1620 {
1621     UErrorCode status = U_ZERO_ERROR;
1622     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */);
1623     if (U_FAILURE(status)) {
1624         delete testCase;
1625         return NULL;
1626     }
1627     return testCase;
1628 }
1629 
TestNextSortKeyPartUTF8_32x2()1630 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2()
1631 {
1632     UErrorCode status = U_ZERO_ERROR;
1633     NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */);
1634     if (U_FAILURE(status)) {
1635         delete testCase;
1636         return NULL;
1637     }
1638     return testCase;
1639 }
1640 
TestCppCompare()1641 UPerfFunction* CollPerf2Test::TestCppCompare()
1642 {
1643     UErrorCode status = U_ZERO_ERROR;
1644     CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */);
1645     if (U_FAILURE(status)) {
1646         delete testCase;
1647         return NULL;
1648     }
1649     return testCase;
1650 }
1651 
TestCppCompareNull()1652 UPerfFunction* CollPerf2Test::TestCppCompareNull()
1653 {
1654     UErrorCode status = U_ZERO_ERROR;
1655     CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */);
1656     if (U_FAILURE(status)) {
1657         delete testCase;
1658         return NULL;
1659     }
1660     return testCase;
1661 }
1662 
TestCppCompareSimilar()1663 UPerfFunction* CollPerf2Test::TestCppCompareSimilar()
1664 {
1665     UErrorCode status = U_ZERO_ERROR;
1666     CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */);
1667     if (U_FAILURE(status)) {
1668         delete testCase;
1669         return NULL;
1670     }
1671     return testCase;
1672 }
1673 
TestCppCompareUTF8()1674 UPerfFunction* CollPerf2Test::TestCppCompareUTF8()
1675 {
1676     UErrorCode status = U_ZERO_ERROR;
1677     CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */);
1678     if (U_FAILURE(status)) {
1679         delete testCase;
1680         return NULL;
1681     }
1682     return testCase;
1683 }
1684 
TestCppCompareUTF8Null()1685 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null()
1686 {
1687     UErrorCode status = U_ZERO_ERROR;
1688     CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */);
1689     if (U_FAILURE(status)) {
1690         delete testCase;
1691         return NULL;
1692     }
1693     return testCase;
1694 }
1695 
TestCppCompareUTF8Similar()1696 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar()
1697 {
1698     UErrorCode status = U_ZERO_ERROR;
1699     CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */);
1700     if (U_FAILURE(status)) {
1701         delete testCase;
1702         return NULL;
1703     }
1704     return testCase;
1705 }
1706 
TestCppGetCollationKey()1707 UPerfFunction* CollPerf2Test::TestCppGetCollationKey()
1708 {
1709     UErrorCode status = U_ZERO_ERROR;
1710     CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */);
1711     if (U_FAILURE(status)) {
1712         delete testCase;
1713         return NULL;
1714     }
1715     return testCase;
1716 }
1717 
TestCppGetCollationKeyNull()1718 UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull()
1719 {
1720     UErrorCode status = U_ZERO_ERROR;
1721     CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */);
1722     if (U_FAILURE(status)) {
1723         delete testCase;
1724         return NULL;
1725     }
1726     return testCase;
1727 }
1728 
TestUniStrSort()1729 UPerfFunction* CollPerf2Test::TestUniStrSort() {
1730     UErrorCode status = U_ZERO_ERROR;
1731     UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status));
1732     if (U_FAILURE(status)) {
1733         delete testCase;
1734         return NULL;
1735     }
1736     return testCase;
1737 }
1738 
TestStringPieceSortCpp()1739 UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() {
1740     UErrorCode status = U_ZERO_ERROR;
1741     UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status));
1742     if (U_FAILURE(status)) {
1743         delete testCase;
1744         return NULL;
1745     }
1746     return testCase;
1747 }
1748 
TestStringPieceSortC()1749 UPerfFunction* CollPerf2Test::TestStringPieceSortC() {
1750     UErrorCode status = U_ZERO_ERROR;
1751     UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status));
1752     if (U_FAILURE(status)) {
1753         delete testCase;
1754         return NULL;
1755     }
1756     return testCase;
1757 }
1758 
TestUniStrBinSearch()1759 UPerfFunction* CollPerf2Test::TestUniStrBinSearch() {
1760     UErrorCode status = U_ZERO_ERROR;
1761     UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status));
1762     if (U_FAILURE(status)) {
1763         delete testCase;
1764         return NULL;
1765     }
1766     return testCase;
1767 }
1768 
TestStringPieceBinSearchCpp()1769 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() {
1770     UErrorCode status = U_ZERO_ERROR;
1771     UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status));
1772     if (U_FAILURE(status)) {
1773         delete testCase;
1774         return NULL;
1775     }
1776     return testCase;
1777 }
1778 
TestStringPieceBinSearchC()1779 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() {
1780     UErrorCode status = U_ZERO_ERROR;
1781     UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status));
1782     if (U_FAILURE(status)) {
1783         delete testCase;
1784         return NULL;
1785     }
1786     return testCase;
1787 }
1788 
1789 
main(int argc,const char * argv[])1790 int main(int argc, const char *argv[])
1791 {
1792     UErrorCode status = U_ZERO_ERROR;
1793     CollPerf2Test test(argc, argv, status);
1794 
1795     if (U_FAILURE(status)){
1796         printf("The error is %s\n", u_errorName(status));
1797         //TODO: print usage here
1798         return status;
1799     }
1800 
1801     if (test.run() == FALSE){
1802         fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n");
1803         return -1;
1804     }
1805     return 0;
1806 }
1807