1 /*
2 **********************************************************************
3 * Copyright (c) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include <string.h>
9 #include "unicode/localpointer.h"
10 #include "unicode/uperf.h"
11 #include "unicode/ucol.h"
12 #include "unicode/coll.h"
13 #include "unicode/uiter.h"
14 #include "unicode/ustring.h"
15 #include "unicode/sortkey.h"
16 #include "uarrsort.h"
17 #include "uoptions.h"
18 #include "ustr_imp.h"
19
/*
 * COMPACT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * stores `count` strings of UNIT elements back to back in one heap buffer.
 *
 *   count   number of strings appended so far
 *   index   count + 1 offsets into data; string i occupies
 *           data[index[i]] .. data[index[i + 1] - 1], terminator included
 *   data    the concatenated strings
 *
 * Usage: call append_one(len) with the length INCLUDING the terminator, then
 * fill the new slot through last(). dataOf(i)/lengthOf(i) require
 * 0 <= i < count, and last() requires count > 0; none of this is checked.
 * The copy constructor and copy assignment are declared but never defined,
 * so an attempt to copy an instance fails to link.
 * NOTE(review): realloc() results are used unchecked.
 */
#define COMPACT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    CompactArrays() : count(0), index(NULL), data(NULL) { \
        index = (int32_t *) realloc(index, sizeof(int32_t)); \
        index[0] = 0; /*offset of the first string*/ \
    } \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
        count++; \
        index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
        index[count] = index[count - 1] + theLen; \
        data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
    } \
    UNIT * last(){return data + index[count - 1];} /*slot of the newest string*/ \
    const UNIT * dataOf(int32_t i) const {return data + index[i];} \
    int32_t lengthOf(int32_t i) const {return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
};
43
// Compact string arrays used by the tests below: CA_uchar holds UChar units
// (passed to the UTF-16 APIs), CA_char holds char units (passed to the UTF-8 APIs).
COMPACT_ARRAY(CA_uchar, UChar)
COMPACT_ARRAY(CA_char, char)

// Upper bound on the number of strings the permuting (all-pairs) tests sample
// from a data array.
#define MAX_TEST_STRINGS_FOR_PERMUTING 1000
48
49 // C API test cases
50
51 //
52 // Test case taking a single test data array, calling ucol_strcoll by permuting the test data
53 //
54 class Strcoll : public UPerfFunction
55 {
56 public:
57 Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen);
58 ~Strcoll();
59 virtual void call(UErrorCode* status);
60 virtual long getOperationsPerIteration();
61
62 private:
63 const UCollator *coll;
64 const CA_uchar *source;
65 UBool useLen;
66 int32_t maxTestStrings;
67 };
68
Strcoll(const UCollator * coll,const CA_uchar * source,UBool useLen)69 Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen)
70 : coll(coll),
71 source(source),
72 useLen(useLen)
73 {
74 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
75 }
76
~Strcoll()77 Strcoll::~Strcoll()
78 {
79 }
80
call(UErrorCode * status)81 void Strcoll::call(UErrorCode* status)
82 {
83 if (U_FAILURE(*status)) return;
84
85 // call strcoll for permutation
86 int32_t divisor = source->count / maxTestStrings;
87 int32_t srcLen, tgtLen;
88 int32_t cmp = 0;
89 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
90 if (i % divisor) continue;
91 numTestStringsI++;
92 srcLen = useLen ? source->lengthOf(i) : -1;
93 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
94 if (j % divisor) continue;
95 numTestStringsJ++;
96 tgtLen = useLen ? source->lengthOf(j) : -1;
97 cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
98 }
99 }
100 // At the end, cmp must be 0
101 if (cmp != 0) {
102 *status = U_INTERNAL_PROGRAM_ERROR;
103 }
104 }
105
getOperationsPerIteration()106 long Strcoll::getOperationsPerIteration()
107 {
108 return maxTestStrings * maxTestStrings;
109 }
110
111 //
112 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
113 //
114 class Strcoll_2 : public UPerfFunction
115 {
116 public:
117 Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
118 ~Strcoll_2();
119 virtual void call(UErrorCode* status);
120 virtual long getOperationsPerIteration();
121
122 private:
123 const UCollator *coll;
124 const CA_uchar *source;
125 const CA_uchar *target;
126 UBool useLen;
127 };
128
Strcoll_2(const UCollator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)129 Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
130 : coll(coll),
131 source(source),
132 target(target),
133 useLen(useLen)
134 {
135 }
136
~Strcoll_2()137 Strcoll_2::~Strcoll_2()
138 {
139 }
140
call(UErrorCode * status)141 void Strcoll_2::call(UErrorCode* status)
142 {
143 if (U_FAILURE(*status)) return;
144
145 // call strcoll for two strings at the same index
146 if (source->count < target->count) {
147 *status = U_ILLEGAL_ARGUMENT_ERROR;
148 } else {
149 for (int32_t i = 0; i < source->count; i++) {
150 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
151 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
152 ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
153 }
154 }
155 }
156
getOperationsPerIteration()157 long Strcoll_2::getOperationsPerIteration()
158 {
159 return source->count;
160 }
161
162
163 //
164 // Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data
165 //
166 class StrcollUTF8 : public UPerfFunction
167 {
168 public:
169 StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen);
170 ~StrcollUTF8();
171 virtual void call(UErrorCode* status);
172 virtual long getOperationsPerIteration();
173
174 private:
175 const UCollator *coll;
176 const CA_char *source;
177 UBool useLen;
178 int32_t maxTestStrings;
179 };
180
StrcollUTF8(const UCollator * coll,const CA_char * source,UBool useLen)181 StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen)
182 : coll(coll),
183 source(source),
184 useLen(useLen)
185 {
186 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
187 }
188
~StrcollUTF8()189 StrcollUTF8::~StrcollUTF8()
190 {
191 }
192
call(UErrorCode * status)193 void StrcollUTF8::call(UErrorCode* status)
194 {
195 if (U_FAILURE(*status)) return;
196
197 // call strcollUTF8 for permutation
198 int32_t divisor = source->count / maxTestStrings;
199 int32_t srcLen, tgtLen;
200 int32_t cmp = 0;
201 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
202 if (i % divisor) continue;
203 numTestStringsI++;
204 srcLen = useLen ? source->lengthOf(i) : -1;
205 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
206 if (j % divisor) continue;
207 numTestStringsJ++;
208 tgtLen = useLen ? source->lengthOf(j) : -1;
209 cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status);
210 }
211 }
212 // At the end, cmp must be 0
213 if (cmp != 0) {
214 *status = U_INTERNAL_PROGRAM_ERROR;
215 }
216 }
217
getOperationsPerIteration()218 long StrcollUTF8::getOperationsPerIteration()
219 {
220 return maxTestStrings * maxTestStrings;
221 }
222
223 //
224 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
225 //
226 class StrcollUTF8_2 : public UPerfFunction
227 {
228 public:
229 StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen);
230 ~StrcollUTF8_2();
231 virtual void call(UErrorCode* status);
232 virtual long getOperationsPerIteration();
233
234 private:
235 const UCollator *coll;
236 const CA_char *source;
237 const CA_char *target;
238 UBool useLen;
239 };
240
StrcollUTF8_2(const UCollator * coll,const CA_char * source,const CA_char * target,UBool useLen)241 StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen)
242 : coll(coll),
243 source(source),
244 target(target),
245 useLen(useLen)
246 {
247 }
248
~StrcollUTF8_2()249 StrcollUTF8_2::~StrcollUTF8_2()
250 {
251 }
252
call(UErrorCode * status)253 void StrcollUTF8_2::call(UErrorCode* status)
254 {
255 if (U_FAILURE(*status)) return;
256
257 // call strcoll for two strings at the same index
258 if (source->count < target->count) {
259 *status = U_ILLEGAL_ARGUMENT_ERROR;
260 } else {
261 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
262 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
263 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
264 ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status);
265 }
266 }
267 }
268
getOperationsPerIteration()269 long StrcollUTF8_2::getOperationsPerIteration()
270 {
271 return source->count;
272 }
273
274 //
275 // Test case taking a single test data array, calling ucol_getSortKey for each
276 //
277 class GetSortKey : public UPerfFunction
278 {
279 public:
280 GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen);
281 ~GetSortKey();
282 virtual void call(UErrorCode* status);
283 virtual long getOperationsPerIteration();
284
285 private:
286 const UCollator *coll;
287 const CA_uchar *source;
288 UBool useLen;
289 };
290
GetSortKey(const UCollator * coll,const CA_uchar * source,UBool useLen)291 GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen)
292 : coll(coll),
293 source(source),
294 useLen(useLen)
295 {
296 }
297
~GetSortKey()298 GetSortKey::~GetSortKey()
299 {
300 }
301
302 #define KEY_BUF_SIZE 512
303
call(UErrorCode * status)304 void GetSortKey::call(UErrorCode* status)
305 {
306 if (U_FAILURE(*status)) return;
307
308 uint8_t key[KEY_BUF_SIZE];
309 int32_t len;
310
311 if (useLen) {
312 for (int32_t i = 0; i < source->count; i++) {
313 len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE);
314 }
315 } else {
316 for (int32_t i = 0; i < source->count; i++) {
317 len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE);
318 }
319 }
320 }
321
getOperationsPerIteration()322 long GetSortKey::getOperationsPerIteration()
323 {
324 return source->count;
325 }
326
327 //
328 // Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the
329 // given buffer size
330 //
331 class NextSortKeyPart : public UPerfFunction
332 {
333 public:
334 NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1);
335 ~NextSortKeyPart();
336 virtual void call(UErrorCode* status);
337 virtual long getOperationsPerIteration();
338 virtual long getEventsPerIteration();
339
340 private:
341 const UCollator *coll;
342 const CA_uchar *source;
343 int32_t bufSize;
344 int32_t maxIteration;
345 long events;
346 };
347
348 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPart(const UCollator * coll,const CA_uchar * source,int32_t bufSize,int32_t maxIteration)349 NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
350 : coll(coll),
351 source(source),
352 bufSize(bufSize),
353 maxIteration(maxIteration),
354 events(0)
355 {
356 }
357
~NextSortKeyPart()358 NextSortKeyPart::~NextSortKeyPart()
359 {
360 }
361
call(UErrorCode * status)362 void NextSortKeyPart::call(UErrorCode* status)
363 {
364 if (U_FAILURE(*status)) return;
365
366 uint8_t *part = (uint8_t *)malloc(bufSize);
367 uint32_t state[2];
368 UCharIterator iter;
369
370 events = 0;
371 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
372 uiter_setString(&iter, source->dataOf(i), source->lengthOf(i));
373 state[0] = 0;
374 state[1] = 0;
375 int32_t partLen = bufSize;
376 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
377 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
378 events++;
379 }
380 }
381 free(part);
382 }
383
getOperationsPerIteration()384 long NextSortKeyPart::getOperationsPerIteration()
385 {
386 return source->count;
387 }
388
getEventsPerIteration()389 long NextSortKeyPart::getEventsPerIteration()
390 {
391 return events;
392 }
393
394 //
395 // Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the
396 // given buffer size
397 //
398 class NextSortKeyPartUTF8 : public UPerfFunction
399 {
400 public:
401 NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1);
402 ~NextSortKeyPartUTF8();
403 virtual void call(UErrorCode* status);
404 virtual long getOperationsPerIteration();
405 virtual long getEventsPerIteration();
406
407 private:
408 const UCollator *coll;
409 const CA_char *source;
410 int32_t bufSize;
411 int32_t maxIteration;
412 long events;
413 };
414
415 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPartUTF8(const UCollator * coll,const CA_char * source,int32_t bufSize,int32_t maxIteration)416 NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
417 : coll(coll),
418 source(source),
419 bufSize(bufSize),
420 maxIteration(maxIteration),
421 events(0)
422 {
423 }
424
~NextSortKeyPartUTF8()425 NextSortKeyPartUTF8::~NextSortKeyPartUTF8()
426 {
427 }
428
call(UErrorCode * status)429 void NextSortKeyPartUTF8::call(UErrorCode* status)
430 {
431 if (U_FAILURE(*status)) return;
432
433 uint8_t *part = (uint8_t *)malloc(bufSize);
434 uint32_t state[2];
435 UCharIterator iter;
436
437 events = 0;
438 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
439 uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i));
440 state[0] = 0;
441 state[1] = 0;
442 int32_t partLen = bufSize;
443 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
444 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
445 events++;
446 }
447 }
448 free(part);
449 }
450
getOperationsPerIteration()451 long NextSortKeyPartUTF8::getOperationsPerIteration()
452 {
453 return source->count;
454 }
455
getEventsPerIteration()456 long NextSortKeyPartUTF8::getEventsPerIteration()
457 {
458 return events;
459 }
460
461 // CPP API test cases
462
463 //
464 // Test case taking a single test data array, calling Collator::compare by permuting the test data
465 //
466 class CppCompare : public UPerfFunction
467 {
468 public:
469 CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen);
470 ~CppCompare();
471 virtual void call(UErrorCode* status);
472 virtual long getOperationsPerIteration();
473
474 private:
475 const Collator *coll;
476 const CA_uchar *source;
477 UBool useLen;
478 int32_t maxTestStrings;
479 };
480
CppCompare(const Collator * coll,const CA_uchar * source,UBool useLen)481 CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen)
482 : coll(coll),
483 source(source),
484 useLen(useLen)
485 {
486 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
487 }
488
~CppCompare()489 CppCompare::~CppCompare()
490 {
491 }
492
call(UErrorCode * status)493 void CppCompare::call(UErrorCode* status) {
494 if (U_FAILURE(*status)) return;
495
496 // call compare for permutation of test data
497 int32_t divisor = source->count / maxTestStrings;
498 int32_t srcLen, tgtLen;
499 int32_t cmp = 0;
500 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
501 if (i % divisor) continue;
502 numTestStringsI++;
503 srcLen = useLen ? source->lengthOf(i) : -1;
504 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
505 if (j % divisor) continue;
506 numTestStringsJ++;
507 tgtLen = useLen ? source->lengthOf(j) : -1;
508 cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
509 }
510 }
511 // At the end, cmp must be 0
512 if (cmp != 0) {
513 *status = U_INTERNAL_PROGRAM_ERROR;
514 }
515 }
516
getOperationsPerIteration()517 long CppCompare::getOperationsPerIteration()
518 {
519 return maxTestStrings * maxTestStrings;
520 }
521
522 //
523 // Test case taking two test data arrays, calling Collator::compare for strings at a same index
524 //
525 class CppCompare_2 : public UPerfFunction
526 {
527 public:
528 CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
529 ~CppCompare_2();
530 virtual void call(UErrorCode* status);
531 virtual long getOperationsPerIteration();
532
533 private:
534 const Collator *coll;
535 const CA_uchar *source;
536 const CA_uchar *target;
537 UBool useLen;
538 };
539
CppCompare_2(const Collator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)540 CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
541 : coll(coll),
542 source(source),
543 target(target),
544 useLen(useLen)
545 {
546 }
547
~CppCompare_2()548 CppCompare_2::~CppCompare_2()
549 {
550 }
551
call(UErrorCode * status)552 void CppCompare_2::call(UErrorCode* status) {
553 if (U_FAILURE(*status)) return;
554
555 // call strcoll for two strings at the same index
556 if (source->count < target->count) {
557 *status = U_ILLEGAL_ARGUMENT_ERROR;
558 } else {
559 for (int32_t i = 0; i < source->count; i++) {
560 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
561 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
562 coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
563 }
564 }
565 }
566
getOperationsPerIteration()567 long CppCompare_2::getOperationsPerIteration()
568 {
569 return source->count;
570 }
571
572
573 //
574 // Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data
575 //
576 class CppCompareUTF8 : public UPerfFunction
577 {
578 public:
579 CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen);
580 ~CppCompareUTF8();
581 virtual void call(UErrorCode* status);
582 virtual long getOperationsPerIteration();
583
584 private:
585 const Collator *coll;
586 const CA_char *source;
587 UBool useLen;
588 int32_t maxTestStrings;
589 };
590
CppCompareUTF8(const Collator * coll,const CA_char * source,UBool useLen)591 CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen)
592 : coll(coll),
593 source(source),
594 useLen(useLen)
595 {
596 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
597 }
598
~CppCompareUTF8()599 CppCompareUTF8::~CppCompareUTF8()
600 {
601 }
602
call(UErrorCode * status)603 void CppCompareUTF8::call(UErrorCode* status) {
604 if (U_FAILURE(*status)) return;
605
606 // call compareUTF8 for all permutations
607 int32_t divisor = source->count / maxTestStrings;
608 StringPiece src, tgt;
609 int32_t cmp = 0;
610 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
611 if (i % divisor) continue;
612 numTestStringsI++;
613
614 if (useLen) {
615 src.set(source->dataOf(i), source->lengthOf(i));
616 } else {
617 src.set(source->dataOf(i));
618 }
619 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
620 if (j % divisor) continue;
621 numTestStringsJ++;
622
623 if (useLen) {
624 tgt.set(source->dataOf(i), source->lengthOf(i));
625 } else {
626 tgt.set(source->dataOf(i));
627 }
628 cmp += coll->compareUTF8(src, tgt, *status);
629 }
630 }
631 // At the end, cmp must be 0
632 if (cmp != 0) {
633 *status = U_INTERNAL_PROGRAM_ERROR;
634 }
635 }
636
getOperationsPerIteration()637 long CppCompareUTF8::getOperationsPerIteration()
638 {
639 return maxTestStrings * maxTestStrings;
640 }
641
642
643 //
644 // Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index
645 //
646 class CppCompareUTF8_2 : public UPerfFunction
647 {
648 public:
649 CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen);
650 ~CppCompareUTF8_2();
651 virtual void call(UErrorCode* status);
652 virtual long getOperationsPerIteration();
653
654 private:
655 const Collator *coll;
656 const CA_char *source;
657 const CA_char *target;
658 UBool useLen;
659 };
660
CppCompareUTF8_2(const Collator * coll,const CA_char * source,const CA_char * target,UBool useLen)661 CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen)
662 : coll(coll),
663 source(source),
664 target(target),
665 useLen(useLen)
666 {
667 }
668
~CppCompareUTF8_2()669 CppCompareUTF8_2::~CppCompareUTF8_2()
670 {
671 }
672
call(UErrorCode * status)673 void CppCompareUTF8_2::call(UErrorCode* status) {
674 if (U_FAILURE(*status)) return;
675
676 // call strcoll for two strings at the same index
677 StringPiece src, tgt;
678 if (source->count < target->count) {
679 *status = U_ILLEGAL_ARGUMENT_ERROR;
680 } else {
681 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
682 if (useLen) {
683 src.set(source->dataOf(i), source->lengthOf(i));
684 tgt.set(target->dataOf(i), target->lengthOf(i));
685 } else {
686 src.set(source->dataOf(i));
687 tgt.set(target->dataOf(i));
688 }
689 coll->compareUTF8(src, tgt, *status);
690 }
691 }
692 }
693
getOperationsPerIteration()694 long CppCompareUTF8_2::getOperationsPerIteration()
695 {
696 return source->count;
697 }
698
699
700 //
701 // Test case taking a single test data array, calling Collator::getCollationKey for each
702 //
703 class CppGetCollationKey : public UPerfFunction
704 {
705 public:
706 CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen);
707 ~CppGetCollationKey();
708 virtual void call(UErrorCode* status);
709 virtual long getOperationsPerIteration();
710
711 private:
712 const Collator *coll;
713 const CA_uchar *source;
714 UBool useLen;
715 };
716
CppGetCollationKey(const Collator * coll,const CA_uchar * source,UBool useLen)717 CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen)
718 : coll(coll),
719 source(source),
720 useLen(useLen)
721 {
722 }
723
~CppGetCollationKey()724 CppGetCollationKey::~CppGetCollationKey()
725 {
726 }
727
call(UErrorCode * status)728 void CppGetCollationKey::call(UErrorCode* status)
729 {
730 if (U_FAILURE(*status)) return;
731
732 CollationKey key;
733 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
734 coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status);
735 }
736 }
737
getOperationsPerIteration()738 long CppGetCollationKey::getOperationsPerIteration() {
739 return source->count;
740 }
741
742 namespace {
743
744 struct CollatorAndCounter {
CollatorAndCounter__anon918c61850111::CollatorAndCounter745 CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {}
CollatorAndCounter__anon918c61850111::CollatorAndCounter746 CollatorAndCounter(const Collator& coll, const UCollator *ucoll)
747 : coll(coll), ucoll(ucoll), counter(0) {}
748 const Collator& coll;
749 const UCollator *ucoll;
750 int32_t counter;
751 };
752
753 int32_t U_CALLCONV
UniStrCollatorComparator(const void * context,const void * left,const void * right)754 UniStrCollatorComparator(const void* context, const void* left, const void* right) {
755 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
756 const UnicodeString& leftString = **(const UnicodeString**)left;
757 const UnicodeString& rightString = **(const UnicodeString**)right;
758 UErrorCode errorCode = U_ZERO_ERROR;
759 ++cc.counter;
760 return cc.coll.compare(leftString, rightString, errorCode);
761 }
762
763 } // namespace
764
765 class CollPerfFunction : public UPerfFunction {
766 public:
CollPerfFunction(const Collator & coll,const UCollator * ucoll)767 CollPerfFunction(const Collator& coll, const UCollator *ucoll)
768 : coll(coll), ucoll(ucoll), ops(0) {}
769 virtual ~CollPerfFunction();
770 /** Calls call() to set the ops field, and returns that. */
771 virtual long getOperationsPerIteration();
772
773 protected:
774 const Collator& coll;
775 const UCollator *ucoll;
776 int32_t ops;
777 };
778
~CollPerfFunction()779 CollPerfFunction::~CollPerfFunction() {}
780
getOperationsPerIteration()781 long CollPerfFunction::getOperationsPerIteration() {
782 UErrorCode errorCode = U_ZERO_ERROR;
783 call(&errorCode);
784 return U_SUCCESS(errorCode) ? ops : 0;
785 }
786
787 class UniStrCollPerfFunction : public CollPerfFunction {
788 public:
UniStrCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)789 UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
790 : CollPerfFunction(coll, ucoll), d16(data16),
791 source(new UnicodeString*[d16->count]) {
792 for (int32_t i = 0; i < d16->count; ++i) {
793 source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i));
794 }
795 }
796 virtual ~UniStrCollPerfFunction();
797
798 protected:
799 const CA_uchar* d16;
800 UnicodeString** source;
801 };
802
~UniStrCollPerfFunction()803 UniStrCollPerfFunction::~UniStrCollPerfFunction() {
804 for (int32_t i = 0; i < d16->count; ++i) {
805 delete source[i];
806 }
807 delete[] source;
808 }
809
810 //
811 // Test case sorting an array of UnicodeString pointers.
812 //
813 class UniStrSort : public UniStrCollPerfFunction {
814 public:
UniStrSort(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)815 UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
816 : UniStrCollPerfFunction(coll, ucoll, data16),
817 dest(new UnicodeString*[d16->count]) {}
818 virtual ~UniStrSort();
819 virtual void call(UErrorCode* status);
820
821 private:
822 UnicodeString** dest; // aliases only
823 };
824
~UniStrSort()825 UniStrSort::~UniStrSort() {
826 delete[] dest;
827 }
828
call(UErrorCode * status)829 void UniStrSort::call(UErrorCode* status) {
830 if (U_FAILURE(*status)) return;
831
832 CollatorAndCounter cc(coll);
833 int32_t count = d16->count;
834 memcpy(dest, source, count * sizeof(UnicodeString *));
835 uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *),
836 UniStrCollatorComparator, &cc, TRUE, status);
837 ops = cc.counter;
838 }
839
840 namespace {
841
842 int32_t U_CALLCONV
StringPieceCollatorComparator(const void * context,const void * left,const void * right)843 StringPieceCollatorComparator(const void* context, const void* left, const void* right) {
844 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
845 const StringPiece& leftString = *(const StringPiece*)left;
846 const StringPiece& rightString = *(const StringPiece*)right;
847 UErrorCode errorCode = U_ZERO_ERROR;
848 ++cc.counter;
849 return cc.coll.compareUTF8(leftString, rightString, errorCode);
850 }
851
852 int32_t U_CALLCONV
StringPieceUCollatorComparator(const void * context,const void * left,const void * right)853 StringPieceUCollatorComparator(const void* context, const void* left, const void* right) {
854 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
855 const StringPiece& leftString = *(const StringPiece*)left;
856 const StringPiece& rightString = *(const StringPiece*)right;
857 UErrorCode errorCode = U_ZERO_ERROR;
858 ++cc.counter;
859 return ucol_strcollUTF8(cc.ucoll,
860 leftString.data(), leftString.length(),
861 rightString.data(), rightString.length(), &errorCode);
862 }
863
864 } // namespace
865
866 class StringPieceCollPerfFunction : public CollPerfFunction {
867 public:
StringPieceCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_char * data8)868 StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
869 : CollPerfFunction(coll, ucoll), d8(data8),
870 source(new StringPiece[d8->count]) {
871 for (int32_t i = 0; i < d8->count; ++i) {
872 source[i].set(d8->dataOf(i), d8->lengthOf(i));
873 }
874 }
875 virtual ~StringPieceCollPerfFunction();
876
877 protected:
878 const CA_char* d8;
879 StringPiece* source;
880 };
881
~StringPieceCollPerfFunction()882 StringPieceCollPerfFunction::~StringPieceCollPerfFunction() {
883 delete[] source;
884 }
885
886 class StringPieceSort : public StringPieceCollPerfFunction {
887 public:
StringPieceSort(const Collator & coll,const UCollator * ucoll,const CA_char * data8)888 StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
889 : StringPieceCollPerfFunction(coll, ucoll, data8),
890 dest(new StringPiece[d8->count]) {}
891 virtual ~StringPieceSort();
892
893 protected:
894 StringPiece* dest;
895 };
896
~StringPieceSort()897 StringPieceSort::~StringPieceSort() {
898 delete[] dest;
899 }
900
901 //
902 // Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8().
903 //
904 class StringPieceSortCpp : public StringPieceSort {
905 public:
StringPieceSortCpp(const Collator & coll,const UCollator * ucoll,const CA_char * data8)906 StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
907 : StringPieceSort(coll, ucoll, data8) {}
908 virtual ~StringPieceSortCpp();
909 virtual void call(UErrorCode* status);
910 };
911
~StringPieceSortCpp()912 StringPieceSortCpp::~StringPieceSortCpp() {}
913
call(UErrorCode * status)914 void StringPieceSortCpp::call(UErrorCode* status) {
915 if (U_FAILURE(*status)) return;
916
917 CollatorAndCounter cc(coll);
918 int32_t count = d8->count;
919 memcpy(dest, source, count * sizeof(StringPiece));
920 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
921 StringPieceCollatorComparator, &cc, TRUE, status);
922 ops = cc.counter;
923 }
924
925 //
926 // Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8().
927 //
928 class StringPieceSortC : public StringPieceSort {
929 public:
StringPieceSortC(const Collator & coll,const UCollator * ucoll,const CA_char * data8)930 StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
931 : StringPieceSort(coll, ucoll, data8) {}
932 virtual ~StringPieceSortC();
933 virtual void call(UErrorCode* status);
934 };
935
~StringPieceSortC()936 StringPieceSortC::~StringPieceSortC() {}
937
call(UErrorCode * status)938 void StringPieceSortC::call(UErrorCode* status) {
939 if (U_FAILURE(*status)) return;
940
941 CollatorAndCounter cc(coll, ucoll);
942 int32_t count = d8->count;
943 memcpy(dest, source, count * sizeof(StringPiece));
944 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
945 StringPieceUCollatorComparator, &cc, TRUE, status);
946 ops = cc.counter;
947 }
948
949 //
950 // Test case performing binary searches in a sorted array of UnicodeString pointers.
951 //
952 class UniStrBinSearch : public UniStrCollPerfFunction {
953 public:
UniStrBinSearch(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)954 UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
955 : UniStrCollPerfFunction(coll, ucoll, data16) {}
956 virtual ~UniStrBinSearch();
957 virtual void call(UErrorCode* status);
958 };
959
~UniStrBinSearch()960 UniStrBinSearch::~UniStrBinSearch() {}
961
call(UErrorCode * status)962 void UniStrBinSearch::call(UErrorCode* status) {
963 if (U_FAILURE(*status)) return;
964
965 CollatorAndCounter cc(coll);
966 int32_t count = d16->count;
967 for (int32_t i = 0; i < count; ++i) {
968 (void)uprv_stableBinarySearch((char *)source, count,
969 source + i, (int32_t)sizeof(UnicodeString *),
970 UniStrCollatorComparator, &cc);
971 }
972 ops = cc.counter;
973 }
974
975 class StringPieceBinSearch : public StringPieceCollPerfFunction {
976 public:
StringPieceBinSearch(const Collator & coll,const UCollator * ucoll,const CA_char * data8)977 StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
978 : StringPieceCollPerfFunction(coll, ucoll, data8) {}
979 virtual ~StringPieceBinSearch();
980 };
981
~StringPieceBinSearch()982 StringPieceBinSearch::~StringPieceBinSearch() {}
983
984 //
985 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's
986 // with Collator::compareUTF8().
987 //
988 class StringPieceBinSearchCpp : public StringPieceBinSearch {
989 public:
StringPieceBinSearchCpp(const Collator & coll,const UCollator * ucoll,const CA_char * data8)990 StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
991 : StringPieceBinSearch(coll, ucoll, data8) {}
992 virtual ~StringPieceBinSearchCpp();
993 virtual void call(UErrorCode* status);
994 };
995
~StringPieceBinSearchCpp()996 StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {}
997
call(UErrorCode * status)998 void StringPieceBinSearchCpp::call(UErrorCode* status) {
999 if (U_FAILURE(*status)) return;
1000
1001 CollatorAndCounter cc(coll);
1002 int32_t count = d8->count;
1003 for (int32_t i = 0; i < count; ++i) {
1004 (void)uprv_stableBinarySearch((char *)source, count,
1005 source + i, (int32_t)sizeof(StringPiece),
1006 StringPieceCollatorComparator, &cc);
1007 }
1008 ops = cc.counter;
1009 }
1010
1011 //
1012 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's
1013 // with ucol_strcollUTF8().
1014 //
1015 class StringPieceBinSearchC : public StringPieceBinSearch {
1016 public:
StringPieceBinSearchC(const Collator & coll,const UCollator * ucoll,const CA_char * data8)1017 StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
1018 : StringPieceBinSearch(coll, ucoll, data8) {}
1019 virtual ~StringPieceBinSearchC();
1020 virtual void call(UErrorCode* status);
1021 };
1022
~StringPieceBinSearchC()1023 StringPieceBinSearchC::~StringPieceBinSearchC() {}
1024
call(UErrorCode * status)1025 void StringPieceBinSearchC::call(UErrorCode* status) {
1026 if (U_FAILURE(*status)) return;
1027
1028 CollatorAndCounter cc(coll, ucoll);
1029 int32_t count = d8->count;
1030 for (int32_t i = 0; i < count; ++i) {
1031 (void)uprv_stableBinarySearch((char *)source, count,
1032 source + i, (int32_t)sizeof(StringPiece),
1033 StringPieceUCollatorComparator, &cc);
1034 }
1035 ops = cc.counter;
1036 }
1037
1038
1039 class CollPerf2Test : public UPerfTest
1040 {
1041 public:
1042 CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status);
1043 ~CollPerf2Test();
1044 virtual UPerfFunction* runIndexedTest(
1045 int32_t index, UBool exec, const char *&name, char *par = NULL);
1046
1047 private:
1048 UCollator* coll;
1049 Collator* collObj;
1050
1051 int32_t count;
1052 CA_uchar* data16;
1053 CA_char* data8;
1054
1055 CA_uchar* modData16;
1056 CA_char* modData8;
1057
1058 CA_uchar* sortedData16;
1059 CA_char* sortedData8;
1060
1061 CA_uchar* randomData16;
1062 CA_char* randomData8;
1063
1064 const CA_uchar* getData16(UErrorCode &status);
1065 const CA_char* getData8(UErrorCode &status);
1066
1067 const CA_uchar* getModData16(UErrorCode &status);
1068 const CA_char* getModData8(UErrorCode &status);
1069
1070 const CA_uchar* getSortedData16(UErrorCode &status);
1071 const CA_char* getSortedData8(UErrorCode &status);
1072
1073 const CA_uchar* getRandomData16(UErrorCode &status);
1074 const CA_char* getRandomData8(UErrorCode &status);
1075
1076 static CA_uchar* sortData16(
1077 const CA_uchar* d16,
1078 UComparator *cmp, const void *context,
1079 UErrorCode &status);
1080 static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status);
1081
1082 UPerfFunction* TestStrcoll();
1083 UPerfFunction* TestStrcollNull();
1084 UPerfFunction* TestStrcollSimilar();
1085
1086 UPerfFunction* TestStrcollUTF8();
1087 UPerfFunction* TestStrcollUTF8Null();
1088 UPerfFunction* TestStrcollUTF8Similar();
1089
1090 UPerfFunction* TestGetSortKey();
1091 UPerfFunction* TestGetSortKeyNull();
1092
1093 UPerfFunction* TestNextSortKeyPart_4All();
1094 UPerfFunction* TestNextSortKeyPart_4x2();
1095 UPerfFunction* TestNextSortKeyPart_4x4();
1096 UPerfFunction* TestNextSortKeyPart_4x8();
1097 UPerfFunction* TestNextSortKeyPart_32All();
1098 UPerfFunction* TestNextSortKeyPart_32x2();
1099
1100 UPerfFunction* TestNextSortKeyPartUTF8_4All();
1101 UPerfFunction* TestNextSortKeyPartUTF8_4x2();
1102 UPerfFunction* TestNextSortKeyPartUTF8_4x4();
1103 UPerfFunction* TestNextSortKeyPartUTF8_4x8();
1104 UPerfFunction* TestNextSortKeyPartUTF8_32All();
1105 UPerfFunction* TestNextSortKeyPartUTF8_32x2();
1106
1107 UPerfFunction* TestCppCompare();
1108 UPerfFunction* TestCppCompareNull();
1109 UPerfFunction* TestCppCompareSimilar();
1110
1111 UPerfFunction* TestCppCompareUTF8();
1112 UPerfFunction* TestCppCompareUTF8Null();
1113 UPerfFunction* TestCppCompareUTF8Similar();
1114
1115 UPerfFunction* TestCppGetCollationKey();
1116 UPerfFunction* TestCppGetCollationKeyNull();
1117
1118 UPerfFunction* TestUniStrSort();
1119 UPerfFunction* TestStringPieceSortCpp();
1120 UPerfFunction* TestStringPieceSortC();
1121
1122 UPerfFunction* TestUniStrBinSearch();
1123 UPerfFunction* TestStringPieceBinSearchCpp();
1124 UPerfFunction* TestStringPieceBinSearchC();
1125 };
1126
CollPerf2Test(int32_t argc,const char * argv[],UErrorCode & status)1127 CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) :
1128 UPerfTest(argc, argv, status),
1129 coll(NULL),
1130 collObj(NULL),
1131 count(0),
1132 data16(NULL),
1133 data8(NULL),
1134 modData16(NULL),
1135 modData8(NULL),
1136 sortedData16(NULL),
1137 sortedData8(NULL),
1138 randomData16(NULL),
1139 randomData8(NULL)
1140 {
1141 if (U_FAILURE(status)) {
1142 return;
1143 }
1144
1145 if (locale == NULL){
1146 locale = "root";
1147 }
1148
1149 // Set up an ICU collator.
1150 // Starting with ICU 54 (ticket #8260), this supports standard collation locale keywords.
1151 coll = ucol_open(locale, &status);
1152 collObj = Collator::createInstance(locale, status);
1153 }
1154
~CollPerf2Test()1155 CollPerf2Test::~CollPerf2Test()
1156 {
1157 ucol_close(coll);
1158 delete collObj;
1159
1160 delete data16;
1161 delete data8;
1162 delete modData16;
1163 delete modData8;
1164 delete sortedData16;
1165 delete sortedData8;
1166 delete randomData16;
1167 delete randomData8;
1168 }
1169
1170 #define MAX_NUM_DATA 10000
1171
getData16(UErrorCode & status)1172 const CA_uchar* CollPerf2Test::getData16(UErrorCode &status)
1173 {
1174 if (U_FAILURE(status)) return NULL;
1175 if (data16) return data16;
1176
1177 CA_uchar* d16 = new CA_uchar();
1178 const UChar *line = NULL;
1179 int32_t len = 0;
1180 int32_t numData = 0;
1181
1182 for (;;) {
1183 line = ucbuf_readline(ucharBuf, &len, &status);
1184 if (line == NULL || U_FAILURE(status)) break;
1185
1186 // Refer to the source code of ucbuf_readline()
1187 // 1. 'len' includes the line terminal symbols
1188 // 2. The length of the line terminal symbols is only one character
1189 // 3. The Windows CR LF line terminal symbols will be converted to CR
1190
1191 if (len == 1 || line[0] == 0x23 /* '#' */) {
1192 continue; // skip empty/comment line
1193 } else {
1194 d16->append_one(len);
1195 UChar *p = d16->last();
1196 u_memcpy(p, line, len - 1); // exclude the CR
1197 p[len - 1] = 0; // NUL-terminate
1198
1199 numData++;
1200 if (numData >= MAX_NUM_DATA) break;
1201 }
1202 }
1203
1204 if (U_SUCCESS(status)) {
1205 data16 = d16;
1206 } else {
1207 delete d16;
1208 }
1209
1210 return data16;
1211 }
1212
getData8(UErrorCode & status)1213 const CA_char* CollPerf2Test::getData8(UErrorCode &status)
1214 {
1215 if (U_FAILURE(status)) return NULL;
1216 if (data8) return data8;
1217 return data8 = getData8FromData16(getData16(status), status);
1218 }
1219
getModData16(UErrorCode & status)1220 const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status)
1221 {
1222 if (U_FAILURE(status)) return NULL;
1223 if (modData16) return modData16;
1224
1225 const CA_uchar* d16 = getData16(status);
1226 if (U_FAILURE(status)) return NULL;
1227
1228 CA_uchar* modData16 = new CA_uchar();
1229
1230 for (int32_t i = 0; i < d16->count; i++) {
1231 const UChar *s = d16->dataOf(i);
1232 int32_t len = d16->lengthOf(i) + 1; // including NULL terminator
1233
1234 modData16->append_one(len);
1235 u_memcpy(modData16->last(), s, len);
1236
1237 // replacing the last character with a different character
1238 UChar *lastChar = &modData16->last()[len -2];
1239 for (int32_t j = i + 1; j != i; j++) {
1240 if (j >= d16->count) {
1241 j = 0;
1242 }
1243 const UChar *s1 = d16->dataOf(j);
1244 UChar lastChar1 = s1[d16->lengthOf(j) - 1];
1245 if (*lastChar != lastChar1) {
1246 *lastChar = lastChar1;
1247 break;
1248 }
1249 }
1250 }
1251
1252 return modData16;
1253 }
1254
getModData8(UErrorCode & status)1255 const CA_char* CollPerf2Test::getModData8(UErrorCode &status)
1256 {
1257 if (U_FAILURE(status)) return NULL;
1258 if (modData8) return modData8;
1259 return modData8 = getData8FromData16(getModData16(status), status);
1260 }
1261
1262 namespace {
1263
1264 struct ArrayAndColl {
ArrayAndColl__anon918c61850311::ArrayAndColl1265 ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {}
1266 const CA_uchar* d16;
1267 const Collator& coll;
1268 };
1269
1270 int32_t U_CALLCONV
U16CollatorComparator(const void * context,const void * left,const void * right)1271 U16CollatorComparator(const void* context, const void* left, const void* right) {
1272 const ArrayAndColl& ac = *(const ArrayAndColl*)context;
1273 const CA_uchar* d16 = ac.d16;
1274 int32_t leftIndex = *(const int32_t*)left;
1275 int32_t rightIndex = *(const int32_t*)right;
1276 UErrorCode errorCode = U_ZERO_ERROR;
1277 return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex),
1278 d16->dataOf(rightIndex), d16->lengthOf(rightIndex),
1279 errorCode);
1280 }
1281
1282 int32_t U_CALLCONV
U16HashComparator(const void * context,const void * left,const void * right)1283 U16HashComparator(const void* context, const void* left, const void* right) {
1284 const CA_uchar* d16 = (const CA_uchar*)context;
1285 int32_t leftIndex = *(const int32_t*)left;
1286 int32_t rightIndex = *(const int32_t*)right;
1287 int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex));
1288 int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex));
1289 return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1;
1290 }
1291
1292 } // namespace
1293
getSortedData16(UErrorCode & status)1294 const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) {
1295 if (U_FAILURE(status)) return NULL;
1296 if (sortedData16) return sortedData16;
1297
1298 ArrayAndColl ac(getData16(status), *collObj);
1299 return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status);
1300 }
1301
getSortedData8(UErrorCode & status)1302 const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) {
1303 if (U_FAILURE(status)) return NULL;
1304 if (sortedData8) return sortedData8;
1305 return sortedData8 = getData8FromData16(getSortedData16(status), status);
1306 }
1307
getRandomData16(UErrorCode & status)1308 const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) {
1309 if (U_FAILURE(status)) return NULL;
1310 if (randomData16) return randomData16;
1311
1312 // Sort the strings by their hash codes, which should be a reasonably pseudo-random order.
1313 const CA_uchar* d16 = getData16(status);
1314 return randomData16 = sortData16(d16, U16HashComparator, d16, status);
1315 }
1316
getRandomData8(UErrorCode & status)1317 const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) {
1318 if (U_FAILURE(status)) return NULL;
1319 if (randomData8) return randomData8;
1320 return randomData8 = getData8FromData16(getRandomData16(status), status);
1321 }
1322
// Builds a new array holding the strings of d16 in the order defined by cmp.
// An array of int32_t indexes is sorted with uprv_sortArray(), so cmp receives
// pointers to indexes (plus context), and the strings are then copied out in
// that order. The caller owns the result. Returns NULL if status indicates failure.
CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16,
                                    UComparator *cmp, const void *context,
                                    UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    const int32_t numStrings = d16->count;

    // Start from the identity permutation and sort it.
    LocalArray<int32_t> order(new int32_t[numStrings]);
    int32_t k = 0;
    while (k < numStrings) {
        order[k] = k;
        ++k;
    }
    uprv_sortArray(order.getAlias(), numStrings, 4, cmp, context, TRUE, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    // Copy the strings in sorted order into a new array.
    LocalPointer<CA_uchar> sorted(new CA_uchar());
    for (k = 0; k < numStrings; ++k) {
        const int32_t from = order[k];
        const int32_t unitsWithNul = d16->lengthOf(from) + 1;  // including NULL terminator
        sorted->append_one(unitsWithNul);
        u_memcpy(sorted->last(), d16->dataOf(from), unitsWithNul);
    }

    if (U_FAILURE(status)) {
        return NULL;
    }
    return sorted.orphan();
}
1352
getData8FromData16(const CA_uchar * d16,UErrorCode & status)1353 CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) {
1354 if (U_FAILURE(status)) return NULL;
1355
1356 // UTF-16 -> UTF-8 conversion
1357 LocalPointer<CA_char> d8(new CA_char());
1358 for (int32_t i = 0; i < d16->count; i++) {
1359 const UChar *s16 = d16->dataOf(i);
1360 int32_t length16 = d16->lengthOf(i);
1361
1362 // get length in UTF-8
1363 int32_t length8;
1364 u_strToUTF8(NULL, 0, &length8, s16, length16, &status);
1365 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
1366 status = U_ZERO_ERROR;
1367 } else {
1368 break;
1369 }
1370 int32_t capacity8 = length8 + 1; // plus terminal NULL
1371 d8->append_one(capacity8);
1372
1373 // convert to UTF-8
1374 u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status);
1375 if (U_FAILURE(status)) break;
1376 }
1377
1378 if (U_SUCCESS(status)) {
1379 return d8.orphan();
1380 } else {
1381 return NULL;
1382 }
1383 }
1384
// Test dispatch entry point: lists the test case factory methods of this class
// through the TESTCASE_AUTO* macros.
// NOTE(review): the macros are defined outside this file section; presumably
// they use index/exec/name to select one entry in the order listed below and
// return its result, so the order of the entries matters. Confirm against the
// macro definitions.
// par is unused. The trailing "return NULL" is reached only if the macros do
// not return.
UPerfFunction*
CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/)
{
    (void)par;
    TESTCASE_AUTO_BEGIN;

    // C API: ucol_strcoll-style comparisons on UTF-16 data
    TESTCASE_AUTO(TestStrcoll);
    TESTCASE_AUTO(TestStrcollNull);
    TESTCASE_AUTO(TestStrcollSimilar);

    // C API: comparisons on UTF-8 data
    TESTCASE_AUTO(TestStrcollUTF8);
    TESTCASE_AUTO(TestStrcollUTF8Null);
    TESTCASE_AUTO(TestStrcollUTF8Similar);

    // C API: sort keys
    TESTCASE_AUTO(TestGetSortKey);
    TESTCASE_AUTO(TestGetSortKeyNull);

    // C API: partial sort keys, UTF-16 data
    TESTCASE_AUTO(TestNextSortKeyPart_4All);
    TESTCASE_AUTO(TestNextSortKeyPart_4x4);
    TESTCASE_AUTO(TestNextSortKeyPart_4x8);
    TESTCASE_AUTO(TestNextSortKeyPart_32All);
    TESTCASE_AUTO(TestNextSortKeyPart_32x2);

    // C API: partial sort keys, UTF-8 data
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All);
    TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2);

    // C++ API: comparisons on UTF-16 data
    TESTCASE_AUTO(TestCppCompare);
    TESTCASE_AUTO(TestCppCompareNull);
    TESTCASE_AUTO(TestCppCompareSimilar);

    // C++ API: comparisons on UTF-8 data
    TESTCASE_AUTO(TestCppCompareUTF8);
    TESTCASE_AUTO(TestCppCompareUTF8Null);
    TESTCASE_AUTO(TestCppCompareUTF8Similar);

    // C++ API: collation keys
    TESTCASE_AUTO(TestCppGetCollationKey);
    TESTCASE_AUTO(TestCppGetCollationKeyNull);

    // Sorting
    TESTCASE_AUTO(TestUniStrSort);
    TESTCASE_AUTO(TestStringPieceSortCpp);
    TESTCASE_AUTO(TestStringPieceSortC);

    // Binary search
    TESTCASE_AUTO(TestUniStrBinSearch);
    TESTCASE_AUTO(TestStringPieceBinSearchCpp);
    TESTCASE_AUTO(TestStringPieceBinSearchC);

    TESTCASE_AUTO_END;
    return NULL;
}
1436
1437
1438
TestStrcoll()1439 UPerfFunction* CollPerf2Test::TestStrcoll()
1440 {
1441 UErrorCode status = U_ZERO_ERROR;
1442 Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */);
1443 if (U_FAILURE(status)) {
1444 delete testCase;
1445 return NULL;
1446 }
1447 return testCase;
1448 }
1449
TestStrcollNull()1450 UPerfFunction* CollPerf2Test::TestStrcollNull()
1451 {
1452 UErrorCode status = U_ZERO_ERROR;
1453 Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */);
1454 if (U_FAILURE(status)) {
1455 delete testCase;
1456 return NULL;
1457 }
1458 return testCase;
1459 }
1460
TestStrcollSimilar()1461 UPerfFunction* CollPerf2Test::TestStrcollSimilar()
1462 {
1463 UErrorCode status = U_ZERO_ERROR;
1464 Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */);
1465 if (U_FAILURE(status)) {
1466 delete testCase;
1467 return NULL;
1468 }
1469 return testCase;
1470 }
1471
TestStrcollUTF8()1472 UPerfFunction* CollPerf2Test::TestStrcollUTF8()
1473 {
1474 UErrorCode status = U_ZERO_ERROR;
1475 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */);
1476 if (U_FAILURE(status)) {
1477 delete testCase;
1478 return NULL;
1479 }
1480 return testCase;
1481 }
1482
TestStrcollUTF8Null()1483 UPerfFunction* CollPerf2Test::TestStrcollUTF8Null()
1484 {
1485 UErrorCode status = U_ZERO_ERROR;
1486 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */);
1487 if (U_FAILURE(status)) {
1488 delete testCase;
1489 return NULL;
1490 }
1491 return testCase;
1492 }
1493
TestStrcollUTF8Similar()1494 UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar()
1495 {
1496 UErrorCode status = U_ZERO_ERROR;
1497 StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */);
1498 if (U_FAILURE(status)) {
1499 delete testCase;
1500 return NULL;
1501 }
1502 return testCase;
1503 }
1504
TestGetSortKey()1505 UPerfFunction* CollPerf2Test::TestGetSortKey()
1506 {
1507 UErrorCode status = U_ZERO_ERROR;
1508 GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */);
1509 if (U_FAILURE(status)) {
1510 delete testCase;
1511 return NULL;
1512 }
1513 return testCase;
1514 }
1515
TestGetSortKeyNull()1516 UPerfFunction* CollPerf2Test::TestGetSortKeyNull()
1517 {
1518 UErrorCode status = U_ZERO_ERROR;
1519 GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */);
1520 if (U_FAILURE(status)) {
1521 delete testCase;
1522 return NULL;
1523 }
1524 return testCase;
1525 }
1526
TestNextSortKeyPart_4All()1527 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All()
1528 {
1529 UErrorCode status = U_ZERO_ERROR;
1530 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */);
1531 if (U_FAILURE(status)) {
1532 delete testCase;
1533 return NULL;
1534 }
1535 return testCase;
1536 }
1537
TestNextSortKeyPart_4x4()1538 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4()
1539 {
1540 UErrorCode status = U_ZERO_ERROR;
1541 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */);
1542 if (U_FAILURE(status)) {
1543 delete testCase;
1544 return NULL;
1545 }
1546 return testCase;
1547 }
1548
TestNextSortKeyPart_4x8()1549 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8()
1550 {
1551 UErrorCode status = U_ZERO_ERROR;
1552 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */);
1553 if (U_FAILURE(status)) {
1554 delete testCase;
1555 return NULL;
1556 }
1557 return testCase;
1558 }
1559
TestNextSortKeyPart_32All()1560 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All()
1561 {
1562 UErrorCode status = U_ZERO_ERROR;
1563 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */);
1564 if (U_FAILURE(status)) {
1565 delete testCase;
1566 return NULL;
1567 }
1568 return testCase;
1569 }
1570
TestNextSortKeyPart_32x2()1571 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2()
1572 {
1573 UErrorCode status = U_ZERO_ERROR;
1574 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */);
1575 if (U_FAILURE(status)) {
1576 delete testCase;
1577 return NULL;
1578 }
1579 return testCase;
1580 }
1581
TestNextSortKeyPartUTF8_4All()1582 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All()
1583 {
1584 UErrorCode status = U_ZERO_ERROR;
1585 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */);
1586 if (U_FAILURE(status)) {
1587 delete testCase;
1588 return NULL;
1589 }
1590 return testCase;
1591 }
1592
TestNextSortKeyPartUTF8_4x4()1593 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4()
1594 {
1595 UErrorCode status = U_ZERO_ERROR;
1596 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */);
1597 if (U_FAILURE(status)) {
1598 delete testCase;
1599 return NULL;
1600 }
1601 return testCase;
1602 }
1603
TestNextSortKeyPartUTF8_4x8()1604 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8()
1605 {
1606 UErrorCode status = U_ZERO_ERROR;
1607 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */);
1608 if (U_FAILURE(status)) {
1609 delete testCase;
1610 return NULL;
1611 }
1612 return testCase;
1613 }
1614
TestNextSortKeyPartUTF8_32All()1615 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All()
1616 {
1617 UErrorCode status = U_ZERO_ERROR;
1618 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */);
1619 if (U_FAILURE(status)) {
1620 delete testCase;
1621 return NULL;
1622 }
1623 return testCase;
1624 }
1625
TestNextSortKeyPartUTF8_32x2()1626 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2()
1627 {
1628 UErrorCode status = U_ZERO_ERROR;
1629 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */);
1630 if (U_FAILURE(status)) {
1631 delete testCase;
1632 return NULL;
1633 }
1634 return testCase;
1635 }
1636
TestCppCompare()1637 UPerfFunction* CollPerf2Test::TestCppCompare()
1638 {
1639 UErrorCode status = U_ZERO_ERROR;
1640 CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */);
1641 if (U_FAILURE(status)) {
1642 delete testCase;
1643 return NULL;
1644 }
1645 return testCase;
1646 }
1647
TestCppCompareNull()1648 UPerfFunction* CollPerf2Test::TestCppCompareNull()
1649 {
1650 UErrorCode status = U_ZERO_ERROR;
1651 CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */);
1652 if (U_FAILURE(status)) {
1653 delete testCase;
1654 return NULL;
1655 }
1656 return testCase;
1657 }
1658
TestCppCompareSimilar()1659 UPerfFunction* CollPerf2Test::TestCppCompareSimilar()
1660 {
1661 UErrorCode status = U_ZERO_ERROR;
1662 CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */);
1663 if (U_FAILURE(status)) {
1664 delete testCase;
1665 return NULL;
1666 }
1667 return testCase;
1668 }
1669
TestCppCompareUTF8()1670 UPerfFunction* CollPerf2Test::TestCppCompareUTF8()
1671 {
1672 UErrorCode status = U_ZERO_ERROR;
1673 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */);
1674 if (U_FAILURE(status)) {
1675 delete testCase;
1676 return NULL;
1677 }
1678 return testCase;
1679 }
1680
TestCppCompareUTF8Null()1681 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null()
1682 {
1683 UErrorCode status = U_ZERO_ERROR;
1684 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */);
1685 if (U_FAILURE(status)) {
1686 delete testCase;
1687 return NULL;
1688 }
1689 return testCase;
1690 }
1691
TestCppCompareUTF8Similar()1692 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar()
1693 {
1694 UErrorCode status = U_ZERO_ERROR;
1695 CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */);
1696 if (U_FAILURE(status)) {
1697 delete testCase;
1698 return NULL;
1699 }
1700 return testCase;
1701 }
1702
TestCppGetCollationKey()1703 UPerfFunction* CollPerf2Test::TestCppGetCollationKey()
1704 {
1705 UErrorCode status = U_ZERO_ERROR;
1706 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */);
1707 if (U_FAILURE(status)) {
1708 delete testCase;
1709 return NULL;
1710 }
1711 return testCase;
1712 }
1713
TestCppGetCollationKeyNull()1714 UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull()
1715 {
1716 UErrorCode status = U_ZERO_ERROR;
1717 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */);
1718 if (U_FAILURE(status)) {
1719 delete testCase;
1720 return NULL;
1721 }
1722 return testCase;
1723 }
1724
TestUniStrSort()1725 UPerfFunction* CollPerf2Test::TestUniStrSort() {
1726 UErrorCode status = U_ZERO_ERROR;
1727 UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status));
1728 if (U_FAILURE(status)) {
1729 delete testCase;
1730 return NULL;
1731 }
1732 return testCase;
1733 }
1734
TestStringPieceSortCpp()1735 UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() {
1736 UErrorCode status = U_ZERO_ERROR;
1737 UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status));
1738 if (U_FAILURE(status)) {
1739 delete testCase;
1740 return NULL;
1741 }
1742 return testCase;
1743 }
1744
TestStringPieceSortC()1745 UPerfFunction* CollPerf2Test::TestStringPieceSortC() {
1746 UErrorCode status = U_ZERO_ERROR;
1747 UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status));
1748 if (U_FAILURE(status)) {
1749 delete testCase;
1750 return NULL;
1751 }
1752 return testCase;
1753 }
1754
TestUniStrBinSearch()1755 UPerfFunction* CollPerf2Test::TestUniStrBinSearch() {
1756 UErrorCode status = U_ZERO_ERROR;
1757 UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status));
1758 if (U_FAILURE(status)) {
1759 delete testCase;
1760 return NULL;
1761 }
1762 return testCase;
1763 }
1764
TestStringPieceBinSearchCpp()1765 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() {
1766 UErrorCode status = U_ZERO_ERROR;
1767 UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status));
1768 if (U_FAILURE(status)) {
1769 delete testCase;
1770 return NULL;
1771 }
1772 return testCase;
1773 }
1774
TestStringPieceBinSearchC()1775 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() {
1776 UErrorCode status = U_ZERO_ERROR;
1777 UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status));
1778 if (U_FAILURE(status)) {
1779 delete testCase;
1780 return NULL;
1781 }
1782 return testCase;
1783 }
1784
1785
main(int argc,const char * argv[])1786 int main(int argc, const char *argv[])
1787 {
1788 UErrorCode status = U_ZERO_ERROR;
1789 CollPerf2Test test(argc, argv, status);
1790
1791 if (U_FAILURE(status)){
1792 printf("The error is %s\n", u_errorName(status));
1793 //TODO: print usage here
1794 return status;
1795 }
1796
1797 if (test.run() == FALSE){
1798 fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n");
1799 return -1;
1800 }
1801 return 0;
1802 }
1803