1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved.
4 *
5 ********************************************************************/
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <locale.h>
10 #include <limits.h>
11 #include <string.h>
12 #include "unicode/uperf.h"
13 #include "uoptions.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
16
// When the Win32 API is not available, map the Windows type names used by
// the declarations below onto portable types so the file still compiles.
#if !U_PLATFORM_HAS_WIN32_API
#define DWORD uint32_t
#define WCHAR wchar_t
#endif
21
/* Stores an array of string<UNIT> in one contiguous buffer.
Since a string<UNIT> is itself treated as an array of UNIT, this
class eases memory management for an array of string<UNIT>.
*/
26
27 //template<typename UNIT>
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    /* Copy operations are declared here but given no definition in this struct. */ \
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    int32_t count;   /* number of strings appended so far */ \
    int32_t * index; /* index[i] is the offset of string i in data; index[count] is the number of units in use */ \
    UNIT * data;     /* one buffer holding every string back to back */ \
    \
    /* Starts empty: no strings, and an offset table that only holds the leading 0. */ \
    CompactArrays():count(0), index(NULL), data(NULL){ \
        index = (int32_t *) malloc(sizeof(int32_t)); \
        index[0] = 0; \
    } \
    ~CompactArrays(){free(data);free(index);} \
    \
    /* Makes room for one more string of theLen units (terminator included). */ \
    /* Both buffers may move, so pointers obtained earlier must not be kept. */ \
    void append_one(int32_t theLen){ \
        const int32_t slot = ++count; \
        index = (int32_t *) realloc(index, sizeof(int32_t) * (slot + 1)); \
        index[slot] = index[slot - 1] + theLen; \
        data = (UNIT *) realloc(data, sizeof(UNIT) * index[slot]); \
    } \
    /* Start of the most recently appended string. */ \
    UNIT * last(){return dataOf(count - 1);} \
    /* Start of string i. */ \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    /* Length of string i, not counting its terminator. */ \
    int32_t lengthOf(int i){return (index[i+1] - index[i]) - 1; } \
};
51
52 //typedef CompactArrays<UChar> CA_uchar;
53 //typedef CompactArrays<char> CA_char;
54 //typedef CompactArrays<uint8_t> CA_uint8;
55 //typedef CompactArrays<WCHAR> CA_win_wchar;
56
// Concrete compact-array types generated from the macro above, one per
// code-unit type: UChar text, char strings/keys, byte keys and WCHAR text.
COMPATCT_ARRAY(CA_uchar, UChar)
COMPATCT_ARRAY(CA_char, char)
COMPATCT_ARRAY(CA_uint8, uint8_t)
COMPATCT_ARRAY(CA_win_wchar, WCHAR)
61
62
// One record per input line. Each pointer refers to storage owned by one of
// the compact arrays in CollPerfTest (filled in by CollPerfTest::prepareData());
// the record itself owns nothing. Arrays of DataIndex are what qsort() and the
// binary-search tests reorder and compare.
struct DataIndex {
    // qsort() callbacks receive only the two elements being compared, so the
    // collator and language id they need are kept in static members.
    static DWORD win_langid; // for qsort callback function
    static UCollator * col; // for qsort callback function
    uint8_t * icu_key;      // ICU sort key bytes for this line
    UChar * icu_data;       // the line as UTF-16 text
    int32_t icu_data_len;   // length of icu_data, excluding the terminator
    char* posix_key;        // strxfrm() key of posix_data
    char* posix_data;       // the line converted to UTF-8
    int32_t posix_data_len; // length of posix_data, excluding the terminator
    // The win_* fields are assigned by prepareData() only when
    // U_PLATFORM_HAS_WIN32_API is set.
    char* win_key;          // LCMapStringW(LCMAP_SORTKEY) output
    WCHAR * win_data;       // the line as WCHAR text
    int32_t win_data_len;   // length of win_data, excluding the terminator
};
// Storage for the static members declared above.
DWORD DataIndex::win_langid;
UCollator * DataIndex::col;
78
79
80
81 class CmdKeyGen : public UPerfFunction {
82 typedef void (CmdKeyGen::* Func)(int32_t);
83 enum{MAX_KEY_LENGTH = 5000};
84 UCollator * col;
85 DWORD win_langid;
86 int32_t count;
87 DataIndex * data;
88 Func fn;
89
90 union { // to save sapce
91 uint8_t icu_key[MAX_KEY_LENGTH];
92 char posix_key[MAX_KEY_LENGTH];
93 WCHAR win_key[MAX_KEY_LENGTH];
94 };
95 public:
CmdKeyGen(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * data,Func fn,int32_t)96 CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
97 :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
98
getOperationsPerIteration()99 virtual long getOperationsPerIteration(){return count;}
100
call(UErrorCode * status)101 virtual void call(UErrorCode* status){
102 for(int32_t i = 0; i< count; i++){
103 (this->*fn)(i);
104 }
105 }
106
icu_key_null(int32_t i)107 void icu_key_null(int32_t i){
108 ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
109 }
110
icu_key_len(int32_t i)111 void icu_key_len(int32_t i){
112 ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
113 }
114
115 #if U_PLATFORM_HAS_WIN32_API
116 // pre-generated in CollPerfTest::prepareData(), need not to check error here
win_key_null(int32_t i)117 void win_key_null(int32_t i){
118 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize)
119 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
120 }
121
win_key_len(int32_t i)122 void win_key_len(int32_t i){
123 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
124 }
125 #endif
126
posix_key_null(int32_t i)127 void posix_key_null(int32_t i){
128 strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
129 }
130 };
131
132
133 class CmdIter : public UPerfFunction {
134 typedef void (CmdIter::* Func)(UErrorCode* , int32_t );
135 int32_t count;
136 CA_uchar * data;
137 Func fn;
138 UCollationElements *iter;
139 int32_t exec_count;
140 public:
CmdIter(UErrorCode & status,UCollator * col,int32_t count,CA_uchar * data,Func fn,int32_t,int32_t)141 CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
142 :count(count), data(data), fn(fn){
143 exec_count = 0;
144 UChar dummytext[] = {0, 0};
145 iter = ucol_openElements(col, NULL, 0, &status);
146 ucol_setText(iter, dummytext, 1, &status);
147 }
~CmdIter()148 ~CmdIter(){
149 ucol_closeElements(iter);
150 }
151
getOperationsPerIteration()152 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
153
call(UErrorCode * status)154 virtual void call(UErrorCode* status){
155 exec_count = 0;
156 for(int32_t i = 0; i< count; i++){
157 (this->*fn)(status, i);
158 }
159 }
160
icu_forward_null(UErrorCode * status,int32_t i)161 void icu_forward_null(UErrorCode* status, int32_t i){
162 ucol_setText(iter, data->dataOf(i), -1, status);
163 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
164 }
165
icu_forward_len(UErrorCode * status,int32_t i)166 void icu_forward_len(UErrorCode* status, int32_t i){
167 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
168 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
169 }
170
icu_backward_null(UErrorCode * status,int32_t i)171 void icu_backward_null(UErrorCode* status, int32_t i){
172 ucol_setText(iter, data->dataOf(i), -1, status);
173 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
174 }
175
icu_backward_len(UErrorCode * status,int32_t i)176 void icu_backward_len(UErrorCode* status, int32_t i){
177 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
178 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
179 }
180 };
181
182 class CmdIterAll : public UPerfFunction {
183 typedef void (CmdIterAll::* Func)(UErrorCode* status);
184 int32_t count;
185 UChar * data;
186 Func fn;
187 UCollationElements *iter;
188 int32_t exec_count;
189
190 public:
191 enum CALL {forward_null, forward_len, backward_null, backward_len};
192
~CmdIterAll()193 ~CmdIterAll(){
194 ucol_closeElements(iter);
195 }
CmdIterAll(UErrorCode & status,UCollator * col,int32_t count,UChar * data,CALL call,int32_t,int32_t)196 CmdIterAll(UErrorCode & status, UCollator * col, int32_t count, UChar * data, CALL call,int32_t,int32_t)
197 :count(count),data(data)
198 {
199 exec_count = 0;
200 if (call == forward_null || call == backward_null) {
201 iter = ucol_openElements(col, data, -1, &status);
202 } else {
203 iter = ucol_openElements(col, data, count, &status);
204 }
205
206 if (call == forward_null || call == forward_len){
207 fn = &CmdIterAll::icu_forward_all;
208 } else {
209 fn = &CmdIterAll::icu_backward_all;
210 }
211 }
getOperationsPerIteration()212 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
213
call(UErrorCode * status)214 virtual void call(UErrorCode* status){
215 (this->*fn)(status);
216 }
217
icu_forward_all(UErrorCode * status)218 void icu_forward_all(UErrorCode* status){
219 int strlen = count - 5;
220 int count5 = 5;
221 int strindex = 0;
222 ucol_setOffset(iter, strindex, status);
223 while (TRUE) {
224 if (ucol_next(iter, status) == UCOL_NULLORDER) {
225 break;
226 }
227 exec_count++;
228 count5 --;
229 if (count5 == 0) {
230 strindex += 10;
231 if (strindex > strlen) {
232 break;
233 }
234 ucol_setOffset(iter, strindex, status);
235 count5 = 5;
236 }
237 }
238 }
239
icu_backward_all(UErrorCode * status)240 void icu_backward_all(UErrorCode* status){
241 int strlen = count;
242 int count5 = 5;
243 int strindex = 5;
244 ucol_setOffset(iter, strindex, status);
245 while (TRUE) {
246 if (ucol_previous(iter, status) == UCOL_NULLORDER) {
247 break;
248 }
249 exec_count++;
250 count5 --;
251 if (count5 == 0) {
252 strindex += 10;
253 if (strindex > strlen) {
254 break;
255 }
256 ucol_setOffset(iter, strindex, status);
257 count5 = 5;
258 }
259 }
260 }
261
262 };
263
264 struct CmdQsort : public UPerfFunction{
265
q_randomCmdQsort266 static int q_random(const void * a, const void * b){
267 uint8_t * key_a = ((DataIndex *)a)->icu_key;
268 uint8_t * key_b = ((DataIndex *)b)->icu_key;
269
270 int val_a = 0;
271 int val_b = 0;
272 while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
273 while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
274 return val_a - val_b;
275 }
276
277 #define QCAST() \
278 DataIndex * da = (DataIndex *) a; \
279 DataIndex * db = (DataIndex *) b; \
280 ++exec_count
281
icu_strcoll_nullCmdQsort282 static int icu_strcoll_null(const void *a, const void *b){
283 QCAST();
284 return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
285 }
286
icu_strcoll_lenCmdQsort287 static int icu_strcoll_len(const void *a, const void *b){
288 QCAST();
289 return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
290 }
291
icu_cmpkeyCmdQsort292 static int icu_cmpkey (const void *a, const void *b){
293 QCAST();
294 return strcmp((char *) da->icu_key, (char *) db->icu_key);
295 }
296
297 #if U_PLATFORM_HAS_WIN32_API
win_cmp_nullCmdQsort298 static int win_cmp_null(const void *a, const void *b) {
299 QCAST();
300 //CSTR_LESS_THAN 1
301 //CSTR_EQUAL 2
302 //CSTR_GREATER_THAN 3
303 int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
304 if (t == 0){
305 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
306 exit(-1);
307 } else{
308 return t - CSTR_EQUAL;
309 }
310 }
311
win_cmp_lenCmdQsort312 static int win_cmp_len(const void *a, const void *b) {
313 QCAST();
314 int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
315 if (t == 0){
316 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
317 exit(-1);
318 } else{
319 return t - CSTR_EQUAL;
320 }
321 }
322 #endif
323
324 #define QFUNC(name, func, data) \
325 static int name (const void *a, const void *b){ \
326 QCAST(); \
327 return func(da->data, db->data); \
328 }
329
330 QFUNC(posix_strcoll_null, strcoll, posix_data)
331 QFUNC(posix_cmpkey, strcmp, posix_key)
332 #if U_PLATFORM_HAS_WIN32_API
333 QFUNC(win_cmpkey, strcmp, win_key)
334 QFUNC(win_wcscmp, wcscmp, win_data)
335 #endif
336 QFUNC(icu_strcmp, u_strcmp, icu_data)
337 QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
338
339 private:
340 static int32_t exec_count; // potential muilt-thread problem
341
342 typedef int (* Func)(const void *, const void *);
343
344 Func fn;
345 void * base; //Start of target array.
346 int32_t num; //Array size in elements.
347 int32_t width; //Element size in bytes.
348
349 void * backup; //copy source of base
350 public:
CmdQsortCmdQsort351 CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
352 :backup(theBase),num(num),width(width),fn(fn){
353 base = malloc(num * width);
354 time_empty(100, &status); // warm memory/cache
355 }
356
~CmdQsortCmdQsort357 ~CmdQsort(){
358 free(base);
359 }
360
empty_callCmdQsort361 void empty_call(){
362 exec_count = 0;
363 memcpy(base, backup, num * width);
364 }
365
time_emptyCmdQsort366 double time_empty(int32_t n, UErrorCode* status) {
367 UTimer start, stop;
368 utimer_getTime(&start);
369 while (n-- > 0) {
370 empty_call();
371 }
372 utimer_getTime(&stop);
373 return utimer_getDeltaSeconds(&start,&stop); // ms
374 }
375
callCmdQsort376 virtual void call(UErrorCode* status){
377 exec_count = 0;
378 memcpy(base, backup, num * width);
379 qsort(base, num, width, fn);
380 }
timeCmdQsort381 virtual double time(int32_t n, UErrorCode* status) {
382 double t1 = time_empty(n,status);
383 double t2 = UPerfFunction::time(n, status);
384 return t2-t1;// < 0 ? t2 : t2-t1;
385 }
386
getOperationsPerIterationCmdQsort387 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
388 };
389 int32_t CmdQsort::exec_count;
390
391
392 class CmdBinSearch : public UPerfFunction{
393 public:
394 typedef int (CmdBinSearch::* Func)(int, int);
395
396 UCollator * col;
397 DWORD win_langid;
398 int32_t count;
399 DataIndex * rnd;
400 DataIndex * ord;
401 Func fn;
402 int32_t exec_count;
403
CmdBinSearch(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)404 CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
405 :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
406
407
call(UErrorCode * status)408 virtual void call(UErrorCode* status){
409 exec_count = 0;
410 for(int32_t i = 0; i< count; i++){ // search all data
411 binary_search(i);
412 }
413 }
getOperationsPerIteration()414 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
415
binary_search(int32_t random)416 void binary_search(int32_t random) {
417 int low = 0;
418 int high = count - 1;
419 int guess;
420 int last_guess = -1;
421 int r;
422 while (TRUE) {
423 guess = (high + low)/2;
424 if (last_guess == guess) break; // nothing to search
425
426 r = (this->*fn)(random, guess);
427 exec_count++;
428
429 if (r == 0)
430 return; // found, search end.
431 if (r < 0) {
432 high = guess;
433 } else {
434 low = guess;
435 }
436 last_guess = guess;
437 }
438 }
439
icu_strcoll_null(int32_t i,int32_t j)440 int icu_strcoll_null(int32_t i, int32_t j){
441 return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
442 }
443
icu_strcoll_len(int32_t i,int32_t j)444 int icu_strcoll_len(int32_t i, int32_t j){
445 return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
446 }
447
icu_cmpkey(int32_t i,int32_t j)448 int icu_cmpkey(int32_t i, int32_t j) {
449 return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
450 }
451
452 #if U_PLATFORM_HAS_WIN32_API
win_cmp_null(int32_t i,int32_t j)453 int win_cmp_null(int32_t i, int32_t j) {
454 int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
455 if (t == 0){
456 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
457 exit(-1);
458 } else{
459 return t - CSTR_EQUAL;
460 }
461 }
462
win_cmp_len(int32_t i,int32_t j)463 int win_cmp_len(int32_t i, int32_t j) {
464 int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
465 if (t == 0){
466 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
467 exit(-1);
468 } else{
469 return t - CSTR_EQUAL;
470 }
471 }
472 #endif
473
474 #define BFUNC(name, func, data) \
475 int name(int32_t i, int32_t j) { \
476 return func(rnd[i].data, ord[j].data); \
477 }
478
479 BFUNC(posix_strcoll_null, strcoll, posix_data)
480 BFUNC(posix_cmpkey, strcmp, posix_key)
481 BFUNC(win_cmpkey, strcmp, win_key)
482 BFUNC(win_wcscmp, wcscmp, win_data)
483 BFUNC(icu_strcmp, u_strcmp, icu_data)
484 BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
485 };
486
// Performance test driver: parses the collation-related command line options,
// opens the ICU collator, loads the input lines into per-API representations
// and hands out the individual benchmark objects through runIndexedTest().
class CollPerfTest : public UPerfTest {
public:
    UCollator * col;            // ICU collator opened by the constructor
    DWORD win_langid;           // language id for the Windows APIs; assigned by the constructor only under U_PLATFORM_HAS_WIN32_API

    UChar * icu_data_all;       // all input lines joined into one buffer with a single terminator
    int32_t icu_data_all_len;   // number of UChars in icu_data_all, including that terminator

    int32_t count;              // number of (non-empty) input lines
    // Per-line storage, one compact array per representation.
    CA_uchar * icu_data;        // UTF-16 text
    CA_uint8 * icu_key;         // ICU sort keys
    CA_char * posix_data;       // UTF-8 text
    CA_char * posix_key;        // strxfrm() keys
    CA_win_wchar * win_data;    // WCHAR text
    CA_char * win_key;          // LCMapStringW() sort keys

    // Index arrays over the storage above; each holds `count` records.
    DataIndex * rnd_index; // random by icu key
    // Copies of rnd_index, each sorted by the comparator its name refers to.
    DataIndex * ord_win_data;
    DataIndex * ord_win_key;
    DataIndex * ord_posix_data;
    DataIndex * ord_posix_key;
    DataIndex * ord_icu_data;
    DataIndex * ord_icu_key;
    DataIndex * ord_win_wcscmp;
    DataIndex * ord_icu_strcmp;
    DataIndex * ord_icu_cmpcpo;
513
// Releases the collator, the per-line storage and every index array.
// Members that were never allocated are still NULL from the constructor.
virtual ~CollPerfTest(){
    ucol_close(col);

    // joined text buffer
    delete [] icu_data_all;

    // compact per-line storage
    delete icu_data;
    delete icu_key;
    delete posix_data;
    delete posix_key;
    delete win_data;
    delete win_key;

    // shuffled index followed by the pre-sorted copies
    delete[] rnd_index;
    delete[] ord_win_data;
    delete[] ord_win_key;
    delete[] ord_posix_data;
    delete[] ord_posix_key;
    delete[] ord_icu_data;
    delete[] ord_icu_key;
    delete[] ord_win_wcscmp;
    delete[] ord_icu_strcmp;
    delete[] ord_icu_cmpcpo;
}
534
CollPerfTest(int32_t argc,const char * argv[],UErrorCode & status)535 CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
536 col = NULL;
537 icu_data_all = NULL;
538 icu_data = NULL;
539 icu_key = NULL;
540 posix_data = NULL;
541 posix_key = NULL;
542 win_data =NULL;
543 win_key = NULL;
544
545 rnd_index = NULL;
546 ord_win_data= NULL;
547 ord_win_key= NULL;
548 ord_posix_data= NULL;
549 ord_posix_key= NULL;
550 ord_icu_data= NULL;
551 ord_icu_key= NULL;
552 ord_win_wcscmp = NULL;
553 ord_icu_strcmp = NULL;
554 ord_icu_cmpcpo = NULL;
555
556 if (U_FAILURE(status)){
557 return;
558 }
559
560 // Parse additional arguments
561
562 UOption options[] = {
563 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG), // Windows Language ID number.
564 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG), // --rulefile <filename>
565 // Collation related arguments. All are optional.
566 // To simplify parsing, two choice arguments are disigned as NO_ARG.
567 // The default value is UPPER word in the comment
568 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG), // --french <on | OFF>
569 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG), // --alternate <NON_IGNORE | shifted>
570 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
571 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG), // --caselevel <on | OFF>
572 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG), // --normal <on | OFF>
573 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG), // --strength <1-5>
574 };
575 int32_t opt_len = (sizeof(options)/sizeof(options[0]));
576 enum {i, r,f,a,c,l,n,s}; // The buffer between the option items' order and their references
577
578 _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
579
580 if (_remainingArgc < 0){
581 status = U_ILLEGAL_ARGUMENT_ERROR;
582 return;
583 }
584
585 if (locale == NULL){
586 locale = "en_US"; // set default locale
587 }
588
589 #if U_PLATFORM_HAS_WIN32_API
590 if (options[i].doesOccur) {
591 char *endp;
592 int tmp = strtol(options[i].value, &endp, 0);
593 if (endp == options[i].value) {
594 status = U_ILLEGAL_ARGUMENT_ERROR;
595 return;
596 }
597 win_langid = MAKELCID(tmp, SORT_DEFAULT);
598 } else {
599 win_langid = uloc_getLCID(locale);
600 }
601 #endif
602
603 // Set up an ICU collator
604 if (options[r].doesOccur) {
605 // TODO: implement it
606 } else {
607 col = ucol_open(locale, &status);
608 if (U_FAILURE(status)) {
609 return;
610 }
611 }
612
613 if (options[f].doesOccur) {
614 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
615 } else {
616 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
617 }
618
619 if (options[a].doesOccur) {
620 ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
621 }
622
623 if (options[c].doesOccur) { // strcmp() has i18n encoding problem
624 if (strcmp("lower", options[c].value) == 0){
625 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
626 } else if (strcmp("upper", options[c].value) == 0) {
627 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
628 } else {
629 status = U_ILLEGAL_ARGUMENT_ERROR;
630 return;
631 }
632 }
633
634 if (options[l].doesOccur){
635 ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
636 }
637
638 if (options[n].doesOccur){
639 ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
640 }
641
642 if (options[s].doesOccur) {
643 char *endp;
644 int tmp = strtol(options[l].value, &endp, 0);
645 if (endp == options[l].value) {
646 status = U_ILLEGAL_ARGUMENT_ERROR;
647 return;
648 }
649 switch (tmp) {
650 case 1: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status); break;
651 case 2: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status); break;
652 case 3: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status); break;
653 case 4: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status); break;
654 case 5: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status); break;
655 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
656 }
657 }
658 prepareData(status);
659 }
660
// Registers one test case without the explicit 'id' argument needed by
// TESTCASE(id,test)-style macros; the running counter `temp` of the enclosing
// function takes its place.
// When `temp` equals the requested `index`, `name` is set to the test name
// and the enclosing function returns: a new `classname(status, arg1..arg6)`
// if `exec` is set and construction left `status` successful, NULL otherwise.
// When `temp` does not match, it is incremented so that the next TEST()
// invocation checks the following index.
#define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
if(temp == index) {\
    name = #testname;\
    if (exec) {\
        UErrorCode status = U_ZERO_ERROR;\
        UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
        if (U_FAILURE(status)) {\
            delete t;\
            return NULL;\
        } else {\
            return t;\
        }\
    } else {\
        return NULL;\
    }\
}\
temp++\

680
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par=NULL)681 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
682 int temp = 0;
683
684 #define TEST_KEYGEN(testname, func)\
685 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
686 TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
687 TEST_KEYGEN(TestIcu_KeyGen_len, icu_key_len);
688 TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
689 #if U_PLATFORM_HAS_WIN32_API
690 TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
691 TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
692 #endif
693
694 #define TEST_ITER(testname, func)\
695 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
696 TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
697 TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
698 TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
699 TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
700
701 #define TEST_ITER_ALL(testname, func)\
702 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
703 TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
704 TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
705 TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
706 TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
707
708 #define TEST_QSORT(testname, func)\
709 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
710 TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
711 TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
712 TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
713 TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
714 TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
715 #if U_PLATFORM_HAS_WIN32_API
716 TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
717 TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
718 TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
719 #endif
720
721 #define TEST_BIN(testname, func)\
722 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
723 TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
724 TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
725 TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
726 TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
727 TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
728 TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
729 TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
730 #if U_PLATFORM_HAS_WIN32_API
731 TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
732 TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
733 #endif
734 TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
735 TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
736
737 name="";
738 return NULL;
739 }
740
741
742
prepareData(UErrorCode & status)743 void prepareData(UErrorCode& status){
744 if(U_FAILURE(status)) return;
745 if (icu_data) return; // prepared
746
747 icu_data = new CA_uchar();
748
749 // Following code is borrowed from UPerfTest::getLines();
750 const UChar* line=NULL;
751 int32_t len =0;
752 for (;;) {
753 line = ucbuf_readline(ucharBuf,&len,&status);
754 if(line == NULL || U_FAILURE(status)){break;}
755
756 // Refer to the source code of ucbuf_readline()
757 // 1. 'len' includs the line terminal symbols
758 // 2. The length of the line terminal symbols is only one character
759 // 3. The Windows CR LF line terminal symbols will be converted to CR
760
761 if (len == 1) {
762 continue; //skip empty line
763 } else {
764 icu_data->append_one(len);
765 memcpy(icu_data->last(), line, len * sizeof(UChar));
766 icu_data->last()[len -1] = NULL;
767 }
768 }
769 if(U_FAILURE(status)) return;
770
771 // UTF-16 -> UTF-8 conversion.
772 UConverter *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
773 if (U_FAILURE(status)) return;
774
775 count = icu_data->count;
776
777 icu_data_all_len = icu_data->index[count]; // includes all NULLs
778 icu_data_all_len -= count; // excludes all NULLs
779 icu_data_all_len += 1; // the terminal NULL
780 icu_data_all = new UChar[icu_data_all_len];
781 icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
782
783 icu_key = new CA_uint8;
784 win_data = new CA_win_wchar;
785 win_key = new CA_char;
786 posix_data = new CA_char;
787 posix_key = new CA_char;
788 rnd_index = new DataIndex[count];
789 DataIndex::win_langid = win_langid;
790 DataIndex::col = col;
791
792
793 UChar * p = icu_data_all;
794 int32_t s;
795 int32_t t;
796 for (int i=0; i < count; i++) {
797 // ICU all data
798 s = sizeof(UChar) * icu_data->lengthOf(i);
799 memcpy(p, icu_data->dataOf(i), s);
800 p += icu_data->lengthOf(i);
801
802 // ICU data
803
804 // ICU key
805 s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
806 icu_key->append_one(s);
807 t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
808 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
809
810 // POSIX data
811 s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
812 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
813 status = U_ZERO_ERROR;
814 } else {
815 return;
816 }
817 posix_data->append_one(s + 1); // plus terminal NULL
818 t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
819 if (U_FAILURE(status)) return;
820 if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
821 posix_data->last()[s] = 0;
822
823 // POSIX key
824 s = strxfrm(NULL, posix_data->dataOf(i), 0);
825 if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
826 posix_key->append_one(s);
827 t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
828 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
829
830 #if U_PLATFORM_HAS_WIN32_API
831 // Win data
832 s = icu_data->lengthOf(i) + 1; // plus terminal NULL
833 win_data->append_one(s);
834 memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
835
836 // Win key
837 s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
838 if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
839 win_key->append_one(s);
840 t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
841 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
842 #endif
843 };
844
845 // append_one() will make points shifting, should not merge following code into previous iteration
846 for (int i=0; i < count; i++) {
847 rnd_index[i].icu_key = icu_key->dataOf(i);
848 rnd_index[i].icu_data = icu_data->dataOf(i);
849 rnd_index[i].icu_data_len = icu_data->lengthOf(i);
850 rnd_index[i].posix_key = posix_key->last();
851 rnd_index[i].posix_data = posix_data->dataOf(i);
852 rnd_index[i].posix_data_len = posix_data->lengthOf(i);
853 #if U_PLATFORM_HAS_WIN32_API
854 rnd_index[i].win_key = win_key->dataOf(i);
855 rnd_index[i].win_data = win_data->dataOf(i);
856 rnd_index[i].win_data_len = win_data->lengthOf(i);
857 #endif
858 };
859
860 ucnv_close(conv);
861 qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
862
863 #define SORT(data, func) \
864 data = new DataIndex[count];\
865 memcpy(data, rnd_index, count * sizeof(DataIndex));\
866 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
867
868 SORT(ord_icu_data, icu_strcoll_len);
869 SORT(ord_icu_key, icu_cmpkey);
870 SORT(ord_posix_data, posix_strcoll_null);
871 SORT(ord_posix_key, posix_cmpkey);
872 #if U_PLATFORM_HAS_WIN32_API
873 SORT(ord_win_data, win_cmp_len);
874 SORT(ord_win_key, win_cmpkey);
875 SORT(ord_win_wcscmp, win_wcscmp);
876 #endif
877 SORT(ord_icu_strcmp, icu_strcmp);
878 SORT(ord_icu_cmpcpo, icu_cmpcpo);
879 }
880 };
881
882
main(int argc,const char * argv[])883 int main(int argc, const char *argv[])
884 {
885
886 UErrorCode status = U_ZERO_ERROR;
887 CollPerfTest test(argc, argv, status);
888
889 if (U_FAILURE(status)){
890 printf("The error is %s\n", u_errorName(status));
891 //TODO: print usage here
892 return status;
893 }
894
895 if (test.run() == FALSE){
896 fprintf(stderr, "FAILED: Tests could not be run please check the "
897 "arguments.\n");
898 return -1;
899 }
900 return 0;
901 }
902
903