1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2001-2012 IBM, Inc.   All Rights Reserved.
4 *
5 ********************************************************************/
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <locale.h>
10 #include <limits.h>
11 #include <string.h>
12 #include "unicode/uperf.h"
13 #include "uoptions.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
16 
// When the Win32 API is not available, define the Windows type names used by
// the declarations below in terms of portable types.
#if !U_PLATFORM_HAS_WIN32_API
#define DWORD uint32_t
#define WCHAR wchar_t
#endif
21 
/* Stores an array of string<UNIT> in one contiguous block of memory.
Since a string<UNIT> is itself an array of UNIT, this
class eases the memory management of an array of string<UNIT>.
*/
26 
27 //template<typename UNIT>
/*
 * COMPATCT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * packs a growing list of UNIT strings into a single heap buffer.
 *   count - number of strings appended so far
 *   index - count + 1 offsets; string i occupies data[index[i]] up to
 *           (not including) data[index[i + 1]]
 *   data  - the packed UNIT storage
 * append_one(len) reserves len UNITs (terminator included) for a new last
 * string. It reallocates data, so pointers previously returned by last() or
 * dataOf() must not be kept across a call.
 * A failed allocation is reported on stderr and terminates the program instead
 * of being dereferenced. Instances cannot be copied.
 */
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    CompactArrays():count(0), index(NULL), data(NULL){ \
        index = (int32_t *) grow(NULL, sizeof(int32_t)); \
        index[0] = 0; \
    } \
    ~CompactArrays(){free(index);free(data);} \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
        count++; \
        index = (int32_t *) grow(index, sizeof(int32_t) * (count + 1)); \
        index[count] = index[count - 1] + theLen; \
        data = (UNIT *) grow(data, sizeof(UNIT) * index[count]); \
    } \
    UNIT * last(){return data + index[count - 1];} \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    int32_t lengthOf(int32_t i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
private: \
    /*declared but not defined: copying would double-free index and data*/ \
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    /*realloc() wrapper that never returns a failed allocation to the caller*/ \
    static void * grow(void * block, size_t bytes){ \
        void * grown = realloc(block, bytes); \
        if (grown == NULL && bytes != 0) { \
            fprintf(stderr, "CompactArrays: out of memory\n"); \
            exit(-1); \
        } \
        return grown; \
    } \
};
51 
52 //typedef CompactArrays<UChar> CA_uchar;
53 //typedef CompactArrays<char> CA_char;
54 //typedef CompactArrays<uint8_t> CA_uint8;
55 //typedef CompactArrays<WCHAR> CA_win_wchar;
56 
57 COMPATCT_ARRAY(CA_uchar, UChar)
58 COMPATCT_ARRAY(CA_char, char)
59 COMPATCT_ARRAY(CA_uint8, uint8_t)
60 COMPATCT_ARRAY(CA_win_wchar, WCHAR)
61 
62 
63 struct DataIndex {
64     static DWORD        win_langid;     // for qsort callback function
65     static UCollator *  col;            // for qsort callback function
66     uint8_t *   icu_key;
67     UChar *     icu_data;
68     int32_t     icu_data_len;
69     char*       posix_key;
70     char*       posix_data;
71     int32_t     posix_data_len;
72     char*       win_key;
73     WCHAR *     win_data;
74     int32_t     win_data_len;
75 };
76 DWORD DataIndex::win_langid;
77 UCollator * DataIndex::col;
78 
79 
80 
81 class CmdKeyGen : public UPerfFunction {
82     typedef	void (CmdKeyGen::* Func)(int32_t);
83     enum{MAX_KEY_LENGTH = 5000};
84     UCollator * col;
85     DWORD       win_langid;
86     int32_t     count;
87     DataIndex * data;
88     Func 	    fn;
89 
90     union { // to save sapce
91         uint8_t		icu_key[MAX_KEY_LENGTH];
92         char        posix_key[MAX_KEY_LENGTH];
93         WCHAR		win_key[MAX_KEY_LENGTH];
94     };
95 public:
CmdKeyGen(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * data,Func fn,int32_t)96     CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
97         :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
98 
getOperationsPerIteration()99         virtual long getOperationsPerIteration(){return count;}
100 
call(UErrorCode * status)101         virtual void call(UErrorCode* status){
102             for(int32_t i = 0; i< count; i++){
103                 (this->*fn)(i);
104             }
105         }
106 
icu_key_null(int32_t i)107         void icu_key_null(int32_t i){
108             ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
109         }
110 
icu_key_len(int32_t i)111         void icu_key_len(int32_t i){
112             ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
113         }
114 
115 #if U_PLATFORM_HAS_WIN32_API
116         // pre-generated in CollPerfTest::prepareData(), need not to check error here
win_key_null(int32_t i)117         void win_key_null(int32_t i){
118             //LCMAP_SORTsk             0x00000400  // WC sort sk (normalize)
119             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
120         }
121 
win_key_len(int32_t i)122         void win_key_len(int32_t i){
123             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
124         }
125 #endif
126 
posix_key_null(int32_t i)127         void posix_key_null(int32_t i){
128             strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
129         }
130 };
131 
132 
133 class CmdIter : public UPerfFunction {
134     typedef	void (CmdIter::* Func)(UErrorCode* , int32_t );
135     int32_t             count;
136     CA_uchar *          data;
137     Func                fn;
138     UCollationElements *iter;
139     int32_t             exec_count;
140 public:
CmdIter(UErrorCode & status,UCollator * col,int32_t count,CA_uchar * data,Func fn,int32_t,int32_t)141     CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
142         :count(count), data(data), fn(fn){
143             exec_count = 0;
144             UChar dummytext[] = {0, 0};
145             iter = ucol_openElements(col, NULL, 0, &status);
146             ucol_setText(iter, dummytext, 1, &status);
147         }
~CmdIter()148         ~CmdIter(){
149             ucol_closeElements(iter);
150         }
151 
getOperationsPerIteration()152         virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
153 
call(UErrorCode * status)154         virtual void call(UErrorCode* status){
155             exec_count = 0;
156             for(int32_t i = 0; i< count; i++){
157                 (this->*fn)(status, i);
158             }
159         }
160 
icu_forward_null(UErrorCode * status,int32_t i)161         void icu_forward_null(UErrorCode* status, int32_t i){
162             ucol_setText(iter, data->dataOf(i), -1, status);
163             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
164         }
165 
icu_forward_len(UErrorCode * status,int32_t i)166         void icu_forward_len(UErrorCode* status, int32_t i){
167             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
168             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
169         }
170 
icu_backward_null(UErrorCode * status,int32_t i)171         void icu_backward_null(UErrorCode* status, int32_t i){
172             ucol_setText(iter, data->dataOf(i), -1, status);
173             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
174         }
175 
icu_backward_len(UErrorCode * status,int32_t i)176         void icu_backward_len(UErrorCode* status, int32_t i){
177             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
178             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
179         }
180 };
181 
182 class CmdIterAll : public UPerfFunction {
183     typedef	void (CmdIterAll::* Func)(UErrorCode* status);
184     int32_t     count;
185     UChar *     data;
186     Func        fn;
187     UCollationElements *iter;
188     int32_t     exec_count;
189 
190 public:
191     enum CALL {forward_null, forward_len, backward_null, backward_len};
192 
~CmdIterAll()193     ~CmdIterAll(){
194         ucol_closeElements(iter);
195     }
CmdIterAll(UErrorCode & status,UCollator * col,int32_t count,UChar * data,CALL call,int32_t,int32_t)196     CmdIterAll(UErrorCode & status, UCollator * col, int32_t count,  UChar * data, CALL call,int32_t,int32_t)
197         :count(count),data(data)
198     {
199         exec_count = 0;
200         if (call == forward_null || call == backward_null) {
201             iter = ucol_openElements(col, data, -1, &status);
202         } else {
203             iter = ucol_openElements(col, data, count, &status);
204         }
205 
206         if (call == forward_null || call == forward_len){
207             fn = &CmdIterAll::icu_forward_all;
208         } else {
209             fn = &CmdIterAll::icu_backward_all;
210         }
211     }
getOperationsPerIteration()212     virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
213 
call(UErrorCode * status)214     virtual void call(UErrorCode* status){
215         (this->*fn)(status);
216     }
217 
icu_forward_all(UErrorCode * status)218     void icu_forward_all(UErrorCode* status){
219         int strlen = count - 5;
220         int count5 = 5;
221         int strindex = 0;
222         ucol_setOffset(iter, strindex, status);
223         while (TRUE) {
224             if (ucol_next(iter, status) == UCOL_NULLORDER) {
225                 break;
226             }
227             exec_count++;
228             count5 --;
229             if (count5 == 0) {
230                 strindex += 10;
231                 if (strindex > strlen) {
232                     break;
233                 }
234                 ucol_setOffset(iter, strindex, status);
235                 count5 = 5;
236             }
237         }
238     }
239 
icu_backward_all(UErrorCode * status)240     void icu_backward_all(UErrorCode* status){
241         int strlen = count;
242         int count5 = 5;
243         int strindex = 5;
244         ucol_setOffset(iter, strindex, status);
245         while (TRUE) {
246             if (ucol_previous(iter, status) == UCOL_NULLORDER) {
247                 break;
248             }
249             exec_count++;
250             count5 --;
251             if (count5 == 0) {
252                 strindex += 10;
253                 if (strindex > strlen) {
254                     break;
255                 }
256                 ucol_setOffset(iter, strindex, status);
257                 count5 = 5;
258             }
259         }
260     }
261 
262 };
263 
264 struct CmdQsort : public UPerfFunction{
265 
q_randomCmdQsort266     static int q_random(const void * a, const void * b){
267         uint8_t * key_a = ((DataIndex *)a)->icu_key;
268         uint8_t * key_b = ((DataIndex *)b)->icu_key;
269 
270         int   val_a = 0;
271         int   val_b = 0;
272         while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
273         while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
274         return val_a - val_b;
275     }
276 
277 #define QCAST() \
278     DataIndex * da = (DataIndex *) a; \
279     DataIndex * db = (DataIndex *) b; \
280     ++exec_count
281 
icu_strcoll_nullCmdQsort282     static int icu_strcoll_null(const void *a, const void *b){
283         QCAST();
284         return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
285     }
286 
icu_strcoll_lenCmdQsort287     static int icu_strcoll_len(const void *a, const void *b){
288         QCAST();
289         return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
290     }
291 
icu_cmpkeyCmdQsort292     static int icu_cmpkey (const void *a, const void *b){
293         QCAST();
294         return strcmp((char *) da->icu_key, (char *) db->icu_key);
295     }
296 
297 #if U_PLATFORM_HAS_WIN32_API
win_cmp_nullCmdQsort298     static int win_cmp_null(const void *a, const void *b) {
299         QCAST();
300         //CSTR_LESS_THAN		1
301         //CSTR_EQUAL			2
302         //CSTR_GREATER_THAN		3
303         int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
304         if (t == 0){
305             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
306             exit(-1);
307         } else{
308             return t - CSTR_EQUAL;
309         }
310     }
311 
win_cmp_lenCmdQsort312     static int win_cmp_len(const void *a, const void *b) {
313         QCAST();
314         int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
315         if (t == 0){
316             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
317             exit(-1);
318         } else{
319             return t - CSTR_EQUAL;
320         }
321     }
322 #endif
323 
324 #define QFUNC(name, func, data) \
325     static int name (const void *a, const void *b){ \
326     QCAST(); \
327     return func(da->data, db->data); \
328     }
329 
330     QFUNC(posix_strcoll_null, strcoll, posix_data)
331         QFUNC(posix_cmpkey, strcmp, posix_key)
332 #if U_PLATFORM_HAS_WIN32_API
333         QFUNC(win_cmpkey, strcmp, win_key)
334         QFUNC(win_wcscmp, wcscmp, win_data)
335 #endif
336         QFUNC(icu_strcmp, u_strcmp, icu_data)
337         QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
338 
339 private:
340     static int32_t exec_count; // potential muilt-thread problem
341 
342     typedef	int (* Func)(const void *, const void *);
343 
344     Func    fn;
345     void *  base;   //Start of target array.
346     int32_t num;    //Array size in elements.
347     int32_t width;  //Element size in bytes.
348 
349     void *  backup; //copy source of base
350 public:
CmdQsortCmdQsort351     CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
352         :backup(theBase),num(num),width(width),fn(fn){
353             base = malloc(num * width);
354             time_empty(100, &status); // warm memory/cache
355         }
356 
~CmdQsortCmdQsort357         ~CmdQsort(){
358             free(base);
359         }
360 
empty_callCmdQsort361         void empty_call(){
362             exec_count = 0;
363             memcpy(base, backup, num * width);
364         }
365 
time_emptyCmdQsort366         double time_empty(int32_t n, UErrorCode* status) {
367             UTimer start, stop;
368             utimer_getTime(&start);
369             while (n-- > 0) {
370                 empty_call();
371             }
372             utimer_getTime(&stop);
373             return utimer_getDeltaSeconds(&start,&stop); // ms
374         }
375 
callCmdQsort376         virtual void call(UErrorCode* status){
377             exec_count = 0;
378             memcpy(base, backup, num * width);
379             qsort(base, num, width, fn);
380         }
timeCmdQsort381         virtual double time(int32_t n, UErrorCode* status) {
382             double t1 = time_empty(n,status);
383             double t2 = UPerfFunction::time(n, status);
384             return  t2-t1;// < 0 ? t2 : t2-t1;
385         }
386 
getOperationsPerIterationCmdQsort387         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
388 };
389 int32_t CmdQsort::exec_count;
390 
391 
392 class CmdBinSearch : public UPerfFunction{
393 public:
394     typedef	int (CmdBinSearch::* Func)(int, int);
395 
396     UCollator * col;
397     DWORD       win_langid;
398     int32_t     count;
399     DataIndex * rnd;
400     DataIndex * ord;
401     Func 	    fn;
402     int32_t     exec_count;
403 
CmdBinSearch(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)404     CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
405         :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
406 
407 
call(UErrorCode * status)408         virtual void call(UErrorCode* status){
409             exec_count = 0;
410             for(int32_t i = 0; i< count; i++){ // search all data
411                 binary_search(i);
412             }
413         }
getOperationsPerIteration()414         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
415 
binary_search(int32_t random)416         void binary_search(int32_t random)	{
417             int low   = 0;
418             int high  = count - 1;
419             int guess;
420             int last_guess = -1;
421             int r;
422             while (TRUE) {
423                 guess = (high + low)/2;
424                 if (last_guess == guess) break; // nothing to search
425 
426                 r = (this->*fn)(random, guess);
427                 exec_count++;
428 
429                 if (r == 0)
430                     return;	// found, search end.
431                 if (r < 0) {
432                     high = guess;
433                 } else {
434                     low  = guess;
435                 }
436                 last_guess = guess;
437             }
438         }
439 
icu_strcoll_null(int32_t i,int32_t j)440         int icu_strcoll_null(int32_t i, int32_t j){
441             return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
442         }
443 
icu_strcoll_len(int32_t i,int32_t j)444         int icu_strcoll_len(int32_t i, int32_t j){
445             return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
446         }
447 
icu_cmpkey(int32_t i,int32_t j)448         int icu_cmpkey(int32_t i, int32_t j) {
449             return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
450         }
451 
452 #if U_PLATFORM_HAS_WIN32_API
win_cmp_null(int32_t i,int32_t j)453         int win_cmp_null(int32_t i, int32_t j) {
454             int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
455             if (t == 0){
456                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
457                 exit(-1);
458             } else{
459                 return t - CSTR_EQUAL;
460             }
461         }
462 
win_cmp_len(int32_t i,int32_t j)463         int win_cmp_len(int32_t i, int32_t j) {
464             int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
465             if (t == 0){
466                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
467                 exit(-1);
468             } else{
469                 return t - CSTR_EQUAL;
470             }
471         }
472 #endif
473 
474 #define BFUNC(name, func, data) \
475     int name(int32_t i, int32_t j) { \
476     return func(rnd[i].data, ord[j].data); \
477     }
478 
479         BFUNC(posix_strcoll_null, strcoll, posix_data)
480             BFUNC(posix_cmpkey, strcmp, posix_key)
481             BFUNC(win_cmpkey, strcmp, win_key)
482             BFUNC(win_wcscmp, wcscmp, win_data)
483             BFUNC(icu_strcmp, u_strcmp, icu_data)
484             BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
485 };
486 
487 class CollPerfTest : public UPerfTest {
488 public:
    // Collator opened by the constructor and closed by the destructor.
    UCollator *     col;
    // Windows locale id; assigned by the constructor only when
    // U_PLATFORM_HAS_WIN32_API is set.
    DWORD           win_langid;

    // All test strings concatenated without their individual terminators,
    // followed by a single terminator (built in prepareData()).
    UChar * icu_data_all;
    int32_t icu_data_all_len;

    // Number of test strings, and the per-API copies of the strings and of
    // their sort keys (allocated in prepareData()).
    int32_t         count;
    CA_uchar *      icu_data;
    CA_uint8 *      icu_key;
    CA_char *       posix_data;
    CA_char *       posix_key;
    CA_win_wchar *  win_data;
    CA_char *       win_key;

    // DataIndex arrays released with delete[] in the destructor.
    DataIndex * rnd_index; // random by icu key
    DataIndex * ord_win_data;
    DataIndex * ord_win_key;
    DataIndex * ord_posix_data;
    DataIndex * ord_posix_key;
    DataIndex * ord_icu_data;
    DataIndex * ord_icu_key;
    DataIndex * ord_win_wcscmp;
    DataIndex * ord_icu_strcmp;
    DataIndex * ord_icu_cmpcpo;
513 
~CollPerfTest()514     virtual ~CollPerfTest(){
515         ucol_close(col);
516         delete [] icu_data_all;
517         delete icu_data;
518         delete icu_key;
519         delete posix_data;
520         delete posix_key;
521         delete win_data;
522         delete win_key;
523         delete[] rnd_index;
524         delete[] ord_win_data;
525         delete[] ord_win_key;
526         delete[] ord_posix_data;
527         delete[] ord_posix_key;
528         delete[] ord_icu_data;
529         delete[] ord_icu_key;
530         delete[] ord_win_wcscmp;
531         delete[] ord_icu_strcmp;
532         delete[] ord_icu_cmpcpo;
533     }
534 
CollPerfTest(int32_t argc,const char * argv[],UErrorCode & status)535     CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
536         col = NULL;
537         icu_data_all = NULL;
538         icu_data = NULL;
539         icu_key = NULL;
540         posix_data = NULL;
541         posix_key = NULL;
542         win_data =NULL;
543         win_key = NULL;
544 
545         rnd_index = NULL;
546         ord_win_data= NULL;
547         ord_win_key= NULL;
548         ord_posix_data= NULL;
549         ord_posix_key= NULL;
550         ord_icu_data= NULL;
551         ord_icu_key= NULL;
552         ord_win_wcscmp = NULL;
553         ord_icu_strcmp = NULL;
554         ord_icu_cmpcpo = NULL;
555 
556         if (U_FAILURE(status)){
557             return;
558         }
559 
560         // Parse additional arguments
561 
562         UOption options[] = {
563             UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG),        // Windows Language ID number.
564                 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG),      // --rulefile <filename>
565                 // Collation related arguments. All are optional.
566                 // To simplify parsing, two choice arguments are disigned as NO_ARG.
567                 // The default value is UPPER word in the comment
568                 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG),          // --french <on | OFF>
569                 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG),       // --alternate <NON_IGNORE | shifted>
570                 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
571                 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG),       // --caselevel <on | OFF>
572                 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG),          // --normal <on | OFF>
573                 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG),  // --strength <1-5>
574         };
575         int32_t opt_len = (sizeof(options)/sizeof(options[0]));
576         enum {i, r,f,a,c,l,n,s};   // The buffer between the option items' order and their references
577 
578         _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
579 
580         if (_remainingArgc < 0){
581             status = U_ILLEGAL_ARGUMENT_ERROR;
582             return;
583         }
584 
585         if (locale == NULL){
586             locale = "en_US";   // set default locale
587         }
588 
589 #if U_PLATFORM_HAS_WIN32_API
590         if (options[i].doesOccur) {
591             char *endp;
592             int tmp = strtol(options[i].value, &endp, 0);
593             if (endp == options[i].value) {
594                 status = U_ILLEGAL_ARGUMENT_ERROR;
595                 return;
596             }
597             win_langid = MAKELCID(tmp, SORT_DEFAULT);
598         } else {
599             win_langid = uloc_getLCID(locale);
600         }
601 #endif
602 
603         //  Set up an ICU collator
604         if (options[r].doesOccur) {
605             // TODO: implement it
606         } else {
607             col = ucol_open(locale, &status);
608             if (U_FAILURE(status)) {
609                 return;
610             }
611         }
612 
613         if (options[f].doesOccur) {
614             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
615         } else {
616             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
617         }
618 
619         if (options[a].doesOccur) {
620             ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
621         }
622 
623         if (options[c].doesOccur) { // strcmp() has i18n encoding problem
624             if (strcmp("lower", options[c].value) == 0){
625                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
626             } else if (strcmp("upper", options[c].value) == 0) {
627                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
628             } else {
629                 status = U_ILLEGAL_ARGUMENT_ERROR;
630                 return;
631             }
632         }
633 
634         if (options[l].doesOccur){
635             ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
636         }
637 
638         if (options[n].doesOccur){
639             ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
640         }
641 
642         if (options[s].doesOccur) {
643             char *endp;
644             int tmp = strtol(options[l].value, &endp, 0);
645             if (endp == options[l].value) {
646                 status = U_ILLEGAL_ARGUMENT_ERROR;
647                 return;
648             }
649             switch (tmp) {
650             case 1:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status);		break;
651             case 2:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status);		break;
652             case 3:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);		break;
653             case 4:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);	break;
654             case 5:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);		break;
655             default: status = U_ILLEGAL_ARGUMENT_ERROR;					return;
656             }
657         }
658         prepareData(status);
659     }
660 
661     //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like
662 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
663     if(temp == index) {\
664     name = #testname;\
665     if (exec) {\
666     UErrorCode status = U_ZERO_ERROR;\
667     UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
668     if (U_FAILURE(status)) {\
669     delete t;\
670     return NULL;\
671     } else {\
672     return t;\
673     }\
674     } else {\
675     return NULL;\
676     }\
677     }\
678     temp++\
679 
680 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par=NULL)681     virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
682         int temp = 0;
683 
684 #define TEST_KEYGEN(testname, func)\
685     TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
686         TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
687         TEST_KEYGEN(TestIcu_KeyGen_len,  icu_key_len);
688         TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
689 #if U_PLATFORM_HAS_WIN32_API
690         TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
691         TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
692 #endif
693 
694 #define TEST_ITER(testname, func)\
695     TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
696         TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
697         TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
698         TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
699         TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
700 
701 #define TEST_ITER_ALL(testname, func)\
702     TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
703         TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
704         TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
705         TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
706         TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
707 
708 #define TEST_QSORT(testname, func)\
709     TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
710         TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
711         TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
712         TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
713         TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
714         TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
715 #if U_PLATFORM_HAS_WIN32_API
716         TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
717         TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
718         TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
719 #endif
720 
721 #define TEST_BIN(testname, func)\
722     TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
723         TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
724         TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
725         TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
726         TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
727         TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
728         TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
729         TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
730 #if U_PLATFORM_HAS_WIN32_API
731         TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
732         TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
733 #endif
734         TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
735         TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
736 
737         name="";
738         return NULL;
739     }
740 
741 
742 
prepareData(UErrorCode & status)743     void prepareData(UErrorCode& status){
744         if(U_FAILURE(status)) return;
745         if (icu_data) return; // prepared
746 
747         icu_data = new CA_uchar();
748 
749         // Following code is borrowed from UPerfTest::getLines();
750         const UChar*    line=NULL;
751         int32_t         len =0;
752         for (;;) {
753             line = ucbuf_readline(ucharBuf,&len,&status);
754             if(line == NULL || U_FAILURE(status)){break;}
755 
756             // Refer to the source code of ucbuf_readline()
757             // 1. 'len' includs the line terminal symbols
758             // 2. The length of the line terminal symbols is only one character
759             // 3. The Windows CR LF line terminal symbols will be converted to CR
760 
761             if (len == 1) {
762                 continue; //skip empty line
763             } else {
764                 icu_data->append_one(len);
765                 memcpy(icu_data->last(), line, len * sizeof(UChar));
766                 icu_data->last()[len -1] = NULL;
767             }
768         }
769         if(U_FAILURE(status)) return;
770 
771         // UTF-16 -> UTF-8 conversion.
772         UConverter   *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
773         if (U_FAILURE(status)) return;
774 
775         count = icu_data->count;
776 
777         icu_data_all_len =  icu_data->index[count]; // includes all NULLs
778         icu_data_all_len -= count;  // excludes all NULLs
779         icu_data_all_len += 1;      // the terminal NULL
780         icu_data_all = new UChar[icu_data_all_len];
781         icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
782 
783         icu_key  = new CA_uint8;
784         win_data = new CA_win_wchar;
785         win_key  = new CA_char;
786         posix_data = new CA_char;
787         posix_key = new CA_char;
788         rnd_index = new DataIndex[count];
789         DataIndex::win_langid = win_langid;
790         DataIndex::col        = col;
791 
792 
793         UChar * p = icu_data_all;
794         int32_t s;
795         int32_t t;
796         for (int i=0; i < count; i++) {
797             // ICU all data
798             s = sizeof(UChar) * icu_data->lengthOf(i);
799             memcpy(p, icu_data->dataOf(i), s);
800             p += icu_data->lengthOf(i);
801 
802             // ICU data
803 
804             // ICU key
805             s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
806             icu_key->append_one(s);
807             t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
808             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
809 
810             // POSIX data
811             s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
812             if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
813                 status = U_ZERO_ERROR;
814             } else {
815                 return;
816             }
817             posix_data->append_one(s + 1); // plus terminal NULL
818             t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
819             if (U_FAILURE(status)) return;
820             if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
821             posix_data->last()[s] = 0;
822 
823             // POSIX key
824             s = strxfrm(NULL, posix_data->dataOf(i), 0);
825             if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
826             posix_key->append_one(s);
827             t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
828             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
829 
830 #if U_PLATFORM_HAS_WIN32_API
831             // Win data
832             s = icu_data->lengthOf(i) + 1; // plus terminal NULL
833             win_data->append_one(s);
834             memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
835 
836             // Win key
837             s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
838             if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
839             win_key->append_one(s);
840             t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
841             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
842 #endif
843         };
844 
        // append_one() reallocates its buffers, which can move previously returned pointers,
        // so collect the pointers only now; do not merge this loop into the previous one.
846         for (int i=0; i < count; i++) {
847             rnd_index[i].icu_key = icu_key->dataOf(i);
848             rnd_index[i].icu_data = icu_data->dataOf(i);
849             rnd_index[i].icu_data_len = icu_data->lengthOf(i);
850             rnd_index[i].posix_key = posix_key->last();
851             rnd_index[i].posix_data = posix_data->dataOf(i);
852             rnd_index[i].posix_data_len = posix_data->lengthOf(i);
853 #if U_PLATFORM_HAS_WIN32_API
854             rnd_index[i].win_key = win_key->dataOf(i);
855             rnd_index[i].win_data = win_data->dataOf(i);
856             rnd_index[i].win_data_len = win_data->lengthOf(i);
857 #endif
858         };
859 
860         ucnv_close(conv);
861         qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
862 
863 #define SORT(data, func) \
864     data = new DataIndex[count];\
865     memcpy(data, rnd_index, count * sizeof(DataIndex));\
866     qsort(data, count, sizeof(DataIndex), CmdQsort::func)
867 
868         SORT(ord_icu_data, icu_strcoll_len);
869         SORT(ord_icu_key, icu_cmpkey);
870         SORT(ord_posix_data, posix_strcoll_null);
871         SORT(ord_posix_key, posix_cmpkey);
872 #if U_PLATFORM_HAS_WIN32_API
873         SORT(ord_win_data, win_cmp_len);
874         SORT(ord_win_key, win_cmpkey);
875         SORT(ord_win_wcscmp, win_wcscmp);
876 #endif
877         SORT(ord_icu_strcmp, icu_strcmp);
878         SORT(ord_icu_cmpcpo, icu_cmpcpo);
879     }
880 };
881 
882 
main(int argc,const char * argv[])883 int main(int argc, const char *argv[])
884 {
885 
886     UErrorCode status = U_ZERO_ERROR;
887     CollPerfTest test(argc, argv, status);
888 
889     if (U_FAILURE(status)){
890         printf("The error is %s\n", u_errorName(status));
891         //TODO: print usage here
892         return status;
893     }
894 
895     if (test.run() == FALSE){
896         fprintf(stderr, "FAILED: Tests could not be run please check the "
897             "arguments.\n");
898         return -1;
899     }
900     return 0;
901 }
902 
903