1 /*
2 **************************************************************************
3  *   © 2016 and later: Unicode, Inc. and others.
4  *   License & terms of use: http://www.unicode.org/copyright.html#License
5  *************************************************************************
6  *************************************************************************
7  *   Copyright (C) 2002-2014, International Business Machines
8  *   Corporation and others.  All Rights Reserved.
9  *************************************************************************
10  *   file name:  utfperf.cpp
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2005Nov17
16  *   created by: Raymond Yang
17  *
 *   Ported from utfperf.c created by Markus W. Scherer
19  *   Performance test program for Unicode converters
20  */
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include "unicode/uperf.h"
25 #include "cmemory.h" // for UPRV_LENGTHOF
26 #include "uoptions.h"
27 
/* definitions and text buffers */

// Size of the utf8[] input buffer (in bytes); OUTPUT_CAPACITY below is defined
// in terms of it.
#define INPUT_CAPACITY (1024*1024)
// Number of UChars reserved for the pivot[] array below.
#define INTERMEDIATE_CAPACITY 4096
// NOTE(review): not referenced anywhere in the visible code.
#define INTERMEDIATE_SMALL_CAPACITY 20
// Upper bound accepted for the --pivot option (see the UtfPerformanceTest constructor).
#define PIVOT_CAPACITY 1024
// Size of output[] (in UChars) and of intermediate[] (in bytes);
// also the upper bound accepted for the --chunk option.
#define OUTPUT_CAPACITY INPUT_CAPACITY

// UTF-8 form of the input text, filled by u_strToUTF8() in the UtfPerformanceTest constructor.
static char utf8[INPUT_CAPACITY];
// UTF-16 pivot buffer handed to ucnv_convertEx() in FromUTF8.
// It is sized with INTERMEDIATE_CAPACITY, while only the first pivotLength
// (at most PIVOT_CAPACITY) UChars are used.
static UChar pivot[INTERMEDIATE_CAPACITY];

// UTF-16 result buffer of the Roundtrip test.
static UChar output[OUTPUT_CAPACITY];
// Charset-encoded bytes; each conversion call writes at most chunkLength bytes from its start.
static char intermediate[OUTPUT_CAPACITY];

// Lengths/counters shared between the test object and the test functions.
static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;

// Incremented by fromUCallback(); reset at the start of each call() and reported by main().
static int32_t fromUCallbackCount;
45 
// Command-line options specific to utfperf.
// Options do not have abbreviations: Force readable command lines.
// (Using U+0001 for abbreviation characters.)
// The enum constants are the indexes into options[] below, so both must stay
// in the same order.
enum {
    CHARSET,
    CHUNK_LENGTH,
    PIVOT_LENGTH,
    UTFPERF_OPTIONS_COUNT
};

// Option table passed to the UPerfTest constructor; main() stores the default
// values in the .value fields before parsing.
static UOption options[UTFPERF_OPTIONS_COUNT]={
    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
};

// Additional usage text passed to the UPerfTest constructor.
// The bracketed defaults must match the ones set in main().
static const char *const utfperf_usage =
    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
    "\t            Default: UTF-8\n"
    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t            [1024]\n";
68 
69 // Test object.
70 class  UtfPerformanceTest : public UPerfTest{
71 public:
UtfPerformanceTest(int32_t argc,const char * argv[],UErrorCode & status)72     UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
73             : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
74         if (U_SUCCESS(status)) {
75             charset = options[CHARSET].value;
76 
77             chunkLength = atoi(options[CHUNK_LENGTH].value);
78             if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
79                 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
80                 status = U_ILLEGAL_ARGUMENT_ERROR;
81             }
82 
83             pivotLength = atoi(options[PIVOT_LENGTH].value);
84             if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
85                 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
86                 status = U_ILLEGAL_ARGUMENT_ERROR;
87             }
88 
89             int32_t inputLength;
90             UPerfTest::getBuffer(inputLength, status);
91             countInputCodePoints = u_countChar32(buffer, bufferLen);
92             u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
93         }
94     }
95 
96     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
97 
getBuffer() const98     const UChar *getBuffer() const { return buffer; }
getBufferLen() const99     int32_t getBufferLen() const { return bufferLen; }
100 
101     const char *charset;
102     int32_t chunkLength, pivotLength;
103 };
104 
105 U_CDECL_BEGIN
106 // Custom callback for counting callback calls.
107 static void U_CALLCONV
fromUCallback(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)108 fromUCallback(const void *context,
109               UConverterFromUnicodeArgs *fromUArgs,
110               const UChar *codeUnits,
111               int32_t length,
112               UChar32 codePoint,
113               UConverterCallbackReason reason,
114               UErrorCode *pErrorCode) {
115     if (reason <= UCNV_IRREGULAR) {
116         ++fromUCallbackCount;
117     }
118     UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
119 }
120 U_CDECL_END
121 
122 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
123 class Command : public UPerfFunction {
124 protected:
Command(const UtfPerformanceTest & testcase)125     Command(const UtfPerformanceTest &testcase)
126             : testcase(testcase),
127               input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
128               errorCode(U_ZERO_ERROR) {
129         cnv=ucnv_open(testcase.charset, &errorCode);
130         if (U_FAILURE(errorCode)) {
131             fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
132         }
133         ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
134     }
135 public:
~Command()136     virtual ~Command(){
137         if(U_SUCCESS(errorCode)) {
138             ucnv_close(cnv);
139         }
140     }
141     // virtual void call(UErrorCode* pErrorCode) { ... }
getOperationsPerIteration()142     virtual long getOperationsPerIteration(){
143         return countInputCodePoints;
144     }
145 
146     const UtfPerformanceTest &testcase;
147     const UChar *input;
148     int32_t inputLength;
149     UErrorCode errorCode;
150     UConverter *cnv;
151 };
152 
153 // Test roundtrip UTF-16->encoding->UTF-16.
154 class Roundtrip : public Command {
155 protected:
Roundtrip(const UtfPerformanceTest & testcase)156     Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
157 public:
get(const UtfPerformanceTest & testcase)158     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
159         Roundtrip * t = new Roundtrip(testcase);
160         if (U_SUCCESS(t->errorCode)){
161             return t;
162         } else {
163             delete t;
164             return NULL;
165         }
166     }
call(UErrorCode * pErrorCode)167     virtual void call(UErrorCode* pErrorCode){
168         const UChar *pIn, *pInLimit;
169         UChar *pOut, *pOutLimit;
170         char *pInter, *pInterLimit;
171         const char *p;
172         UBool flush;
173 
174         ucnv_reset(cnv);
175         fromUCallbackCount=0;
176 
177         pIn=input;
178         pInLimit=input+inputLength;
179 
180         pOut=output;
181         pOutLimit=output+OUTPUT_CAPACITY;
182 
183         pInterLimit=intermediate+testcase.chunkLength;
184 
185         encodedLength=outputLength=0;
186         flush=FALSE;
187 
188         do {
189             /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
190             pInter=intermediate;
191             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
192             encodedLength+=(int32_t)(pInter-intermediate);
193 
194             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
195                 /* make sure that we convert once more to really flush */
196                 *pErrorCode=U_ZERO_ERROR;
197             } else if(U_FAILURE(*pErrorCode)) {
198                 return;
199             } else if(pIn==pInLimit) {
200                 flush=TRUE;
201             }
202 
203             /* convert the block [intermediate..pInter[ back to UTF-16 */
204             p=intermediate;
205             ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
206             if(U_FAILURE(*pErrorCode)) {
207                 return;
208             }
209             /* intermediate must have been consumed (p==pInter) because of the converter semantics */
210         } while(!flush);
211 
212         outputLength=pOut-output;
213         if(inputLength!=outputLength) {
214             fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
215             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
216         }
217     }
218 };
219 
220 // Test one-way conversion UTF-16->encoding.
221 class FromUnicode : public Command {
222 protected:
FromUnicode(const UtfPerformanceTest & testcase)223     FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
224 public:
get(const UtfPerformanceTest & testcase)225     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
226         FromUnicode * t = new FromUnicode(testcase);
227         if (U_SUCCESS(t->errorCode)){
228             return t;
229         } else {
230             delete t;
231             return NULL;
232         }
233     }
call(UErrorCode * pErrorCode)234     virtual void call(UErrorCode* pErrorCode){
235         const UChar *pIn, *pInLimit;
236         char *pInter, *pInterLimit;
237 
238         ucnv_resetFromUnicode(cnv);
239         fromUCallbackCount=0;
240 
241         pIn=input;
242         pInLimit=input+inputLength;
243 
244         pInterLimit=intermediate+testcase.chunkLength;
245 
246         encodedLength=0;
247 
248         for(;;) {
249             pInter=intermediate;
250             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
251             encodedLength+=(int32_t)(pInter-intermediate);
252 
253             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
254                 /* make sure that we convert once more to really flush */
255                 *pErrorCode=U_ZERO_ERROR;
256             } else if(U_FAILURE(*pErrorCode)) {
257                 return;
258             } else {
259                 break;  // all done
260             }
261         }
262     }
263 };
264 
265 // Test one-way conversion UTF-8->encoding.
266 class FromUTF8 : public Command {
267 protected:
FromUTF8(const UtfPerformanceTest & testcase)268     FromUTF8(const UtfPerformanceTest &testcase)
269             : Command(testcase),
270               utf8Cnv(NULL),
271               input8(utf8), input8Length(utf8Length) {
272         utf8Cnv=ucnv_open("UTF-8", &errorCode);
273     }
274 public:
get(const UtfPerformanceTest & testcase)275     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
276         FromUTF8 * t = new FromUTF8(testcase);
277         if (U_SUCCESS(t->errorCode)){
278             return t;
279         } else {
280             delete t;
281             return NULL;
282         }
283     }
~FromUTF8()284     ~FromUTF8() {
285         ucnv_close(utf8Cnv);
286     }
call(UErrorCode * pErrorCode)287     virtual void call(UErrorCode* pErrorCode){
288         const char *pIn, *pInLimit;
289         char *pInter, *pInterLimit;
290         UChar *pivotSource, *pivotTarget, *pivotLimit;
291 
292         ucnv_resetToUnicode(utf8Cnv);
293         ucnv_resetFromUnicode(cnv);
294         fromUCallbackCount=0;
295 
296         pIn=input8;
297         pInLimit=input8+input8Length;
298 
299         pInterLimit=intermediate+testcase.chunkLength;
300 
301         pivotSource=pivotTarget=pivot;
302         pivotLimit=pivot+testcase.pivotLength;
303 
304         encodedLength=0;
305 
306         for(;;) {
307             pInter=intermediate;
308             ucnv_convertEx(cnv, utf8Cnv,
309                            &pInter, pInterLimit,
310                            &pIn, pInLimit,
311                            pivot, &pivotSource, &pivotTarget, pivotLimit,
312                            FALSE, TRUE, pErrorCode);
313             encodedLength+=(int32_t)(pInter-intermediate);
314 
315             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
316                 /* make sure that we convert once more to really flush */
317                 *pErrorCode=U_ZERO_ERROR;
318             } else if(U_FAILURE(*pErrorCode)) {
319                 return;
320             } else {
321                 break;  // all done
322             }
323         }
324     }
325 protected:
326     UConverter *utf8Cnv;
327     const char *input8;
328     int32_t input8Length;
329 };
330 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)331 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
332     switch (index) {
333         case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
334         case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
335         case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
336         default: name = ""; break;
337     }
338     return NULL;
339 }
340 
main(int argc,const char * argv[])341 int main(int argc, const char *argv[])
342 {
343     // Default values for command-line options.
344     options[CHARSET].value = "UTF-8";
345     options[CHUNK_LENGTH].value = "4096";
346     options[PIVOT_LENGTH].value = "1024";
347 
348     UErrorCode status = U_ZERO_ERROR;
349     UtfPerformanceTest test(argc, argv, status);
350 
351 	if (U_FAILURE(status)){
352         printf("The error is %s\n", u_errorName(status));
353         test.usage();
354         return status;
355     }
356 
357     if (test.run() == FALSE){
358         fprintf(stderr, "FAILED: Tests could not be run please check the "
359 			            "arguments.\n");
360         return -1;
361     }
362 
363     if (fromUCallbackCount > 0) {
364         printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
365     }
366 
367     return 0;
368 }
369