1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 2002-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 //
8 //   dcfmtest.cpp
9 //
10 //     Decimal Formatter tests, data driven.
11 //
12 
13 #include "intltest.h"
14 
15 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS
16 
17 #include "unicode/regex.h"
18 #include "unicode/uchar.h"
19 #include "unicode/ustring.h"
20 #include "unicode/unistr.h"
21 #include "unicode/dcfmtsym.h"
22 #include "unicode/decimfmt.h"
23 #include "unicode/locid.h"
24 #include "cmemory.h"
25 #include "dcfmtest.h"
26 #include "util.h"
27 #include "cstring.h"
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdio.h>
31 
32 #if !defined(_MSC_VER)
33 namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
34 #endif
35 
36 #include <string>
37 #include <iostream>
38 
39 //---------------------------------------------------------------------------
40 //
41 //  Test class boilerplate
42 //
43 //---------------------------------------------------------------------------
DecimalFormatTest()44 DecimalFormatTest::DecimalFormatTest()
45 {
46 }
47 
48 
~DecimalFormatTest()49 DecimalFormatTest::~DecimalFormatTest()
50 {
51 }
52 
53 
54 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)55 void DecimalFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
56 {
57     if (exec) logln("TestSuite DecimalFormatTest: ");
58     switch (index) {
59 
60 #if !UCONFIG_NO_FILE_IO
61         case 0: name = "DataDrivenTests";
62             if (exec) DataDrivenTests();
63             break;
64 #else
65         case 0: name = "skip";
66             break;
67 #endif
68 
69         default: name = "";
70             break; //needed to end loop
71     }
72 }
73 
74 
75 //---------------------------------------------------------------------------
76 //
77 //   Error Checking / Reporting macros used in all of the tests.
78 //
79 //---------------------------------------------------------------------------
// DF_CHECK_STATUS
//   If the variable `status` in the enclosing scope holds a failure code, reports a
//   data error with the current source line and the ICU error name, then makes the
//   enclosing function return 0.  Usable only where `return 0;` is valid.
#define DF_CHECK_STATUS {if (U_FAILURE(status)) \
    {dataerrln("DecimalFormatTest failure at line %d.  status=%s", \
    __LINE__, u_errorName(status)); return 0;}}

// DF_ASSERT(expr)
//   Reports a test error with the current source line when expr compares equal to FALSE.
//   Execution continues after the report.
#define DF_ASSERT(expr) {if ((expr)==FALSE) {errln("DecimalFormatTest failure at line %d.\n", __LINE__);};}

// DF_ASSERT_FAIL(expr, errcode)
//   Declares a fresh local `status` (U_ZERO_ERROR), evaluates expr, and reports a data
//   error if `status` does not end up equal to errcode.  The local `status` hides any
//   outer variable of that name while expr is evaluated.
#define DF_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\
if (status!=errcode) {dataerrln("DecimalFormatTest failure at line %d.  Expected status=%s, got %s", \
    __LINE__, u_errorName(errcode), u_errorName(status));};}

// DF_CHECK_STATUS_L(line)
//   Like DF_CHECK_STATUS, but also prints a caller-supplied line number, prints the
//   numeric status value rather than its name, and does not return.
#define DF_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \
    "DecimalFormatTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); }}

// DF_ASSERT_L(expr, line)
//   Like DF_ASSERT, but also prints a caller-supplied line number and then returns
//   from the enclosing function.  Usable only in functions returning void.
#define DF_ASSERT_L(expr, line) {if ((expr)==FALSE) { \
    errln("DecimalFormatTest failure at line %d, from %d.", __LINE__, (line)); return;}}
95 
96 
97 
98 //
99 //  InvariantStringPiece
100 //    Wrap a StringPiece around the extracted invariant data of a UnicodeString.
101 //    The data is guaranteed to be nul terminated.  (This is not true of StringPiece
102 //    in general, but is true of InvariantStringPiece)
103 //
104 class InvariantStringPiece: public StringPiece {
105   public:
106     InvariantStringPiece(const UnicodeString &s);
~InvariantStringPiece()107     ~InvariantStringPiece() {};
108   private:
109     MaybeStackArray<char, 20>  buf;
110 };
111 
InvariantStringPiece(const UnicodeString & s)112 InvariantStringPiece::InvariantStringPiece(const UnicodeString &s) {
113     int32_t  len = s.length();
114     if (len+1 > buf.getCapacity()) {
115         buf.resize(len+1);
116     }
117     // Buffer size is len+1 so that s.extract() will nul-terminate the string.
118     s.extract(0, len, buf.getAlias(), len+1, US_INV);
119     this->set(buf.getAlias(), len);
120 }
121 
122 
123 //  UnicodeStringPiece
124 //    Wrap a StringPiece around the extracted (to the default charset) data of
125 //    a UnicodeString.  The extracted data is guaranteed to be nul terminated.
126 //    (This is not true of StringPiece in general, but is true of UnicodeStringPiece)
127 //
128 class UnicodeStringPiece: public StringPiece {
129   public:
130     UnicodeStringPiece(const UnicodeString &s);
~UnicodeStringPiece()131     ~UnicodeStringPiece() {};
132   private:
133     MaybeStackArray<char, 20>  buf;
134 };
135 
UnicodeStringPiece(const UnicodeString & s)136 UnicodeStringPiece::UnicodeStringPiece(const UnicodeString &s) {
137     int32_t  len = s.length();
138     int32_t  capacity = buf.getCapacity();
139     int32_t requiredCapacity = s.extract(0, len, buf.getAlias(), capacity) + 1;
140     if (capacity < requiredCapacity) {
141         buf.resize(requiredCapacity);
142         capacity = requiredCapacity;
143         s.extract(0, len, buf.getAlias(), capacity);
144     }
145     this->set(buf.getAlias(), requiredCapacity - 1);
146 }
147 
148 
149 
150 //---------------------------------------------------------------------------
151 //
152 //      DataDrivenTests
153 //             The test cases are in a separate data file,
154 //
155 //---------------------------------------------------------------------------
156 
157 // Translate a Formattable::type enum value to a string, for error message formatting.
formattableType(Formattable::Type typ)158 static const char *formattableType(Formattable::Type typ) {
159     static const char *types[] = {"kDate",
160                                   "kDouble",
161                                   "kLong",
162                                   "kString",
163                                   "kArray",
164                                   "kInt64",
165                                   "kObject"
166                                   };
167     if (typ<0 || typ>Formattable::kObject) {
168         return "Unknown";
169     }
170     return types[typ];
171 }
172 
173 const char *
getPath(char * buffer,const char * filename)174 DecimalFormatTest::getPath(char *buffer, const char *filename) {
175     UErrorCode status=U_ZERO_ERROR;
176     const char *testDataDirectory = IntlTest::getSourceTestData(status);
177     DF_CHECK_STATUS;
178 
179     strcpy(buffer, testDataDirectory);
180     strcat(buffer, filename);
181     return buffer;
182 }
183 
DataDrivenTests()184 void DecimalFormatTest::DataDrivenTests() {
185     char tdd[2048];
186     const char *srcPath;
187     UErrorCode  status  = U_ZERO_ERROR;
188     int32_t     lineNum = 0;
189 
190     //
191     //  Open and read the test data file.
192     //
193     srcPath=getPath(tdd, "dcfmtest.txt");
194     if(srcPath==NULL) {
195         return; /* something went wrong, error already output */
196     }
197 
198     int32_t    len;
199     UChar *testData = ReadAndConvertFile(srcPath, len, status);
200     if (U_FAILURE(status)) {
201         return; /* something went wrong, error already output */
202     }
203 
204     //
205     //  Put the test data into a UnicodeString
206     //
207     UnicodeString testString(FALSE, testData, len);
208 
209     RegexMatcher    parseLineMat(UnicodeString(
210             "(?i)\\s*parse\\s+"
211             "\"([^\"]*)\"\\s+"           // Capture group 1: input text
212             "([ild])\\s+"                // Capture group 2: expected parsed type
213             "\"([^\"]*)\"\\s+"           // Capture group 3: expected parsed decimal
214             "\\s*(?:#.*)?"),             // Trailing comment
215          0, status);
216 
217     RegexMatcher    formatLineMat(UnicodeString(
218             "(?i)\\s*format\\s+"
219             "(\\S+)\\s+"                 // Capture group 1: pattern
220             "(ceiling|floor|down|up|halfeven|halfdown|halfup|default|unnecessary)\\s+"  // Capture group 2: Rounding Mode
221             "\"([^\"]*)\"\\s+"           // Capture group 3: input
222             "\"([^\"]*)\""               // Capture group 4: expected output
223             "\\s*(?:#.*)?"),             // Trailing comment
224          0, status);
225 
226     RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
227     RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);
228 
229     if (U_FAILURE(status)){
230         dataerrln("Construct RegexMatcher() error.");
231         delete [] testData;
232         return;
233     }
234 
235     //
236     //  Loop over the test data file, once per line.
237     //
238     while (lineMat.find()) {
239         lineNum++;
240         if (U_FAILURE(status)) {
241             dataerrln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
242         }
243 
244         status = U_ZERO_ERROR;
245         UnicodeString testLine = lineMat.group(1, status);
246         // printf("%s\n", UnicodeStringPiece(testLine).data());
247         if (testLine.length() == 0) {
248             continue;
249         }
250 
251         //
252         // Parse the test line.  Skip blank and comment only lines.
253         // Separate out the three main fields - pattern, flags, target.
254         //
255 
256         commentMat.reset(testLine);
257         if (commentMat.lookingAt(status)) {
258             // This line is a comment, or blank.
259             continue;
260         }
261 
262 
263         //
264         //  Handle "parse" test case line from file
265         //
266         parseLineMat.reset(testLine);
267         if (parseLineMat.lookingAt(status)) {
268             execParseTest(lineNum,
269                           parseLineMat.group(1, status),    // input
270                           parseLineMat.group(2, status),    // Expected Type
271                           parseLineMat.group(3, status),    // Expected Decimal String
272                           status
273                           );
274             continue;
275         }
276 
277         //
278         //  Handle "format" test case line
279         //
280         formatLineMat.reset(testLine);
281         if (formatLineMat.lookingAt(status)) {
282             execFormatTest(lineNum,
283                            formatLineMat.group(1, status),    // Pattern
284                            formatLineMat.group(2, status),    // rounding mode
285                            formatLineMat.group(3, status),    // input decimal number
286                            formatLineMat.group(4, status),    // expected formatted result
287                            kFormattable,
288                            status);
289 
290             execFormatTest(lineNum,
291                            formatLineMat.group(1, status),    // Pattern
292                            formatLineMat.group(2, status),    // rounding mode
293                            formatLineMat.group(3, status),    // input decimal number
294                            formatLineMat.group(4, status),    // expected formatted result
295                            kStringPiece,
296                            status);
297             continue;
298         }
299 
300         //
301         //  Line is not a recognizable test case.
302         //
303         errln("Badly formed test case at line %d.\n%s\n",
304              lineNum, UnicodeStringPiece(testLine).data());
305 
306     }
307 
308     delete [] testData;
309 }
310 
311 
312 
execParseTest(int32_t lineNum,const UnicodeString & inputText,const UnicodeString & expectedType,const UnicodeString & expectedDecimal,UErrorCode & status)313 void DecimalFormatTest::execParseTest(int32_t lineNum,
314                                      const UnicodeString &inputText,
315                                      const UnicodeString &expectedType,
316                                      const UnicodeString &expectedDecimal,
317                                      UErrorCode &status) {
318 
319     if (U_FAILURE(status)) {
320         return;
321     }
322 
323     DecimalFormatSymbols symbols(Locale::getUS(), status);
324     UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
325     DecimalFormat format(pattern, symbols, status);
326     Formattable   result;
327     if (U_FAILURE(status)) {
328         dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
329             lineNum, u_errorName(status));
330         return;
331     }
332 
333     ParsePosition pos;
334     int32_t expectedParseEndPosition = inputText.length();
335 
336     format.parse(inputText, result, pos);
337 
338     if (expectedParseEndPosition != pos.getIndex()) {
339         errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d.  "
340               "Actual parse position: %d", expectedParseEndPosition, pos.getIndex());
341         return;
342     }
343 
344     char   expectedTypeC[2];
345     expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
346     Formattable::Type expectType = Formattable::kDate;
347     switch (expectedTypeC[0]) {
348       case 'd': expectType = Formattable::kDouble; break;
349       case 'i': expectType = Formattable::kLong;   break;
350       case 'l': expectType = Formattable::kInt64;  break;
351       default:
352           errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"",
353               lineNum, InvariantStringPiece(expectedType).data());
354           return;
355     }
356     if (result.getType() != expectType) {
357         errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
358              lineNum, formattableType(expectType), formattableType(result.getType()));
359         return;
360     }
361 
362     StringPiece decimalResult = result.getDecimalNumber(status);
363     if (U_FAILURE(status)) {
364         errln("File %s, line %d: error %s.  Line in file dcfmtest.txt:  %d:",
365             __FILE__, __LINE__, u_errorName(status), lineNum);
366         return;
367     }
368 
369     InvariantStringPiece expectedResults(expectedDecimal);
370     if (decimalResult != expectedResults) {
371         errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
372             lineNum, expectedResults.data(), decimalResult.data());
373     }
374 
375     return;
376 }
377 
378 
execFormatTest(int32_t lineNum,const UnicodeString & pattern,const UnicodeString & round,const UnicodeString & input,const UnicodeString & expected,EFormatInputType inType,UErrorCode & status)379 void DecimalFormatTest::execFormatTest(int32_t lineNum,
380                            const UnicodeString &pattern,     // Pattern
381                            const UnicodeString &round,       // rounding mode
382                            const UnicodeString &input,       // input decimal number
383                            const UnicodeString &expected,    // expected formatted result
384                            EFormatInputType inType,          // input number type
385                            UErrorCode &status) {
386     if (U_FAILURE(status)) {
387         return;
388     }
389 
390     DecimalFormatSymbols symbols(Locale::getUS(), status);
391     // printf("Pattern = %s\n", UnicodeStringPiece(pattern).data());
392     DecimalFormat fmtr(pattern, symbols, status);
393     if (U_FAILURE(status)) {
394         dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
395             lineNum, u_errorName(status));
396         return;
397     }
398     if (round=="ceiling") {
399         fmtr.setRoundingMode(DecimalFormat::kRoundCeiling);
400     } else if (round=="floor") {
401         fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
402     } else if (round=="down") {
403         fmtr.setRoundingMode(DecimalFormat::kRoundDown);
404     } else if (round=="up") {
405         fmtr.setRoundingMode(DecimalFormat::kRoundUp);
406     } else if (round=="halfeven") {
407         fmtr.setRoundingMode(DecimalFormat::kRoundHalfEven);
408     } else if (round=="halfdown") {
409         fmtr.setRoundingMode(DecimalFormat::kRoundHalfDown);
410     } else if (round=="halfup") {
411         fmtr.setRoundingMode(DecimalFormat::kRoundHalfUp);
412     } else if (round=="default") {
413         // don't set any value.
414     } else if (round=="unnecessary") {
415         fmtr.setRoundingMode(DecimalFormat::kRoundUnnecessary);
416     } else {
417         fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
418         errln("file dcfmtest.txt, line %d: Bad rounding mode \"%s\"",
419                 lineNum, UnicodeStringPiece(round).data());
420     }
421 
422     const char *typeStr = "Unknown";
423     UnicodeString result;
424     UnicodeStringPiece spInput(input);
425 
426     switch (inType) {
427     case kFormattable:
428         {
429             typeStr = "Formattable";
430             Formattable fmtbl;
431             fmtbl.setDecimalNumber(spInput, status);
432             fmtr.format(fmtbl, result, NULL, status);
433         }
434         break;
435     case kStringPiece:
436         typeStr = "StringPiece";
437         fmtr.format(spInput, result, NULL, status);
438         break;
439     }
440 
441     if ((status == U_FORMAT_INEXACT_ERROR) && (result == "") && (expected == "Inexact")) {
442         // Test succeeded.
443         status = U_ZERO_ERROR;
444         return;
445     }
446 
447     if (U_FAILURE(status)) {
448         errln("[%s] file dcfmtest.txt, line %d: format() returned %s.",
449             typeStr, lineNum, u_errorName(status));
450         status = U_ZERO_ERROR;
451         return;
452     }
453 
454     if (result != expected) {
455         errln("[%s] file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
456             typeStr, lineNum, UnicodeStringPiece(expected).data(), UnicodeStringPiece(result).data());
457     }
458 }
459 
460 
461 //-------------------------------------------------------------------------------
462 //
//  Read a text data file, convert it from UTF-8 to UChars, and return the data
//    in one big UChar * buffer, which the caller must release with delete [].
465 //
466 //    (Lightly modified version of a similar function in regextst.cpp)
467 //
468 //--------------------------------------------------------------------------------
ReadAndConvertFile(const char * fileName,int32_t & ulen,UErrorCode & status)469 UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
470                                      UErrorCode &status) {
471     UChar       *retPtr  = NULL;
472     char        *fileBuf = NULL;
473     const char  *fileBufNoBOM = NULL;
474     FILE        *f       = NULL;
475 
476     ulen = 0;
477     if (U_FAILURE(status)) {
478         return retPtr;
479     }
480 
481     //
482     //  Open the file.
483     //
484     f = fopen(fileName, "rb");
485     if (f == 0) {
486         dataerrln("Error opening test data file %s\n", fileName);
487         status = U_FILE_ACCESS_ERROR;
488         return NULL;
489     }
490     //
491     //  Read it in
492     //
493     int32_t            fileSize;
494     int32_t            amtRead;
495     int32_t            amtReadNoBOM;
496 
497     fseek( f, 0, SEEK_END);
498     fileSize = ftell(f);
499     fileBuf = new char[fileSize];
500     fseek(f, 0, SEEK_SET);
501     amtRead = fread(fileBuf, 1, fileSize, f);
502     if (amtRead != fileSize || fileSize <= 0) {
503         errln("Error reading test data file.");
504         goto cleanUpAndReturn;
505     }
506 
507     //
508     // Look for a UTF-8 BOM on the data just read.
509     //    The test data file is UTF-8.
510     //    The BOM needs to be there in the source file to keep the Windows &
511     //    EBCDIC machines happy, so force an error if it goes missing.
512     //    Many Linux editors will silently strip it.
513     //
514     fileBufNoBOM = fileBuf + 3;
515     amtReadNoBOM = amtRead - 3;
516     if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
517         // TODO:  restore this check.
518         errln("Test data file %s is missing its BOM", fileName);
519         fileBufNoBOM = fileBuf;
520         amtReadNoBOM = amtRead;
521     }
522 
523     //
524     // Find the length of the input in UTF-16 UChars
525     //  (by preflighting the conversion)
526     //
527     u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
528 
529     //
530     // Convert file contents from UTF-8 to UTF-16
531     //
532     if (status == U_BUFFER_OVERFLOW_ERROR) {
533         // Buffer Overflow is expected from the preflight operation.
534         status = U_ZERO_ERROR;
535         retPtr = new UChar[ulen+1];
536         u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
537     }
538 
539 cleanUpAndReturn:
540     fclose(f);
541     delete[] fileBuf;
542     if (U_FAILURE(status)) {
543         errln("ICU Error \"%s\"\n", u_errorName(status));
544         delete retPtr;
545         retPtr = NULL;
546     };
547     return retPtr;
548 }
549 
#endif  /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS */
551 
552