1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2002-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 //
10 //   dcfmtest.cpp
11 //
12 //     Decimal Formatter tests, data driven.
13 //
14 
15 #include "intltest.h"
16 
17 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS
18 
19 #include "unicode/regex.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ustring.h"
22 #include "unicode/unistr.h"
23 #include "unicode/dcfmtsym.h"
24 #include "unicode/decimfmt.h"
25 #include "unicode/locid.h"
26 #include "cmemory.h"
27 #include "dcfmtest.h"
28 #include "util.h"
29 #include "cstring.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdio.h>
33 
34 #if defined(__GLIBCXX__)
35 namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
36 #endif
37 
38 #include <string>
39 #include <iostream>
40 
41 //---------------------------------------------------------------------------
42 //
43 //  Test class boilerplate
44 //
45 //---------------------------------------------------------------------------
DecimalFormatTest()46 DecimalFormatTest::DecimalFormatTest()
47 {
48 }
49 
50 
~DecimalFormatTest()51 DecimalFormatTest::~DecimalFormatTest()
52 {
53 }
54 
55 
56 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)57 void DecimalFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
58 {
59     if (exec) logln("TestSuite DecimalFormatTest: ");
60     switch (index) {
61 
62 #if !UCONFIG_NO_FILE_IO
63         case 0: name = "DataDrivenTests";
64             if (exec) DataDrivenTests();
65             break;
66 #else
67         case 0: name = "skip";
68             break;
69 #endif
70 
71         default: name = "";
72             break; //needed to end loop
73     }
74 }
75 
76 
77 //---------------------------------------------------------------------------
78 //
79 //   Error Checking / Reporting macros used in all of the tests.
80 //
81 //---------------------------------------------------------------------------
// Every macro body is wrapped in do { ... } while (0) so that an invocation
// followed by a semicolon is exactly one statement.  With a bare { ... } block,
// "if (c) DF_ASSERT(x); else ..." does not compile because the semicolon
// ends the if statement and orphans the else.

// If the local variable "status" holds a failure code, report a data error and
// return 0 from the enclosing function.
#define DF_CHECK_STATUS do { if (U_FAILURE(status)) { \
    dataerrln("DecimalFormatTest failure at line %d.  status=%s", \
    __LINE__, u_errorName(status)); return 0; } } while (0)

// Report an error if expr evaluates to FALSE.  Execution continues afterwards.
#define DF_ASSERT(expr) do { if ((expr)==FALSE) { \
    errln("DecimalFormatTest failure at line %d.\n", __LINE__); } } while (0)

// Evaluate expr with a fresh local "status" in scope (expr is expected to use
// it) and report a data error unless status ends up equal to errcode.
#define DF_ASSERT_FAIL(expr, errcode) do { UErrorCode status=U_ZERO_ERROR; (expr); \
    if (status!=errcode) { dataerrln("DecimalFormatTest failure at line %d.  Expected status=%s, got %s", \
    __LINE__, u_errorName(errcode), u_errorName(status)); } } while (0)

// Like DF_CHECK_STATUS, but also prints the caller supplied line number "line",
// prints the numeric value of status, and does not return.
#define DF_CHECK_STATUS_L(line) do { if (U_FAILURE(status)) { errln( \
    "DecimalFormatTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); } } while (0)

// Like DF_ASSERT, but also prints the caller supplied line number "line" and
// returns from the enclosing (void) function on failure.
#define DF_ASSERT_L(expr, line) do { if ((expr)==FALSE) { \
    errln("DecimalFormatTest failure at line %d, from %d.", __LINE__, (line)); return; } } while (0)
97 
98 
99 
100 //
101 //  InvariantStringPiece
102 //    Wrap a StringPiece around the extracted invariant data of a UnicodeString.
103 //    The data is guaranteed to be nul terminated.  (This is not true of StringPiece
104 //    in general, but is true of InvariantStringPiece)
105 //
106 class InvariantStringPiece: public StringPiece {
107   public:
108     InvariantStringPiece(const UnicodeString &s);
~InvariantStringPiece()109     ~InvariantStringPiece() {};
110   private:
111     MaybeStackArray<char, 20>  buf;
112 };
113 
InvariantStringPiece(const UnicodeString & s)114 InvariantStringPiece::InvariantStringPiece(const UnicodeString &s) {
115     int32_t  len = s.length();
116     if (len+1 > buf.getCapacity()) {
117         buf.resize(len+1);
118     }
119     // Buffer size is len+1 so that s.extract() will nul-terminate the string.
120     s.extract(0, len, buf.getAlias(), len+1, US_INV);
121     this->set(buf.getAlias(), len);
122 }
123 
124 
125 //  UnicodeStringPiece
126 //    Wrap a StringPiece around the extracted (to the default charset) data of
127 //    a UnicodeString.  The extracted data is guaranteed to be nul terminated.
128 //    (This is not true of StringPiece in general, but is true of UnicodeStringPiece)
129 //
130 class UnicodeStringPiece: public StringPiece {
131   public:
132     UnicodeStringPiece(const UnicodeString &s);
~UnicodeStringPiece()133     ~UnicodeStringPiece() {};
134   private:
135     MaybeStackArray<char, 20>  buf;
136 };
137 
UnicodeStringPiece(const UnicodeString & s)138 UnicodeStringPiece::UnicodeStringPiece(const UnicodeString &s) {
139     int32_t  len = s.length();
140     int32_t  capacity = buf.getCapacity();
141     int32_t requiredCapacity = s.extract(0, len, buf.getAlias(), capacity) + 1;
142     if (capacity < requiredCapacity) {
143         buf.resize(requiredCapacity);
144         capacity = requiredCapacity;
145         s.extract(0, len, buf.getAlias(), capacity);
146     }
147     this->set(buf.getAlias(), requiredCapacity - 1);
148 }
149 
150 
151 
152 //---------------------------------------------------------------------------
153 //
154 //      DataDrivenTests
155 //             The test cases are in a separate data file,
156 //
157 //---------------------------------------------------------------------------
158 
159 // Translate a Formattable::type enum value to a string, for error message formatting.
formattableType(Formattable::Type typ)160 static const char *formattableType(Formattable::Type typ) {
161     static const char *types[] = {"kDate",
162                                   "kDouble",
163                                   "kLong",
164                                   "kString",
165                                   "kArray",
166                                   "kInt64",
167                                   "kObject"
168                                   };
169     if (typ<0 || typ>Formattable::kObject) {
170         return "Unknown";
171     }
172     return types[typ];
173 }
174 
175 const char *
getPath(char * buffer,const char * filename)176 DecimalFormatTest::getPath(char *buffer, const char *filename) {
177     UErrorCode status=U_ZERO_ERROR;
178     const char *testDataDirectory = IntlTest::getSourceTestData(status);
179     DF_CHECK_STATUS;
180 
181     strcpy(buffer, testDataDirectory);
182     strcat(buffer, filename);
183     return buffer;
184 }
185 
DataDrivenTests()186 void DecimalFormatTest::DataDrivenTests() {
187     char tdd[2048];
188     const char *srcPath;
189     UErrorCode  status  = U_ZERO_ERROR;
190     int32_t     lineNum = 0;
191 
192     //
193     //  Open and read the test data file.
194     //
195     srcPath=getPath(tdd, "dcfmtest.txt");
196     if(srcPath==NULL) {
197         return; /* something went wrong, error already output */
198     }
199 
200     int32_t    len;
201     UChar *testData = ReadAndConvertFile(srcPath, len, status);
202     if (U_FAILURE(status)) {
203         return; /* something went wrong, error already output */
204     }
205 
206     //
207     //  Put the test data into a UnicodeString
208     //
209     UnicodeString testString(FALSE, testData, len);
210 
211     RegexMatcher    parseLineMat(UnicodeString(
212             "(?i)\\s*parse\\s+"
213             "\"([^\"]*)\"\\s+"           // Capture group 1: input text
214             "([ild])\\s+"                // Capture group 2: expected parsed type
215             "\"([^\"]*)\"\\s+"           // Capture group 3: expected parsed decimal
216             "\\s*(?:#.*)?"),             // Trailing comment
217          0, status);
218 
219     RegexMatcher    formatLineMat(UnicodeString(
220             "(?i)\\s*format\\s+"
221             "(\\S+)\\s+"                 // Capture group 1: pattern
222             "(ceiling|floor|down|up|halfeven|halfdown|halfup|default|unnecessary)\\s+"  // Capture group 2: Rounding Mode
223             "\"([^\"]*)\"\\s+"           // Capture group 3: input
224             "\"([^\"]*)\""               // Capture group 4: expected output
225             "\\s*(?:#.*)?"),             // Trailing comment
226          0, status);
227 
228     RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
229     RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);
230 
231     if (U_FAILURE(status)){
232         dataerrln("Construct RegexMatcher() error.");
233         delete [] testData;
234         return;
235     }
236 
237     //
238     //  Loop over the test data file, once per line.
239     //
240     while (lineMat.find()) {
241         lineNum++;
242         if (U_FAILURE(status)) {
243             dataerrln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
244         }
245 
246         status = U_ZERO_ERROR;
247         UnicodeString testLine = lineMat.group(1, status);
248         // printf("%s\n", UnicodeStringPiece(testLine).data());
249         if (testLine.length() == 0) {
250             continue;
251         }
252 
253         //
254         // Parse the test line.  Skip blank and comment only lines.
255         // Separate out the three main fields - pattern, flags, target.
256         //
257 
258         commentMat.reset(testLine);
259         if (commentMat.lookingAt(status)) {
260             // This line is a comment, or blank.
261             continue;
262         }
263 
264 
265         //
266         //  Handle "parse" test case line from file
267         //
268         parseLineMat.reset(testLine);
269         if (parseLineMat.lookingAt(status)) {
270             execParseTest(lineNum,
271                           parseLineMat.group(1, status),    // input
272                           parseLineMat.group(2, status),    // Expected Type
273                           parseLineMat.group(3, status),    // Expected Decimal String
274                           status
275                           );
276             continue;
277         }
278 
279         //
280         //  Handle "format" test case line
281         //
282         formatLineMat.reset(testLine);
283         if (formatLineMat.lookingAt(status)) {
284             execFormatTest(lineNum,
285                            formatLineMat.group(1, status),    // Pattern
286                            formatLineMat.group(2, status),    // rounding mode
287                            formatLineMat.group(3, status),    // input decimal number
288                            formatLineMat.group(4, status),    // expected formatted result
289                            kFormattable,
290                            status);
291 
292             execFormatTest(lineNum,
293                            formatLineMat.group(1, status),    // Pattern
294                            formatLineMat.group(2, status),    // rounding mode
295                            formatLineMat.group(3, status),    // input decimal number
296                            formatLineMat.group(4, status),    // expected formatted result
297                            kStringPiece,
298                            status);
299             continue;
300         }
301 
302         //
303         //  Line is not a recognizable test case.
304         //
305         errln("Badly formed test case at line %d.\n%s\n",
306              lineNum, UnicodeStringPiece(testLine).data());
307 
308     }
309 
310     delete [] testData;
311 }
312 
313 
314 
execParseTest(int32_t lineNum,const UnicodeString & inputText,const UnicodeString & expectedType,const UnicodeString & expectedDecimal,UErrorCode & status)315 void DecimalFormatTest::execParseTest(int32_t lineNum,
316                                      const UnicodeString &inputText,
317                                      const UnicodeString &expectedType,
318                                      const UnicodeString &expectedDecimal,
319                                      UErrorCode &status) {
320 
321     if (U_FAILURE(status)) {
322         return;
323     }
324 
325     DecimalFormatSymbols symbols(Locale::getUS(), status);
326     UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
327     DecimalFormat format(pattern, symbols, status);
328     Formattable   result;
329     if (U_FAILURE(status)) {
330         dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
331             lineNum, u_errorName(status));
332         return;
333     }
334 
335     ParsePosition pos;
336     int32_t expectedParseEndPosition = inputText.length();
337 
338     format.parse(inputText, result, pos);
339 
340     if (expectedParseEndPosition != pos.getIndex()) {
341         errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d.  "
342               "Actual parse position: %d", expectedParseEndPosition, pos.getIndex());
343         return;
344     }
345 
346     char   expectedTypeC[2];
347     expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
348     Formattable::Type expectType = Formattable::kDate;
349     switch (expectedTypeC[0]) {
350       case 'd': expectType = Formattable::kDouble; break;
351       case 'i': expectType = Formattable::kLong;   break;
352       case 'l': expectType = Formattable::kInt64;  break;
353       default:
354           errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"",
355               lineNum, InvariantStringPiece(expectedType).data());
356           return;
357     }
358     if (result.getType() != expectType) {
359         errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
360              lineNum, formattableType(expectType), formattableType(result.getType()));
361         return;
362     }
363 
364     StringPiece decimalResult = result.getDecimalNumber(status);
365     if (U_FAILURE(status)) {
366         errln("File %s, line %d: error %s.  Line in file dcfmtest.txt:  %d:",
367             __FILE__, __LINE__, u_errorName(status), lineNum);
368         return;
369     }
370 
371     InvariantStringPiece expectedResults(expectedDecimal);
372     if (decimalResult != expectedResults) {
373         errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
374             lineNum, expectedResults.data(), decimalResult.data());
375     }
376 
377     return;
378 }
379 
380 
execFormatTest(int32_t lineNum,const UnicodeString & pattern,const UnicodeString & round,const UnicodeString & input,const UnicodeString & expected,EFormatInputType inType,UErrorCode & status)381 void DecimalFormatTest::execFormatTest(int32_t lineNum,
382                            const UnicodeString &pattern,     // Pattern
383                            const UnicodeString &round,       // rounding mode
384                            const UnicodeString &input,       // input decimal number
385                            const UnicodeString &expected,    // expected formatted result
386                            EFormatInputType inType,          // input number type
387                            UErrorCode &status) {
388     if (U_FAILURE(status)) {
389         return;
390     }
391 
392     DecimalFormatSymbols symbols(Locale::getUS(), status);
393     // printf("Pattern = %s\n", UnicodeStringPiece(pattern).data());
394     DecimalFormat fmtr(pattern, symbols, status);
395     if (U_FAILURE(status)) {
396         dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
397             lineNum, u_errorName(status));
398         return;
399     }
400     if (round=="ceiling") {
401         fmtr.setRoundingMode(DecimalFormat::kRoundCeiling);
402     } else if (round=="floor") {
403         fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
404     } else if (round=="down") {
405         fmtr.setRoundingMode(DecimalFormat::kRoundDown);
406     } else if (round=="up") {
407         fmtr.setRoundingMode(DecimalFormat::kRoundUp);
408     } else if (round=="halfeven") {
409         fmtr.setRoundingMode(DecimalFormat::kRoundHalfEven);
410     } else if (round=="halfdown") {
411         fmtr.setRoundingMode(DecimalFormat::kRoundHalfDown);
412     } else if (round=="halfup") {
413         fmtr.setRoundingMode(DecimalFormat::kRoundHalfUp);
414     } else if (round=="default") {
415         // don't set any value.
416     } else if (round=="unnecessary") {
417         fmtr.setRoundingMode(DecimalFormat::kRoundUnnecessary);
418     } else {
419         fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
420         errln("file dcfmtest.txt, line %d: Bad rounding mode \"%s\"",
421                 lineNum, UnicodeStringPiece(round).data());
422     }
423 
424     const char *typeStr = "Unknown";
425     UnicodeString result;
426     UnicodeStringPiece spInput(input);
427 
428     switch (inType) {
429     case kFormattable:
430         {
431             typeStr = "Formattable";
432             Formattable fmtbl;
433             fmtbl.setDecimalNumber(spInput, status);
434             fmtr.format(fmtbl, result, NULL, status);
435         }
436         break;
437     case kStringPiece:
438         typeStr = "StringPiece";
439         fmtr.format(spInput, result, NULL, status);
440         break;
441     }
442 
443     if ((status == U_FORMAT_INEXACT_ERROR) && (result == "") && (expected == "Inexact")) {
444         // Test succeeded.
445         status = U_ZERO_ERROR;
446         return;
447     }
448 
449     if (U_FAILURE(status)) {
450         errln("[%s] file dcfmtest.txt, line %d: format() returned %s.",
451             typeStr, lineNum, u_errorName(status));
452         status = U_ZERO_ERROR;
453         return;
454     }
455 
456     if (result != expected) {
457         errln("[%s] file dcfmtest.txt, line %d: expected \"%s\", got \"%s\", %s",
458             typeStr, lineNum, UnicodeStringPiece(expected).data(), UnicodeStringPiece(result).data(),
459             u_errorName(status));
460     }
461 }
462 
463 
464 //-------------------------------------------------------------------------------
465 //
466 //  Read a text data file, convert it from UTF-8 to UChars, and return the data
467 //    in one big UChar * buffer, which the caller must delete.
468 //
469 //    (Lightly modified version of a similar function in regextst.cpp)
470 //
471 //--------------------------------------------------------------------------------
ReadAndConvertFile(const char * fileName,int32_t & ulen,UErrorCode & status)472 UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
473                                      UErrorCode &status) {
474     UChar       *retPtr  = NULL;
475     char        *fileBuf = NULL;
476     const char  *fileBufNoBOM = NULL;
477     FILE        *f       = NULL;
478 
479     ulen = 0;
480     if (U_FAILURE(status)) {
481         return retPtr;
482     }
483 
484     //
485     //  Open the file.
486     //
487     f = fopen(fileName, "rb");
488     if (f == 0) {
489         dataerrln("Error opening test data file %s\n", fileName);
490         status = U_FILE_ACCESS_ERROR;
491         return NULL;
492     }
493     //
494     //  Read it in
495     //
496     int32_t            fileSize;
497     int32_t            amtRead;
498     int32_t            amtReadNoBOM;
499 
500     fseek( f, 0, SEEK_END);
501     fileSize = ftell(f);
502     fileBuf = new char[fileSize];
503     fseek(f, 0, SEEK_SET);
504     amtRead = fread(fileBuf, 1, fileSize, f);
505     if (amtRead != fileSize || fileSize <= 0) {
506         errln("Error reading test data file.");
507         goto cleanUpAndReturn;
508     }
509 
510     //
511     // Look for a UTF-8 BOM on the data just read.
512     //    The test data file is UTF-8.
513     //    The BOM needs to be there in the source file to keep the Windows &
514     //    EBCDIC machines happy, so force an error if it goes missing.
515     //    Many Linux editors will silently strip it.
516     //
517     fileBufNoBOM = fileBuf + 3;
518     amtReadNoBOM = amtRead - 3;
519     if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
520         // TODO:  restore this check.
521         errln("Test data file %s is missing its BOM", fileName);
522         fileBufNoBOM = fileBuf;
523         amtReadNoBOM = amtRead;
524     }
525 
526     //
527     // Find the length of the input in UTF-16 UChars
528     //  (by preflighting the conversion)
529     //
530     u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
531 
532     //
533     // Convert file contents from UTF-8 to UTF-16
534     //
535     if (status == U_BUFFER_OVERFLOW_ERROR) {
536         // Buffer Overflow is expected from the preflight operation.
537         status = U_ZERO_ERROR;
538         retPtr = new UChar[ulen+1];
539         u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
540     }
541 
542 cleanUpAndReturn:
543     fclose(f);
544     delete[] fileBuf;
545     if (U_FAILURE(status)) {
546         errln("ICU Error \"%s\"\n", u_errorName(status));
547         delete retPtr;
548         retPtr = NULL;
549     };
550     return retPtr;
551 }
552 
553 #endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
554 
555