1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2002-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 //
10 //   dcfmtest.cpp
11 //
12 //     Decimal Formatter tests, data driven.
13 //
14 
15 #include "intltest.h"
16 
17 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS
18 
19 #include "unicode/regex.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ustring.h"
22 #include "unicode/unistr.h"
23 #include "unicode/dcfmtsym.h"
24 #include "unicode/decimfmt.h"
25 #include "unicode/locid.h"
26 #include "cmemory.h"
27 #include "dcfmtest.h"
28 #include "util.h"
29 #include "cstring.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdio.h>
33 
34 #if defined(__GLIBCXX__)
35 namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
36 #endif
37 
38 #include <string>
39 #include <iostream>
40 
41 //---------------------------------------------------------------------------
42 //
43 //  Test class boilerplate
44 //
45 //---------------------------------------------------------------------------
DecimalFormatTest()46 DecimalFormatTest::DecimalFormatTest()
47 {
48 }
49 
50 
~DecimalFormatTest()51 DecimalFormatTest::~DecimalFormatTest()
52 {
53 }
54 
55 
56 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)57 void DecimalFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
58 {
59     if (exec) logln("TestSuite DecimalFormatTest: ");
60     switch (index) {
61 
62 #if !UCONFIG_NO_FILE_IO
63         case 0: name = "DataDrivenTests";
64             if (exec) DataDrivenTests();
65             break;
66 #else
67         case 0: name = "skip";
68             break;
69 #endif
70 
71         default: name = "";
72             break; //needed to end loop
73     }
74 }
75 
76 
77 //---------------------------------------------------------------------------
78 //
79 //   Error Checking / Reporting macros used in all of the tests.
80 //
81 //---------------------------------------------------------------------------
// DF_CHECK_STATUS
//   If the variable named `status` at the point of use holds a failure, report it
//   with dataerrln() (source line and error name) and return 0 from the enclosing
//   function.  Usable only where `status` is in scope and where `return 0` is valid.
#define DF_CHECK_STATUS UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        dataerrln("DecimalFormatTest failure at line %d.  status=%s", \
                  __LINE__, u_errorName(status)); \
        return 0; \
    } \
} UPRV_BLOCK_MACRO_END

// DF_ASSERT(expr)
//   Report a test failure with errln() when expr compares equal to FALSE.
//   Execution continues afterwards; the macro does not return.
#define DF_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if ((expr)==FALSE) { \
        errln("DecimalFormatTest failure at line %d.\n", __LINE__); \
    } \
} UPRV_BLOCK_MACRO_END

// DF_ASSERT_FAIL(expr, errcode)
//   Declare a fresh local `status` (initially U_ZERO_ERROR), evaluate expr, and
//   report with dataerrln() if `status` afterwards differs from errcode.  Because
//   the macro declares its own `status`, any `status` named inside expr refers to
//   that local one and not to a variable of the enclosing function.
#define DF_ASSERT_FAIL(expr, errcode) UPRV_BLOCK_MACRO_BEGIN { \
    UErrorCode status=U_ZERO_ERROR; \
    (expr); \
    if (status!=errcode) { \
        dataerrln("DecimalFormatTest failure at line %d.  Expected status=%s, got %s", \
                  __LINE__, u_errorName(errcode), u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END

// DF_CHECK_STATUS_L(line)
//   Like DF_CHECK_STATUS, but the message also carries a caller-supplied line
//   number, the status is printed numerically (%d), and the macro does not return.
#define DF_CHECK_STATUS_L(line) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        errln("DecimalFormatTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); \
    } \
} UPRV_BLOCK_MACRO_END

// DF_ASSERT_L(expr, line)
//   Like DF_ASSERT, but the message also carries a caller-supplied line number and
//   the enclosing function is left with a plain `return;` on failure, so it can
//   only be used inside functions returning void.
#define DF_ASSERT_L(expr, line) UPRV_BLOCK_MACRO_BEGIN { \
    if ((expr)==FALSE) { \
        errln("DecimalFormatTest failure at line %d, from %d.", __LINE__, (line)); \
        return; \
    } \
} UPRV_BLOCK_MACRO_END
117 
118 
119 
120 //
121 //  InvariantStringPiece
122 //    Wrap a StringPiece around the extracted invariant data of a UnicodeString.
123 //    The data is guaranteed to be nul terminated.  (This is not true of StringPiece
124 //    in general, but is true of InvariantStringPiece)
125 //
126 class InvariantStringPiece: public StringPiece {
127   public:
128     InvariantStringPiece(const UnicodeString &s);
~InvariantStringPiece()129     ~InvariantStringPiece() {}
130   private:
131     MaybeStackArray<char, 20>  buf;
132 };
133 
InvariantStringPiece(const UnicodeString & s)134 InvariantStringPiece::InvariantStringPiece(const UnicodeString &s) {
135     int32_t  len = s.length();
136     if (len+1 > buf.getCapacity()) {
137         buf.resize(len+1);
138     }
139     // Buffer size is len+1 so that s.extract() will nul-terminate the string.
140     s.extract(0, len, buf.getAlias(), len+1, US_INV);
141     this->set(buf.getAlias(), len);
142 }
143 
144 
145 //  UnicodeStringPiece
146 //    Wrap a StringPiece around the extracted (to the default charset) data of
147 //    a UnicodeString.  The extracted data is guaranteed to be nul terminated.
148 //    (This is not true of StringPiece in general, but is true of UnicodeStringPiece)
149 //
150 class UnicodeStringPiece: public StringPiece {
151   public:
152     UnicodeStringPiece(const UnicodeString &s);
~UnicodeStringPiece()153     ~UnicodeStringPiece() {}
154   private:
155     MaybeStackArray<char, 20>  buf;
156 };
157 
UnicodeStringPiece(const UnicodeString & s)158 UnicodeStringPiece::UnicodeStringPiece(const UnicodeString &s) {
159     int32_t  len = s.length();
160     int32_t  capacity = buf.getCapacity();
161     int32_t requiredCapacity = s.extract(0, len, buf.getAlias(), capacity) + 1;
162     if (capacity < requiredCapacity) {
163         buf.resize(requiredCapacity);
164         capacity = requiredCapacity;
165         s.extract(0, len, buf.getAlias(), capacity);
166     }
167     this->set(buf.getAlias(), requiredCapacity - 1);
168 }
169 
170 
171 
172 //---------------------------------------------------------------------------
173 //
174 //      DataDrivenTests
175 //             The test cases are in a separate data file,
176 //
177 //---------------------------------------------------------------------------
178 
179 // Translate a Formattable::type enum value to a string, for error message formatting.
formattableType(Formattable::Type typ)180 static const char *formattableType(Formattable::Type typ) {
181     static const char *types[] = {"kDate",
182                                   "kDouble",
183                                   "kLong",
184                                   "kString",
185                                   "kArray",
186                                   "kInt64",
187                                   "kObject"
188                                   };
189     if (typ<0 || typ>Formattable::kObject) {
190         return "Unknown";
191     }
192     return types[typ];
193 }
194 
195 const char *
getPath(char * buffer,const char * filename)196 DecimalFormatTest::getPath(char *buffer, const char *filename) {
197     UErrorCode status=U_ZERO_ERROR;
198     const char *testDataDirectory = IntlTest::getSourceTestData(status);
199     DF_CHECK_STATUS;
200 
201     strcpy(buffer, testDataDirectory);
202     strcat(buffer, filename);
203     return buffer;
204 }
205 
DataDrivenTests()206 void DecimalFormatTest::DataDrivenTests() {
207     char tdd[2048];
208     const char *srcPath;
209     UErrorCode  status  = U_ZERO_ERROR;
210     int32_t     lineNum = 0;
211 
212     //
213     //  Open and read the test data file.
214     //
215     srcPath=getPath(tdd, "dcfmtest.txt");
216     if(srcPath==NULL) {
217         return; /* something went wrong, error already output */
218     }
219 
220     int32_t    len;
221     UChar *testData = ReadAndConvertFile(srcPath, len, status);
222     if (U_FAILURE(status)) {
223         return; /* something went wrong, error already output */
224     }
225 
226     //
227     //  Put the test data into a UnicodeString
228     //
229     UnicodeString testString(FALSE, testData, len);
230 
231     RegexMatcher    parseLineMat(UnicodeString(
232             "(?i)\\s*parse\\s+"
233             "\"([^\"]*)\"\\s+"           // Capture group 1: input text
234             "([ild])\\s+"                // Capture group 2: expected parsed type
235             "\"([^\"]*)\"\\s+"           // Capture group 3: expected parsed decimal
236             "\\s*(?:#.*)?"),             // Trailing comment
237          0, status);
238 
239     RegexMatcher    formatLineMat(UnicodeString(
240             "(?i)\\s*format\\s+"
241             "(\\S+)\\s+"                 // Capture group 1: pattern
242             "(ceiling|floor|down|up|halfeven|halfdown|halfup|default|unnecessary)\\s+"  // Capture group 2: Rounding Mode
243             "\"([^\"]*)\"\\s+"           // Capture group 3: input
244             "\"([^\"]*)\""               // Capture group 4: expected output
245             "\\s*(?:#.*)?"),             // Trailing comment
246          0, status);
247 
248     RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
249     RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);
250 
251     if (U_FAILURE(status)){
252         dataerrln("Construct RegexMatcher() error.");
253         delete [] testData;
254         return;
255     }
256 
257     //
258     //  Loop over the test data file, once per line.
259     //
260     while (lineMat.find()) {
261         lineNum++;
262         if (U_FAILURE(status)) {
263             dataerrln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
264         }
265 
266         status = U_ZERO_ERROR;
267         UnicodeString testLine = lineMat.group(1, status);
268         // printf("%s\n", UnicodeStringPiece(testLine).data());
269         if (testLine.length() == 0) {
270             continue;
271         }
272 
273         //
274         // Parse the test line.  Skip blank and comment only lines.
275         // Separate out the three main fields - pattern, flags, target.
276         //
277 
278         commentMat.reset(testLine);
279         if (commentMat.lookingAt(status)) {
280             // This line is a comment, or blank.
281             continue;
282         }
283 
284 
285         //
286         //  Handle "parse" test case line from file
287         //
288         parseLineMat.reset(testLine);
289         if (parseLineMat.lookingAt(status)) {
290             execParseTest(lineNum,
291                           parseLineMat.group(1, status),    // input
292                           parseLineMat.group(2, status),    // Expected Type
293                           parseLineMat.group(3, status),    // Expected Decimal String
294                           status
295                           );
296             continue;
297         }
298 
299         //
300         //  Handle "format" test case line
301         //
302         formatLineMat.reset(testLine);
303         if (formatLineMat.lookingAt(status)) {
304             execFormatTest(lineNum,
305                            formatLineMat.group(1, status),    // Pattern
306                            formatLineMat.group(2, status),    // rounding mode
307                            formatLineMat.group(3, status),    // input decimal number
308                            formatLineMat.group(4, status),    // expected formatted result
309                            kFormattable,
310                            status);
311 
312             execFormatTest(lineNum,
313                            formatLineMat.group(1, status),    // Pattern
314                            formatLineMat.group(2, status),    // rounding mode
315                            formatLineMat.group(3, status),    // input decimal number
316                            formatLineMat.group(4, status),    // expected formatted result
317                            kStringPiece,
318                            status);
319             continue;
320         }
321 
322         //
323         //  Line is not a recognizable test case.
324         //
325         errln("Badly formed test case at line %d.\n%s\n",
326              lineNum, UnicodeStringPiece(testLine).data());
327 
328     }
329 
330     delete [] testData;
331 }
332 
333 
334 
execParseTest(int32_t lineNum,const UnicodeString & inputText,const UnicodeString & expectedType,const UnicodeString & expectedDecimal,UErrorCode & status)335 void DecimalFormatTest::execParseTest(int32_t lineNum,
336                                      const UnicodeString &inputText,
337                                      const UnicodeString &expectedType,
338                                      const UnicodeString &expectedDecimal,
339                                      UErrorCode &status) {
340 
341     if (U_FAILURE(status)) {
342         return;
343     }
344 
345     DecimalFormatSymbols symbols(Locale::getUS(), status);
346     UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
347     DecimalFormat format(pattern, symbols, status);
348     Formattable   result;
349     if (U_FAILURE(status)) {
350         dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
351             lineNum, u_errorName(status));
352         return;
353     }
354 
355     ParsePosition pos;
356     int32_t expectedParseEndPosition = inputText.length();
357 
358     format.parse(inputText, result, pos);
359 
360     if (expectedParseEndPosition != pos.getIndex()) {
361         errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d.  "
362               "Actual parse position: %d", expectedParseEndPosition, pos.getIndex());
363         return;
364     }
365 
366     char   expectedTypeC[2];
367     expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
368     Formattable::Type expectType = Formattable::kDate;
369     switch (expectedTypeC[0]) {
370       case 'd': expectType = Formattable::kDouble; break;
371       case 'i': expectType = Formattable::kLong;   break;
372       case 'l': expectType = Formattable::kInt64;  break;
373       default:
374           errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"",
375               lineNum, InvariantStringPiece(expectedType).data());
376           return;
377     }
378     if (result.getType() != expectType) {
379         errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
380              lineNum, formattableType(expectType), formattableType(result.getType()));
381         return;
382     }
383 
384     StringPiece decimalResult = result.getDecimalNumber(status);
385     if (U_FAILURE(status)) {
386         errln("File %s, line %d: error %s.  Line in file dcfmtest.txt:  %d:",
387             __FILE__, __LINE__, u_errorName(status), lineNum);
388         return;
389     }
390 
391     InvariantStringPiece expectedResults(expectedDecimal);
392     if (decimalResult != expectedResults) {
393         errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
394             lineNum, expectedResults.data(), decimalResult.data());
395     }
396 
397     return;
398 }
399 
400 
execFormatTest(int32_t lineNum,const UnicodeString & pattern,const UnicodeString & round,const UnicodeString & input,const UnicodeString & expected,EFormatInputType inType,UErrorCode & status)401 void DecimalFormatTest::execFormatTest(int32_t lineNum,
402                            const UnicodeString &pattern,     // Pattern
403                            const UnicodeString &round,       // rounding mode
404                            const UnicodeString &input,       // input decimal number
405                            const UnicodeString &expected,    // expected formatted result
406                            EFormatInputType inType,          // input number type
407                            UErrorCode &status) {
408     if (U_FAILURE(status)) {
409         return;
410     }
411 
412     DecimalFormatSymbols symbols(Locale::getUS(), status);
413     // printf("Pattern = %s\n", UnicodeStringPiece(pattern).data());
414     DecimalFormat fmtr(pattern, symbols, status);
415     if (U_FAILURE(status)) {
416         dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
417             lineNum, u_errorName(status));
418         return;
419     }
420     if (round=="ceiling") {
421         fmtr.setRoundingMode(DecimalFormat::kRoundCeiling);
422     } else if (round=="floor") {
423         fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
424     } else if (round=="down") {
425         fmtr.setRoundingMode(DecimalFormat::kRoundDown);
426     } else if (round=="up") {
427         fmtr.setRoundingMode(DecimalFormat::kRoundUp);
428     } else if (round=="halfeven") {
429         fmtr.setRoundingMode(DecimalFormat::kRoundHalfEven);
430     } else if (round=="halfdown") {
431         fmtr.setRoundingMode(DecimalFormat::kRoundHalfDown);
432     } else if (round=="halfup") {
433         fmtr.setRoundingMode(DecimalFormat::kRoundHalfUp);
434     } else if (round=="default") {
435         // don't set any value.
436     } else if (round=="unnecessary") {
437         fmtr.setRoundingMode(DecimalFormat::kRoundUnnecessary);
438     } else {
439         fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
440         errln("file dcfmtest.txt, line %d: Bad rounding mode \"%s\"",
441                 lineNum, UnicodeStringPiece(round).data());
442     }
443 
444     const char *typeStr = "Unknown";
445     UnicodeString result;
446     UnicodeStringPiece spInput(input);
447 
448     switch (inType) {
449     case kFormattable:
450         {
451             typeStr = "Formattable";
452             Formattable fmtbl;
453             fmtbl.setDecimalNumber(spInput, status);
454             fmtr.format(fmtbl, result, NULL, status);
455         }
456         break;
457     case kStringPiece:
458         typeStr = "StringPiece";
459         fmtr.format(spInput, result, NULL, status);
460         break;
461     }
462 
463     if ((status == U_FORMAT_INEXACT_ERROR) && (result == "") && (expected == "Inexact")) {
464         // Test succeeded.
465         status = U_ZERO_ERROR;
466         return;
467     }
468 
469     if (U_FAILURE(status)) {
470         errln("[%s] file dcfmtest.txt, line %d: format() returned %s.",
471             typeStr, lineNum, u_errorName(status));
472         status = U_ZERO_ERROR;
473         return;
474     }
475 
476     if (result != expected) {
477         errln("[%s] file dcfmtest.txt, line %d: expected \"%s\", got \"%s\", %s",
478             typeStr, lineNum, UnicodeStringPiece(expected).data(), UnicodeStringPiece(result).data(),
479             u_errorName(status));
480     }
481 }
482 
483 
484 //-------------------------------------------------------------------------------
485 //
486 //  Read a text data file, convert it from UTF-8 to UChars, and return the data
487 //    in one big UChar * buffer, which the caller must delete.
488 //
489 //    (Lightly modified version of a similar function in regextst.cpp)
490 //
491 //--------------------------------------------------------------------------------
ReadAndConvertFile(const char * fileName,int32_t & ulen,UErrorCode & status)492 UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
493                                      UErrorCode &status) {
494     UChar       *retPtr  = NULL;
495     char        *fileBuf = NULL;
496     const char  *fileBufNoBOM = NULL;
497     FILE        *f       = NULL;
498 
499     ulen = 0;
500     if (U_FAILURE(status)) {
501         return retPtr;
502     }
503 
504     //
505     //  Open the file.
506     //
507     f = fopen(fileName, "rb");
508     if (f == 0) {
509         dataerrln("Error opening test data file %s\n", fileName);
510         status = U_FILE_ACCESS_ERROR;
511         return NULL;
512     }
513     //
514     //  Read it in
515     //
516     int32_t            fileSize;
517     int32_t            amtRead;
518     int32_t            amtReadNoBOM;
519 
520     fseek( f, 0, SEEK_END);
521     fileSize = ftell(f);
522     fileBuf = new char[fileSize];
523     fseek(f, 0, SEEK_SET);
524     amtRead = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
525     if (amtRead != fileSize || fileSize <= 0) {
526         errln("Error reading test data file.");
527         goto cleanUpAndReturn;
528     }
529 
530     //
531     // Look for a UTF-8 BOM on the data just read.
532     //    The test data file is UTF-8.
533     //    The BOM needs to be there in the source file to keep the Windows &
534     //    EBCDIC machines happy, so force an error if it goes missing.
535     //    Many Linux editors will silently strip it.
536     //
537     fileBufNoBOM = fileBuf + 3;
538     amtReadNoBOM = amtRead - 3;
539     if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
540         // TODO:  restore this check.
541         errln("Test data file %s is missing its BOM", fileName);
542         fileBufNoBOM = fileBuf;
543         amtReadNoBOM = amtRead;
544     }
545 
546     //
547     // Find the length of the input in UTF-16 UChars
548     //  (by preflighting the conversion)
549     //
550     u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
551 
552     //
553     // Convert file contents from UTF-8 to UTF-16
554     //
555     if (status == U_BUFFER_OVERFLOW_ERROR) {
556         // Buffer Overflow is expected from the preflight operation.
557         status = U_ZERO_ERROR;
558         retPtr = new UChar[ulen+1];
559         u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
560     }
561 
562 cleanUpAndReturn:
563     fclose(f);
564     delete[] fileBuf;
565     if (U_FAILURE(status)) {
566         errln("ICU Error \"%s\"\n", u_errorName(status));
567         delete retPtr;
568         retPtr = NULL;
569     }
570     return retPtr;
571 }
572 
#endif  /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS */
574 
575