1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2002-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 //
10 // dcfmtest.cpp
11 //
12 // Decimal Formatter tests, data driven.
13 //
14
15 #include "intltest.h"
16
17 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS
18
19 #include "unicode/regex.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ustring.h"
22 #include "unicode/unistr.h"
23 #include "unicode/dcfmtsym.h"
24 #include "unicode/decimfmt.h"
25 #include "unicode/locid.h"
26 #include "cmemory.h"
27 #include "dcfmtest.h"
28 #include "util.h"
29 #include "cstring.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdio.h>
33
34 #if defined(__GLIBCXX__)
35 namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
36 #endif
37
38 #include <string>
39 #include <iostream>
40
41 //---------------------------------------------------------------------------
42 //
43 // Test class boilerplate
44 //
45 //---------------------------------------------------------------------------
DecimalFormatTest()46 DecimalFormatTest::DecimalFormatTest()
47 {
48 }
49
50
~DecimalFormatTest()51 DecimalFormatTest::~DecimalFormatTest()
52 {
53 }
54
55
56
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)57 void DecimalFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
58 {
59 if (exec) logln("TestSuite DecimalFormatTest: ");
60 switch (index) {
61
62 #if !UCONFIG_NO_FILE_IO
63 case 0: name = "DataDrivenTests";
64 if (exec) DataDrivenTests();
65 break;
66 #else
67 case 0: name = "skip";
68 break;
69 #endif
70
71 default: name = "";
72 break; //needed to end loop
73 }
74 }
75
76
77 //---------------------------------------------------------------------------
78 //
79 // Error Checking / Reporting macros used in all of the tests.
80 //
81 //---------------------------------------------------------------------------
// If 'status' (a UErrorCode variable that must be in scope at the point of use)
// holds a failure, report it as a data error and return 0 from the enclosing function.
#define DF_CHECK_STATUS { \
    if (U_FAILURE(status)) { \
        dataerrln("DecimalFormatTest failure at line %d. status=%s", \
                  __LINE__, u_errorName(status)); \
        return 0; \
    } \
}

// Report a test failure if 'expr' evaluates to FALSE. Execution continues afterwards.
#define DF_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("DecimalFormatTest failure at line %d.\n", __LINE__); \
    } \
}

// Evaluate 'expr' with a fresh local 'status' set to U_ZERO_ERROR, then report
// a data error unless 'status' ended up equal to 'errcode'.
#define DF_ASSERT_FAIL(expr, errcode) { \
    UErrorCode status=U_ZERO_ERROR; \
    (expr); \
    if (status!=errcode) { \
        dataerrln("DecimalFormatTest failure at line %d. Expected status=%s, got %s", \
                  __LINE__, u_errorName(errcode), u_errorName(status)); \
    } \
}

// Like DF_CHECK_STATUS, but also prints the caller-supplied 'line' and does not return.
#define DF_CHECK_STATUS_L(line) { \
    if (U_FAILURE(status)) { \
        errln("DecimalFormatTest failure at line %d, from %d. status=%d\n", \
              __LINE__, (line), status); \
    } \
}

// Like DF_ASSERT, but also prints the caller-supplied 'line' and returns (void)
// from the enclosing function on failure.
#define DF_ASSERT_L(expr, line) { \
    if ((expr)==FALSE) { \
        errln("DecimalFormatTest failure at line %d, from %d.", __LINE__, (line)); \
        return; \
    } \
}
97
98
99
100 //
101 // InvariantStringPiece
102 // Wrap a StringPiece around the extracted invariant data of a UnicodeString.
103 // The data is guaranteed to be nul terminated. (This is not true of StringPiece
104 // in general, but is true of InvariantStringPiece)
105 //
106 class InvariantStringPiece: public StringPiece {
107 public:
108 InvariantStringPiece(const UnicodeString &s);
~InvariantStringPiece()109 ~InvariantStringPiece() {};
110 private:
111 MaybeStackArray<char, 20> buf;
112 };
113
InvariantStringPiece(const UnicodeString & s)114 InvariantStringPiece::InvariantStringPiece(const UnicodeString &s) {
115 int32_t len = s.length();
116 if (len+1 > buf.getCapacity()) {
117 buf.resize(len+1);
118 }
119 // Buffer size is len+1 so that s.extract() will nul-terminate the string.
120 s.extract(0, len, buf.getAlias(), len+1, US_INV);
121 this->set(buf.getAlias(), len);
122 }
123
124
125 // UnicodeStringPiece
126 // Wrap a StringPiece around the extracted (to the default charset) data of
127 // a UnicodeString. The extracted data is guaranteed to be nul terminated.
128 // (This is not true of StringPiece in general, but is true of UnicodeStringPiece)
129 //
130 class UnicodeStringPiece: public StringPiece {
131 public:
132 UnicodeStringPiece(const UnicodeString &s);
~UnicodeStringPiece()133 ~UnicodeStringPiece() {};
134 private:
135 MaybeStackArray<char, 20> buf;
136 };
137
UnicodeStringPiece(const UnicodeString & s)138 UnicodeStringPiece::UnicodeStringPiece(const UnicodeString &s) {
139 int32_t len = s.length();
140 int32_t capacity = buf.getCapacity();
141 int32_t requiredCapacity = s.extract(0, len, buf.getAlias(), capacity) + 1;
142 if (capacity < requiredCapacity) {
143 buf.resize(requiredCapacity);
144 capacity = requiredCapacity;
145 s.extract(0, len, buf.getAlias(), capacity);
146 }
147 this->set(buf.getAlias(), requiredCapacity - 1);
148 }
149
150
151
152 //---------------------------------------------------------------------------
153 //
154 // DataDrivenTests
//      The test cases are in a separate data file, dcfmtest.txt.
156 //
157 //---------------------------------------------------------------------------
158
159 // Translate a Formattable::type enum value to a string, for error message formatting.
formattableType(Formattable::Type typ)160 static const char *formattableType(Formattable::Type typ) {
161 static const char *types[] = {"kDate",
162 "kDouble",
163 "kLong",
164 "kString",
165 "kArray",
166 "kInt64",
167 "kObject"
168 };
169 if (typ<0 || typ>Formattable::kObject) {
170 return "Unknown";
171 }
172 return types[typ];
173 }
174
175 const char *
getPath(char * buffer,const char * filename)176 DecimalFormatTest::getPath(char *buffer, const char *filename) {
177 UErrorCode status=U_ZERO_ERROR;
178 const char *testDataDirectory = IntlTest::getSourceTestData(status);
179 DF_CHECK_STATUS;
180
181 strcpy(buffer, testDataDirectory);
182 strcat(buffer, filename);
183 return buffer;
184 }
185
DataDrivenTests()186 void DecimalFormatTest::DataDrivenTests() {
187 char tdd[2048];
188 const char *srcPath;
189 UErrorCode status = U_ZERO_ERROR;
190 int32_t lineNum = 0;
191
192 //
193 // Open and read the test data file.
194 //
195 srcPath=getPath(tdd, "dcfmtest.txt");
196 if(srcPath==NULL) {
197 return; /* something went wrong, error already output */
198 }
199
200 int32_t len;
201 UChar *testData = ReadAndConvertFile(srcPath, len, status);
202 if (U_FAILURE(status)) {
203 return; /* something went wrong, error already output */
204 }
205
206 //
207 // Put the test data into a UnicodeString
208 //
209 UnicodeString testString(FALSE, testData, len);
210
211 RegexMatcher parseLineMat(UnicodeString(
212 "(?i)\\s*parse\\s+"
213 "\"([^\"]*)\"\\s+" // Capture group 1: input text
214 "([ild])\\s+" // Capture group 2: expected parsed type
215 "\"([^\"]*)\"\\s+" // Capture group 3: expected parsed decimal
216 "\\s*(?:#.*)?"), // Trailing comment
217 0, status);
218
219 RegexMatcher formatLineMat(UnicodeString(
220 "(?i)\\s*format\\s+"
221 "(\\S+)\\s+" // Capture group 1: pattern
222 "(ceiling|floor|down|up|halfeven|halfdown|halfup|default|unnecessary)\\s+" // Capture group 2: Rounding Mode
223 "\"([^\"]*)\"\\s+" // Capture group 3: input
224 "\"([^\"]*)\"" // Capture group 4: expected output
225 "\\s*(?:#.*)?"), // Trailing comment
226 0, status);
227
228 RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
229 RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);
230
231 if (U_FAILURE(status)){
232 dataerrln("Construct RegexMatcher() error.");
233 delete [] testData;
234 return;
235 }
236
237 //
238 // Loop over the test data file, once per line.
239 //
240 while (lineMat.find()) {
241 lineNum++;
242 if (U_FAILURE(status)) {
243 dataerrln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
244 }
245
246 status = U_ZERO_ERROR;
247 UnicodeString testLine = lineMat.group(1, status);
248 // printf("%s\n", UnicodeStringPiece(testLine).data());
249 if (testLine.length() == 0) {
250 continue;
251 }
252
253 //
254 // Parse the test line. Skip blank and comment only lines.
255 // Separate out the three main fields - pattern, flags, target.
256 //
257
258 commentMat.reset(testLine);
259 if (commentMat.lookingAt(status)) {
260 // This line is a comment, or blank.
261 continue;
262 }
263
264
265 //
266 // Handle "parse" test case line from file
267 //
268 parseLineMat.reset(testLine);
269 if (parseLineMat.lookingAt(status)) {
270 execParseTest(lineNum,
271 parseLineMat.group(1, status), // input
272 parseLineMat.group(2, status), // Expected Type
273 parseLineMat.group(3, status), // Expected Decimal String
274 status
275 );
276 continue;
277 }
278
279 //
280 // Handle "format" test case line
281 //
282 formatLineMat.reset(testLine);
283 if (formatLineMat.lookingAt(status)) {
284 execFormatTest(lineNum,
285 formatLineMat.group(1, status), // Pattern
286 formatLineMat.group(2, status), // rounding mode
287 formatLineMat.group(3, status), // input decimal number
288 formatLineMat.group(4, status), // expected formatted result
289 kFormattable,
290 status);
291
292 execFormatTest(lineNum,
293 formatLineMat.group(1, status), // Pattern
294 formatLineMat.group(2, status), // rounding mode
295 formatLineMat.group(3, status), // input decimal number
296 formatLineMat.group(4, status), // expected formatted result
297 kStringPiece,
298 status);
299 continue;
300 }
301
302 //
303 // Line is not a recognizable test case.
304 //
305 errln("Badly formed test case at line %d.\n%s\n",
306 lineNum, UnicodeStringPiece(testLine).data());
307
308 }
309
310 delete [] testData;
311 }
312
313
314
execParseTest(int32_t lineNum,const UnicodeString & inputText,const UnicodeString & expectedType,const UnicodeString & expectedDecimal,UErrorCode & status)315 void DecimalFormatTest::execParseTest(int32_t lineNum,
316 const UnicodeString &inputText,
317 const UnicodeString &expectedType,
318 const UnicodeString &expectedDecimal,
319 UErrorCode &status) {
320
321 if (U_FAILURE(status)) {
322 return;
323 }
324
325 DecimalFormatSymbols symbols(Locale::getUS(), status);
326 UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
327 DecimalFormat format(pattern, symbols, status);
328 Formattable result;
329 if (U_FAILURE(status)) {
330 dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
331 lineNum, u_errorName(status));
332 return;
333 }
334
335 ParsePosition pos;
336 int32_t expectedParseEndPosition = inputText.length();
337
338 format.parse(inputText, result, pos);
339
340 if (expectedParseEndPosition != pos.getIndex()) {
341 errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d. "
342 "Actual parse position: %d", expectedParseEndPosition, pos.getIndex());
343 return;
344 }
345
346 char expectedTypeC[2];
347 expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
348 Formattable::Type expectType = Formattable::kDate;
349 switch (expectedTypeC[0]) {
350 case 'd': expectType = Formattable::kDouble; break;
351 case 'i': expectType = Formattable::kLong; break;
352 case 'l': expectType = Formattable::kInt64; break;
353 default:
354 errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"",
355 lineNum, InvariantStringPiece(expectedType).data());
356 return;
357 }
358 if (result.getType() != expectType) {
359 errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
360 lineNum, formattableType(expectType), formattableType(result.getType()));
361 return;
362 }
363
364 StringPiece decimalResult = result.getDecimalNumber(status);
365 if (U_FAILURE(status)) {
366 errln("File %s, line %d: error %s. Line in file dcfmtest.txt: %d:",
367 __FILE__, __LINE__, u_errorName(status), lineNum);
368 return;
369 }
370
371 InvariantStringPiece expectedResults(expectedDecimal);
372 if (decimalResult != expectedResults) {
373 errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
374 lineNum, expectedResults.data(), decimalResult.data());
375 }
376
377 return;
378 }
379
380
execFormatTest(int32_t lineNum,const UnicodeString & pattern,const UnicodeString & round,const UnicodeString & input,const UnicodeString & expected,EFormatInputType inType,UErrorCode & status)381 void DecimalFormatTest::execFormatTest(int32_t lineNum,
382 const UnicodeString &pattern, // Pattern
383 const UnicodeString &round, // rounding mode
384 const UnicodeString &input, // input decimal number
385 const UnicodeString &expected, // expected formatted result
386 EFormatInputType inType, // input number type
387 UErrorCode &status) {
388 if (U_FAILURE(status)) {
389 return;
390 }
391
392 DecimalFormatSymbols symbols(Locale::getUS(), status);
393 // printf("Pattern = %s\n", UnicodeStringPiece(pattern).data());
394 DecimalFormat fmtr(pattern, symbols, status);
395 if (U_FAILURE(status)) {
396 dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
397 lineNum, u_errorName(status));
398 return;
399 }
400 if (round=="ceiling") {
401 fmtr.setRoundingMode(DecimalFormat::kRoundCeiling);
402 } else if (round=="floor") {
403 fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
404 } else if (round=="down") {
405 fmtr.setRoundingMode(DecimalFormat::kRoundDown);
406 } else if (round=="up") {
407 fmtr.setRoundingMode(DecimalFormat::kRoundUp);
408 } else if (round=="halfeven") {
409 fmtr.setRoundingMode(DecimalFormat::kRoundHalfEven);
410 } else if (round=="halfdown") {
411 fmtr.setRoundingMode(DecimalFormat::kRoundHalfDown);
412 } else if (round=="halfup") {
413 fmtr.setRoundingMode(DecimalFormat::kRoundHalfUp);
414 } else if (round=="default") {
415 // don't set any value.
416 } else if (round=="unnecessary") {
417 fmtr.setRoundingMode(DecimalFormat::kRoundUnnecessary);
418 } else {
419 fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
420 errln("file dcfmtest.txt, line %d: Bad rounding mode \"%s\"",
421 lineNum, UnicodeStringPiece(round).data());
422 }
423
424 const char *typeStr = "Unknown";
425 UnicodeString result;
426 UnicodeStringPiece spInput(input);
427
428 switch (inType) {
429 case kFormattable:
430 {
431 typeStr = "Formattable";
432 Formattable fmtbl;
433 fmtbl.setDecimalNumber(spInput, status);
434 fmtr.format(fmtbl, result, NULL, status);
435 }
436 break;
437 case kStringPiece:
438 typeStr = "StringPiece";
439 fmtr.format(spInput, result, NULL, status);
440 break;
441 }
442
443 if ((status == U_FORMAT_INEXACT_ERROR) && (result == "") && (expected == "Inexact")) {
444 // Test succeeded.
445 status = U_ZERO_ERROR;
446 return;
447 }
448
449 if (U_FAILURE(status)) {
450 errln("[%s] file dcfmtest.txt, line %d: format() returned %s.",
451 typeStr, lineNum, u_errorName(status));
452 status = U_ZERO_ERROR;
453 return;
454 }
455
456 if (result != expected) {
457 errln("[%s] file dcfmtest.txt, line %d: expected \"%s\", got \"%s\", %s",
458 typeStr, lineNum, UnicodeStringPiece(expected).data(), UnicodeStringPiece(result).data(),
459 u_errorName(status));
460 }
461 }
462
463
464 //-------------------------------------------------------------------------------
465 //
466 // Read a text data file, convert it from UTF-8 to UChars, and return the data
467 // in one big UChar * buffer, which the caller must delete.
468 //
469 // (Lightly modified version of a similar function in regextst.cpp)
470 //
471 //--------------------------------------------------------------------------------
ReadAndConvertFile(const char * fileName,int32_t & ulen,UErrorCode & status)472 UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
473 UErrorCode &status) {
474 UChar *retPtr = NULL;
475 char *fileBuf = NULL;
476 const char *fileBufNoBOM = NULL;
477 FILE *f = NULL;
478
479 ulen = 0;
480 if (U_FAILURE(status)) {
481 return retPtr;
482 }
483
484 //
485 // Open the file.
486 //
487 f = fopen(fileName, "rb");
488 if (f == 0) {
489 dataerrln("Error opening test data file %s\n", fileName);
490 status = U_FILE_ACCESS_ERROR;
491 return NULL;
492 }
493 //
494 // Read it in
495 //
496 int32_t fileSize;
497 int32_t amtRead;
498 int32_t amtReadNoBOM;
499
500 fseek( f, 0, SEEK_END);
501 fileSize = ftell(f);
502 fileBuf = new char[fileSize];
503 fseek(f, 0, SEEK_SET);
504 amtRead = fread(fileBuf, 1, fileSize, f);
505 if (amtRead != fileSize || fileSize <= 0) {
506 errln("Error reading test data file.");
507 goto cleanUpAndReturn;
508 }
509
510 //
511 // Look for a UTF-8 BOM on the data just read.
512 // The test data file is UTF-8.
513 // The BOM needs to be there in the source file to keep the Windows &
514 // EBCDIC machines happy, so force an error if it goes missing.
515 // Many Linux editors will silently strip it.
516 //
517 fileBufNoBOM = fileBuf + 3;
518 amtReadNoBOM = amtRead - 3;
519 if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
520 // TODO: restore this check.
521 errln("Test data file %s is missing its BOM", fileName);
522 fileBufNoBOM = fileBuf;
523 amtReadNoBOM = amtRead;
524 }
525
526 //
527 // Find the length of the input in UTF-16 UChars
528 // (by preflighting the conversion)
529 //
530 u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
531
532 //
533 // Convert file contents from UTF-8 to UTF-16
534 //
535 if (status == U_BUFFER_OVERFLOW_ERROR) {
536 // Buffer Overflow is expected from the preflight operation.
537 status = U_ZERO_ERROR;
538 retPtr = new UChar[ulen+1];
539 u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
540 }
541
542 cleanUpAndReturn:
543 fclose(f);
544 delete[] fileBuf;
545 if (U_FAILURE(status)) {
546 errln("ICU Error \"%s\"\n", u_errorName(status));
547 delete retPtr;
548 retPtr = NULL;
549 };
550 return retPtr;
551 }
552
553 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
554
555