// ยฉ 2018 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING #include "numbertest.h" #include "numparse_impl.h" #include "static_unicode_sets.h" #include "unicode/dcfmtsym.h" #include "unicode/testlog.h" #include #include using icu::unisets::get; void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { if (exec) { logln("TestSuite NumberParserTest: "); } TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(testBasic); TESTCASE_AUTO(testSeriesMatcher); TESTCASE_AUTO(testCombinedCurrencyMatcher); TESTCASE_AUTO(testAffixPatternMatcher); TESTCASE_AUTO_END; } void NumberParserTest::testBasic() { IcuTestErrorCode status(*this, "testBasic"); static const struct TestCase { int32_t flags; const char16_t* inputString; const char16_t* patternString; int32_t expectedCharsConsumed; double expectedResultDouble; } cases[] = {{3, u"51423", u"0", 5, 51423.}, {3, u"51423x", u"0", 5, 51423.}, {3, u" 51423", u"0", 6, 51423.}, {3, u"51423 ", u"0", 5, 51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"0", 10, 51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏx", u"0", 10, 51423.}, {3, u" ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"0", 11, 51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ ", u"0", 10, 51423.}, {7, u"51,423", u"#,##,##0", 6, 51423.}, {7, u" 51,423", u"#,##,##0", 7, 51423.}, {7, u"51,423 ", u"#,##,##0", 6, 51423.}, {7, u"51,423,", u"#,##,##0", 6, 51423.}, {7, u"51,423,,", u"#,##,##0", 6, 51423.}, {7, u"51,423.5", u"#,##,##0", 8, 51423.5}, {7, u"51,423.5,", u"#,##,##0", 8, 51423.5}, {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5}, {7, u"51,423.5.", u"#,##,##0", 8, 51423.5}, {7, u"51,423.5..", u"#,##,##0", 8, 51423.5}, {7, u"๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 11, 51423.}, {7, u"๐Ÿณ,๐Ÿด๐Ÿต,๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 19, 78951423.}, {7, u"๐Ÿณ๐Ÿด,๐Ÿต๐Ÿฑ๐Ÿญ.๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 18, 78951.423}, {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ", u"#,##,##0", 11, 78000.}, {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฌ๐Ÿฌ", u"#,##,##0", 18, 78000.}, {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 18, 78000.023}, {7, u"๐Ÿณ๐Ÿด.๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 11, 78.}, {7, u"1,", u"#,##,##0", 1, 1.}, {7, u"1,,", u"#,##,##0", 1, 1.}, {7, u"1.,", u"#,##,##0", 2, 1.}, {3, u"1,.", u"#,##,##0", 3, 1.}, {7, u"1..", u"#,##,##0", 2, 1.}, {3, u",1", u"#,##,##0", 2, 1.}, {3, u"1,1", u"#,##,##0", 1, 1.}, {3, u"1,1,", u"#,##,##0", 1, 1.}, {3, u"1,1,,", u"#,##,##0", 1, 1.}, {3, u"1,1,1", u"#,##,##0", 1, 1.}, {3, u"1,1,1,", u"#,##,##0", 1, 1.}, {3, u"1,1,1,,", u"#,##,##0", 1, 1.}, {3, u"-51423", u"0", 6, -51423.}, {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after {3, u"+51423", u"0", 6, 51423.}, {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after {3, u"%51423", u"0", 6, 51423.}, {3, u"51423%", u"0", 6, 51423.}, {3, u"51423%%", u"0", 6, 51423.}, {3, u"โ€ฐ51423", u"0", 6, 51423.}, {3, u"51423โ€ฐ", u"0", 6, 51423.}, {3, u"51423โ€ฐโ€ฐ", u"0", 6, 51423.}, {3, u"โˆž", u"0", 1, INFINITY}, {3, u"-โˆž", u"0", 2, -INFINITY}, {3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak? {3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak? {3, u"a51423US dollars", u"a0ยคยคยค", 16, 51423.}, {3, u"a 51423 US dollars", u"a0ยคยคยค", 18, 51423.}, {3, u"514.23 USD", u"ยค0", 10, 514.23}, {3, u"514.23 GBP", u"ยค0", 10, 514.23}, {3, u"a ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ b", u"a0b", 14, 51423.}, {3, u"-a ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ b", u"a0b", 15, -51423.}, {3, u"a -๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ b", u"a0b", 15, -51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"[0];(0)", 10, 51423.}, {3, u"[๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"[0];(0)", 11, 51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ]", u"[0];(0)", 11, 51423.}, {3, u"[๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ]", u"[0];(0)", 12, 51423.}, {3, u"(๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"[0];(0)", 11, -51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ)", u"[0];(0)", 11, -51423.}, {3, u"(๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ)", u"[0];(0)", 12, -51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"{0};{0}", 10, 51423.}, {3, u"{๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"{0};{0}", 11, 51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ}", u"{0};{0}", 11, 51423.}, {3, u"{๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ}", u"{0};{0}", 12, 51423.}, {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b" {3, u"๐Ÿฑ.๐Ÿญ๐Ÿฐ๐ŸฎE๐Ÿฏ", u"0", 12, 5142.}, {3, u"๐Ÿฑ.๐Ÿญ๐Ÿฐ๐ŸฎE-๐Ÿฏ", u"0", 13, 0.005142}, {3, u"๐Ÿฑ.๐Ÿญ๐Ÿฐ๐Ÿฎe-๐Ÿฏ", u"0", 13, 0.005142}, {7, u"5,142.50 Canadian dollars", u"#,##,##0 ยคยคยค", 25, 5142.5}, {3, u"a$ b5", u"a ยค b0", 5, 5.0}, {3, u"๐Ÿ“บ1.23", u"๐Ÿ“บ0;๐Ÿ“ป0", 6, 1.23}, {3, u"๐Ÿ“ป1.23", u"๐Ÿ“บ0;๐Ÿ“ป0", 6, -1.23}, {3, u".00", u"0", 3, 0.0}, {3, u" 1,234", u"a0", 35, 1234.}, // should not hang {3, u"NaN", u"0", 3, NAN}, {3, u"NaN E5", u"0", 6, NAN}, {3, u"0", u"0", 1, 0.0}}; parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; for (auto& cas : cases) { UnicodeString inputString(cas.inputString); UnicodeString patternString(cas.patternString); LocalPointer parser( NumberParserImpl::createSimpleParser( Locale("en"), patternString, parseFlags, status)); if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { continue; } UnicodeString message = UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString(); if (0 != (cas.flags & 0x01)) { // Test greedy code path ParsedNumber resultObject; parser->parse(inputString, true, resultObject, status); assertTrue("Greedy Parse failed: " + message, resultObject.success()); assertEquals( "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); assertEquals( "Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); } if (0 != (cas.flags & 0x02)) { // Test slow code path ParsedNumber resultObject; parser->parse(inputString, false, resultObject, status); assertTrue("Non-Greedy Parse failed: " + message, resultObject.success()); assertEquals( "Non-Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); assertEquals( "Non-Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); } if (0 != (cas.flags & 0x04)) { // Test with strict separators parser.adoptInstead( NumberParserImpl::createSimpleParser( Locale("en"), patternString, parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE, status)); ParsedNumber resultObject; parser->parse(inputString, true, resultObject, status); assertTrue("Strict Parse failed: " + message, resultObject.success()); assertEquals( "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); assertEquals( "Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); } } } void NumberParserTest::testSeriesMatcher() { IcuTestErrorCode status(*this, "testSeriesMatcher"); DecimalFormatSymbols symbols("en", status); if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) { return; } PlusSignMatcher m0(symbols, false); MinusSignMatcher m1(symbols, false); IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES); PercentMatcher m3(symbols); IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES); ArraySeriesMatcher::MatcherArray matchers(5); matchers[0] = &m0; matchers[1] = &m1; matchers[2] = &m2; matchers[3] = &m3; matchers[4] = &m4; ArraySeriesMatcher series(matchers, 5); assertFalse("", series.smokeTest(StringSegment(u"x", false))); assertFalse("", series.smokeTest(StringSegment(u"-", false))); assertTrue("", series.smokeTest(StringSegment(u"+", false))); static const struct TestCase { const char16_t* input; int32_t expectedOffset; bool expectedMaybeMore; } cases[] = {{u"", 0, true}, {u" ", 0, false}, {u"$", 0, false}, {u"+", 0, true}, {u" +", 0, false}, {u"+-", 0, true}, {u"+ -", 0, false}, {u"+- ", 0, true}, {u"+- $", 0, false}, {u"+-%", 3, true}, {u" +- % ", 0, false}, {u"+- % ", 7, true}, {u"+-%$", 3, false}}; for (auto& cas : cases) { UnicodeString input(cas.input); StringSegment segment(input, false); ParsedNumber result; bool actualMaybeMore = series.match(segment, result, status); int actualOffset = segment.getOffset(); assertEquals("'" + input + "'", cas.expectedOffset, actualOffset); assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore); } } void NumberParserTest::testCombinedCurrencyMatcher() { IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher"); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); Locale locale = Locale::getEnglish(); DecimalFormatSymbols dfs(locale, status); if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) { return; } dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); AffixTokenMatcherSetupData affixSetupData = { currencySymbols, {"en", status}, ignorables, "en", 0}; AffixTokenMatcherWarehouse warehouse(&affixSetupData); NumberParseMatcher& matcher = warehouse.currency(status); affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY; AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData); NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status); static const struct TestCase { const char16_t* input; const char16_t* expectedCurrencyCode; const char16_t* expectedNoForeignCurrencyCode; } cases[]{{u"", u"", u""}, {u"FOO", u"", u""}, {u"USD", u"USD", u""}, {u"$", u"USD", u""}, {u"US dollars", u"USD", u""}, {u"eu", u"", u""}, {u"euros", u"EUR", u""}, {u"ICU", u"ICU", u"ICU"}, {u"IU$", u"ICU", u"ICU"}}; for (auto& cas : cases) { UnicodeString input(cas.input); { StringSegment segment(input, false); ParsedNumber result; matcher.match(segment, result, status); assertEquals( "Parsing " + input, cas.expectedCurrencyCode, result.currencyCode); assertEquals( "Whole string on " + input, cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(), result.charEnd); } { StringSegment segment(input, false); ParsedNumber result; matcherNoForeign.match(segment, result, status); assertEquals( "[no foreign] Parsing " + input, cas.expectedNoForeignCurrencyCode, result.currencyCode); assertEquals( "[no foreign] Whole string on " + input, cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(), result.charEnd); } } } void NumberParserTest::testAffixPatternMatcher() { IcuTestErrorCode status(*this, "testAffixPatternMatcher"); Locale locale = Locale::getEnglish(); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); DecimalFormatSymbols dfs(locale, status); dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); AffixTokenMatcherSetupData affixSetupData = { currencySymbols, {"en", status}, ignorables, "en", 0}; AffixTokenMatcherWarehouse warehouse(&affixSetupData); static const struct TestCase { bool exactMatch; const char16_t* affixPattern; int32_t expectedMatcherLength; const char16_t* sampleParseableString; } cases[] = {{false, u"-", 1, u"-"}, {false, u"+-%", 5, u"+-%"}, {true, u"+-%", 3, u"+-%"}, {false, u"ab c", 5, u"a bc"}, {true, u"abc", 3, u"abc"}, {false, u"hello-to+this%veryยคlongโ€ฐstring", 59, u"hello-to+this%very USD longโ€ฐstring"}}; for (auto& cas : cases) { UnicodeString affixPattern(cas.affixPattern); UnicodeString sampleParseableString(cas.sampleParseableString); int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0; bool success; AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern( affixPattern, warehouse, parseFlags, &success, status); if (!status.errDataIfFailureAndReset("Creation should be successful")) { // Check that the matcher has the expected number of children assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length()); // Check that the matcher works on a sample string StringSegment segment(sampleParseableString, false); ParsedNumber result; matcher.match(segment, result, status); assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd); } } } #endif