1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "numparse_types.h"
13 #include "numparse_currency.h"
14 #include "ucurrimp.h"
15 #include "unicode/errorcode.h"
16 #include "numparse_utils.h"
17 
18 using namespace icu;
19 using namespace icu::numparse;
20 using namespace icu::numparse::impl;
21 
22 
CombinedCurrencyMatcher(const CurrencySymbols & currencySymbols,const DecimalFormatSymbols & dfs,parse_flags_t parseFlags,UErrorCode & status)23 CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
24                                                  parse_flags_t parseFlags, UErrorCode& status)
25         : fCurrency1(currencySymbols.getCurrencySymbol(status)),
26           fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
27           fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),
28           afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
29           beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
30           fLocaleName(dfs.getLocale().getName(), -1, status) {
31     utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
32 
33     // Pre-load the long names for the current locale and currency
34     // if we are parsing without the full currency data.
35     if (!fUseFullCurrencyData) {
36         for (int32_t i=0; i<StandardPlural::COUNT; i++) {
37             auto plural = static_cast<StandardPlural::Form>(i);
38             fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);
39         }
40     }
41 
42     // TODO: Figure out how to make this faster and re-enable.
43     // Computing the "lead code points" set for fastpathing is too slow to use in production.
44     // See http://bugs.icu-project.org/trac/ticket/13584
45 //    // Compute the full set of characters that could be the first in a currency to allow for
46 //    // efficient smoke test.
47 //    fLeadCodePoints.add(fCurrency1.char32At(0));
48 //    fLeadCodePoints.add(fCurrency2.char32At(0));
49 //    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
50 //    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
51 //    // Always apply case mapping closure for currencies
52 //    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
53 //    fLeadCodePoints.freeze();
54 }
55 
56 bool
match(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const57 CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
58     if (result.currencyCode[0] != 0) {
59         return false;
60     }
61 
62     // Try to match a currency spacing separator.
63     int32_t initialOffset = segment.getOffset();
64     bool maybeMore = false;
65     if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
66         int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
67         if (overlap == beforeSuffixInsert.length()) {
68             segment.adjustOffset(overlap);
69             // Note: let currency spacing be a weak match. Don't update chars consumed.
70         }
71         maybeMore = maybeMore || overlap == segment.length();
72     }
73 
74     // Match the currency string, and reset if we didn't find one.
75     maybeMore = maybeMore || matchCurrency(segment, result, status);
76     if (result.currencyCode[0] == 0) {
77         segment.setOffset(initialOffset);
78         return maybeMore;
79     }
80 
81     // Try to match a currency spacing separator.
82     if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
83         int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
84         if (overlap == afterPrefixInsert.length()) {
85             segment.adjustOffset(overlap);
86             // Note: let currency spacing be a weak match. Don't update chars consumed.
87         }
88         maybeMore = maybeMore || overlap == segment.length();
89     }
90 
91     return maybeMore;
92 }
93 
matchCurrency(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const94 bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
95                                             UErrorCode& status) const {
96     bool maybeMore = false;
97 
98     int32_t overlap1;
99     if (!fCurrency1.isEmpty()) {
100         overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
101     } else {
102         overlap1 = -1;
103     }
104     maybeMore = maybeMore || overlap1 == segment.length();
105     if (overlap1 == fCurrency1.length()) {
106         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
107         segment.adjustOffset(overlap1);
108         result.setCharsConsumed(segment);
109         return maybeMore;
110     }
111 
112     int32_t overlap2;
113     if (!fCurrency2.isEmpty()) {
114         // ISO codes should be accepted case-insensitive.
115         // https://unicode-org.atlassian.net/browse/ICU-13696
116         overlap2 = segment.getCommonPrefixLength(fCurrency2);
117     } else {
118         overlap2 = -1;
119     }
120     maybeMore = maybeMore || overlap2 == segment.length();
121     if (overlap2 == fCurrency2.length()) {
122         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
123         segment.adjustOffset(overlap2);
124         result.setCharsConsumed(segment);
125         return maybeMore;
126     }
127 
128     if (fUseFullCurrencyData) {
129         // Use the full currency data.
130         // NOTE: This call site should be improved with #13584.
131         const UnicodeString segmentString = segment.toTempUnicodeString();
132 
133         // Try to parse the currency
134         ParsePosition ppos(0);
135         int32_t partialMatchLen = 0;
136         uprv_parseCurrency(
137                 fLocaleName.data(),
138                 segmentString,
139                 ppos,
140                 UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
141                 &partialMatchLen,
142                 result.currencyCode,
143                 status);
144         maybeMore = maybeMore || partialMatchLen == segment.length();
145 
146         if (U_SUCCESS(status) && ppos.getIndex() != 0) {
147             // Complete match.
148             // NOTE: The currency code should already be saved in the ParsedNumber.
149             segment.adjustOffset(ppos.getIndex());
150             result.setCharsConsumed(segment);
151             return maybeMore;
152         }
153 
154     } else {
155         // Use the locale long names.
156         int32_t longestFullMatch = 0;
157         for (int32_t i=0; i<StandardPlural::COUNT; i++) {
158             const UnicodeString& name = fLocalLongNames[i];
159             int32_t overlap = segment.getCommonPrefixLength(name);
160             if (overlap == name.length() && name.length() > longestFullMatch) {
161                 longestFullMatch = name.length();
162             }
163             maybeMore = maybeMore || overlap > 0;
164         }
165         if (longestFullMatch > 0) {
166             utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
167             segment.adjustOffset(longestFullMatch);
168             result.setCharsConsumed(segment);
169             return maybeMore;
170         }
171     }
172 
173     // No match found.
174     return maybeMore;
175 }
176 
smokeTest(const StringSegment &) const177 bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const {
178     // TODO: See constructor
179     return true;
180     //return segment.startsWith(fLeadCodePoints);
181 }
182 
toString() const183 UnicodeString CombinedCurrencyMatcher::toString() const {
184     return u"<CombinedCurrencyMatcher>";
185 }
186 
187 
188 #endif /* #if !UCONFIG_NO_FORMATTING */
189