1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "numparse_types.h"
13 #include "numparse_scientific.h"
14 #include "static_unicode_sets.h"
15 
16 using namespace icu;
17 using namespace icu::numparse;
18 using namespace icu::numparse::impl;
19 
20 
21 namespace {
22 
minusSignSet()23 inline const UnicodeSet& minusSignSet() {
24     return *unisets::get(unisets::MINUS_SIGN);
25 }
26 
plusSignSet()27 inline const UnicodeSet& plusSignSet() {
28     return *unisets::get(unisets::PLUS_SIGN);
29 }
30 
31 } // namespace
32 
33 
ScientificMatcher(const DecimalFormatSymbols & dfs,const Grouper & grouper)34 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
35         : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
36           fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED) {
37 
38     const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
39     if (minusSignSet().contains(minusSign)) {
40         fCustomMinusSign.setToBogus();
41     } else {
42         fCustomMinusSign = minusSign;
43     }
44 
45     const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
46     if (plusSignSet().contains(plusSign)) {
47         fCustomPlusSign.setToBogus();
48     } else {
49         fCustomPlusSign = plusSign;
50     }
51 }
52 
match(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const53 bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
54     // Only accept scientific notation after the mantissa.
55     if (!result.seenNumber()) {
56         return false;
57     }
58 
59     // Only accept one exponent per string.
60     if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
61         return false;
62     }
63 
64     // First match the scientific separator, and then match another number after it.
65     // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
66     int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
67     if (overlap1 == fExponentSeparatorString.length()) {
68         // Full exponent separator match.
69 
70         // First attempt to get a code point, returning true if we can't get one.
71         if (segment.length() == overlap1) {
72             return true;
73         }
74         segment.adjustOffset(overlap1);
75 
76         // Allow a sign, and then try to match digits.
77         int8_t exponentSign = 1;
78         if (segment.startsWith(minusSignSet())) {
79             exponentSign = -1;
80             segment.adjustOffsetByCodePoint();
81         } else if (segment.startsWith(plusSignSet())) {
82             segment.adjustOffsetByCodePoint();
83         } else if (segment.startsWith(fCustomMinusSign)) {
84             // Note: call site is guarded with startsWith, which returns false on empty string
85             int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
86             if (overlap2 != fCustomMinusSign.length()) {
87                 // Partial custom sign match; un-match the exponent separator.
88                 segment.adjustOffset(-overlap1);
89                 return true;
90             }
91             exponentSign = -1;
92             segment.adjustOffset(overlap2);
93         } else if (segment.startsWith(fCustomPlusSign)) {
94             // Note: call site is guarded with startsWith, which returns false on empty string
95             int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
96             if (overlap2 != fCustomPlusSign.length()) {
97                 // Partial custom sign match; un-match the exponent separator.
98                 segment.adjustOffset(-overlap1);
99                 return true;
100             }
101             segment.adjustOffset(overlap2);
102         }
103 
104         // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
105         bool wasBogus = result.quantity.bogus;
106         result.quantity.bogus = false;
107         int digitsOffset = segment.getOffset();
108         bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
109         result.quantity.bogus = wasBogus;
110 
111         if (segment.getOffset() != digitsOffset) {
112             // At least one exponent digit was matched.
113             result.flags |= FLAG_HAS_EXPONENT;
114         } else {
115             // No exponent digits were matched; un-match the exponent separator.
116             segment.adjustOffset(-overlap1);
117         }
118         return digitsReturnValue;
119 
120     } else if (overlap1 == segment.length()) {
121         // Partial exponent separator match
122         return true;
123     }
124 
125     // No match
126     return false;
127 }
128 
smokeTest(const StringSegment & segment) const129 bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
130     return segment.startsWith(fExponentSeparatorString);
131 }
132 
toString() const133 UnicodeString ScientificMatcher::toString() const {
134     return u"<Scientific>";
135 }
136 
137 
138 #endif /* #if !UCONFIG_NO_FORMATTING */
139