1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 #define UNISTR_FROM_CHAR_EXPLICIT
12 
13 #include "uassert.h"
14 #include "number_patternstring.h"
15 #include "unicode/utf16.h"
16 #include "number_utils.h"
17 #include "number_roundingutils.h"
18 
19 using namespace icu;
20 using namespace icu::number;
21 using namespace icu::number::impl;
22 
23 
parseToPatternInfo(const UnicodeString & patternString,ParsedPatternInfo & patternInfo,UErrorCode & status)24 void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
25                                        UErrorCode& status) {
26     patternInfo.consumePattern(patternString, status);
27 }
28 
29 DecimalFormatProperties
parseToProperties(const UnicodeString & pattern,IgnoreRounding ignoreRounding,UErrorCode & status)30 PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
31                                  UErrorCode& status) {
32     DecimalFormatProperties properties;
33     parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
34     return properties;
35 }
36 
parseToProperties(const UnicodeString & pattern,UErrorCode & status)37 DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
38                                                          UErrorCode& status) {
39     return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
40 }
41 
42 void
parseToExistingProperties(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)43 PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
44                                          IgnoreRounding ignoreRounding, UErrorCode& status) {
45     parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
46 }
47 
48 
charAt(int32_t flags,int32_t index) const49 char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
50     const Endpoints& endpoints = getEndpoints(flags);
51     if (index < 0 || index >= endpoints.end - endpoints.start) {
52         U_ASSERT(false);
53     }
54     return pattern.charAt(endpoints.start + index);
55 }
56 
length(int32_t flags) const57 int32_t ParsedPatternInfo::length(int32_t flags) const {
58     return getLengthFromEndpoints(getEndpoints(flags));
59 }
60 
getLengthFromEndpoints(const Endpoints & endpoints)61 int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
62     return endpoints.end - endpoints.start;
63 }
64 
getString(int32_t flags) const65 UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
66     const Endpoints& endpoints = getEndpoints(flags);
67     if (endpoints.start == endpoints.end) {
68         return UnicodeString();
69     }
70     // Create a new UnicodeString
71     return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
72 }
73 
getEndpoints(int32_t flags) const74 const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
75     bool prefix = (flags & AFFIX_PREFIX) != 0;
76     bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
77     bool padding = (flags & AFFIX_PADDING) != 0;
78     if (isNegative && padding) {
79         return negative.paddingEndpoints;
80     } else if (padding) {
81         return positive.paddingEndpoints;
82     } else if (prefix && isNegative) {
83         return negative.prefixEndpoints;
84     } else if (prefix) {
85         return positive.prefixEndpoints;
86     } else if (isNegative) {
87         return negative.suffixEndpoints;
88     } else {
89         return positive.suffixEndpoints;
90     }
91 }
92 
positiveHasPlusSign() const93 bool ParsedPatternInfo::positiveHasPlusSign() const {
94     return positive.hasPlusSign;
95 }
96 
hasNegativeSubpattern() const97 bool ParsedPatternInfo::hasNegativeSubpattern() const {
98     return fHasNegativeSubpattern;
99 }
100 
negativeHasMinusSign() const101 bool ParsedPatternInfo::negativeHasMinusSign() const {
102     return negative.hasMinusSign;
103 }
104 
hasCurrencySign() const105 bool ParsedPatternInfo::hasCurrencySign() const {
106     return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
107 }
108 
containsSymbolType(AffixPatternType type,UErrorCode & status) const109 bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
110     return AffixUtils::containsType(pattern, type, status);
111 }
112 
hasBody() const113 bool ParsedPatternInfo::hasBody() const {
114     return positive.integerTotal > 0;
115 }
116 
117 /////////////////////////////////////////////////////
118 /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
119 /////////////////////////////////////////////////////
120 
peek()121 UChar32 ParsedPatternInfo::ParserState::peek() {
122     if (offset == pattern.length()) {
123         return -1;
124     } else {
125         return pattern.char32At(offset);
126     }
127 }
128 
next()129 UChar32 ParsedPatternInfo::ParserState::next() {
130     int codePoint = peek();
131     offset += U16_LENGTH(codePoint);
132     return codePoint;
133 }
134 
consumePattern(const UnicodeString & patternString,UErrorCode & status)135 void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
136     if (U_FAILURE(status)) { return; }
137     this->pattern = patternString;
138 
139     // This class is not intended for writing twice!
140     // Use move assignment to overwrite instead.
141     U_ASSERT(state.offset == 0);
142 
143     // pattern := subpattern (';' subpattern)?
144     currentSubpattern = &positive;
145     consumeSubpattern(status);
146     if (U_FAILURE(status)) { return; }
147     if (state.peek() == u';') {
148         state.next(); // consume the ';'
149         // Don't consume the negative subpattern if it is empty (trailing ';')
150         if (state.peek() != -1) {
151             fHasNegativeSubpattern = true;
152             currentSubpattern = &negative;
153             consumeSubpattern(status);
154             if (U_FAILURE(status)) { return; }
155         }
156     }
157     if (state.peek() != -1) {
158         state.toParseException(u"Found unquoted special character");
159         status = U_UNQUOTED_SPECIAL;
160     }
161 }
162 
consumeSubpattern(UErrorCode & status)163 void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
164     // subpattern := literals? number exponent? literals?
165     consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
166     if (U_FAILURE(status)) { return; }
167     consumeAffix(currentSubpattern->prefixEndpoints, status);
168     if (U_FAILURE(status)) { return; }
169     consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
170     if (U_FAILURE(status)) { return; }
171     consumeFormat(status);
172     if (U_FAILURE(status)) { return; }
173     consumeExponent(status);
174     if (U_FAILURE(status)) { return; }
175     consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
176     if (U_FAILURE(status)) { return; }
177     consumeAffix(currentSubpattern->suffixEndpoints, status);
178     if (U_FAILURE(status)) { return; }
179     consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
180     if (U_FAILURE(status)) { return; }
181 }
182 
consumePadding(PadPosition paddingLocation,UErrorCode & status)183 void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
184     if (state.peek() != u'*') {
185         return;
186     }
187     if (currentSubpattern->hasPadding) {
188         state.toParseException(u"Cannot have multiple pad specifiers");
189         status = U_MULTIPLE_PAD_SPECIFIERS;
190         return;
191     }
192     currentSubpattern->paddingLocation = paddingLocation;
193     currentSubpattern->hasPadding = true;
194     state.next(); // consume the '*'
195     currentSubpattern->paddingEndpoints.start = state.offset;
196     consumeLiteral(status);
197     currentSubpattern->paddingEndpoints.end = state.offset;
198 }
199 
consumeAffix(Endpoints & endpoints,UErrorCode & status)200 void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
201     // literals := { literal }
202     endpoints.start = state.offset;
203     while (true) {
204         switch (state.peek()) {
205             case u'#':
206             case u'@':
207             case u';':
208             case u'*':
209             case u'.':
210             case u',':
211             case u'0':
212             case u'1':
213             case u'2':
214             case u'3':
215             case u'4':
216             case u'5':
217             case u'6':
218             case u'7':
219             case u'8':
220             case u'9':
221             case -1:
222                 // Characters that cannot appear unquoted in a literal
223                 // break outer;
224                 goto after_outer;
225 
226             case u'%':
227                 currentSubpattern->hasPercentSign = true;
228                 break;
229 
230             case u'‰':
231                 currentSubpattern->hasPerMilleSign = true;
232                 break;
233 
234             case u'¤':
235                 currentSubpattern->hasCurrencySign = true;
236                 break;
237 
238             case u'-':
239                 currentSubpattern->hasMinusSign = true;
240                 break;
241 
242             case u'+':
243                 currentSubpattern->hasPlusSign = true;
244                 break;
245 
246             default:
247                 break;
248         }
249         consumeLiteral(status);
250         if (U_FAILURE(status)) { return; }
251     }
252     after_outer:
253     endpoints.end = state.offset;
254 }
255 
consumeLiteral(UErrorCode & status)256 void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
257     if (state.peek() == -1) {
258         state.toParseException(u"Expected unquoted literal but found EOL");
259         status = U_PATTERN_SYNTAX_ERROR;
260         return;
261     } else if (state.peek() == u'\'') {
262         state.next(); // consume the starting quote
263         while (state.peek() != u'\'') {
264             if (state.peek() == -1) {
265                 state.toParseException(u"Expected quoted literal but found EOL");
266                 status = U_PATTERN_SYNTAX_ERROR;
267                 return;
268             } else {
269                 state.next(); // consume a quoted character
270             }
271         }
272         state.next(); // consume the ending quote
273     } else {
274         // consume a non-quoted literal character
275         state.next();
276     }
277 }
278 
consumeFormat(UErrorCode & status)279 void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
280     consumeIntegerFormat(status);
281     if (U_FAILURE(status)) { return; }
282     if (state.peek() == u'.') {
283         state.next(); // consume the decimal point
284         currentSubpattern->hasDecimal = true;
285         currentSubpattern->widthExceptAffixes += 1;
286         consumeFractionFormat(status);
287         if (U_FAILURE(status)) { return; }
288     }
289 }
290 
consumeIntegerFormat(UErrorCode & status)291 void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
292     // Convenience reference:
293     ParsedSubpatternInfo& result = *currentSubpattern;
294 
295     while (true) {
296         switch (state.peek()) {
297             case u',':
298                 result.widthExceptAffixes += 1;
299                 result.groupingSizes <<= 16;
300                 break;
301 
302             case u'#':
303                 if (result.integerNumerals > 0) {
304                     state.toParseException(u"# cannot follow 0 before decimal point");
305                     status = U_UNEXPECTED_TOKEN;
306                     return;
307                 }
308                 result.widthExceptAffixes += 1;
309                 result.groupingSizes += 1;
310                 if (result.integerAtSigns > 0) {
311                     result.integerTrailingHashSigns += 1;
312                 } else {
313                     result.integerLeadingHashSigns += 1;
314                 }
315                 result.integerTotal += 1;
316                 break;
317 
318             case u'@':
319                 if (result.integerNumerals > 0) {
320                     state.toParseException(u"Cannot mix 0 and @");
321                     status = U_UNEXPECTED_TOKEN;
322                     return;
323                 }
324                 if (result.integerTrailingHashSigns > 0) {
325                     state.toParseException(u"Cannot nest # inside of a run of @");
326                     status = U_UNEXPECTED_TOKEN;
327                     return;
328                 }
329                 result.widthExceptAffixes += 1;
330                 result.groupingSizes += 1;
331                 result.integerAtSigns += 1;
332                 result.integerTotal += 1;
333                 break;
334 
335             case u'0':
336             case u'1':
337             case u'2':
338             case u'3':
339             case u'4':
340             case u'5':
341             case u'6':
342             case u'7':
343             case u'8':
344             case u'9':
345                 if (result.integerAtSigns > 0) {
346                     state.toParseException(u"Cannot mix @ and 0");
347                     status = U_UNEXPECTED_TOKEN;
348                     return;
349                 }
350                 result.widthExceptAffixes += 1;
351                 result.groupingSizes += 1;
352                 result.integerNumerals += 1;
353                 result.integerTotal += 1;
354                 if (!result.rounding.isZero() || state.peek() != u'0') {
355                     result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
356                 }
357                 break;
358 
359             default:
360                 goto after_outer;
361         }
362         state.next(); // consume the symbol
363     }
364 
365     after_outer:
366     // Disallow patterns with a trailing ',' or with two ',' next to each other
367     auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
368     auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
369     auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
370     if (grouping1 == 0 && grouping2 != -1) {
371         state.toParseException(u"Trailing grouping separator is invalid");
372         status = U_UNEXPECTED_TOKEN;
373         return;
374     }
375     if (grouping2 == 0 && grouping3 != -1) {
376         state.toParseException(u"Grouping width of zero is invalid");
377         status = U_PATTERN_SYNTAX_ERROR;
378         return;
379     }
380 }
381 
consumeFractionFormat(UErrorCode & status)382 void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
383     // Convenience reference:
384     ParsedSubpatternInfo& result = *currentSubpattern;
385 
386     int32_t zeroCounter = 0;
387     while (true) {
388         switch (state.peek()) {
389             case u'#':
390                 result.widthExceptAffixes += 1;
391                 result.fractionHashSigns += 1;
392                 result.fractionTotal += 1;
393                 zeroCounter++;
394                 break;
395 
396             case u'0':
397             case u'1':
398             case u'2':
399             case u'3':
400             case u'4':
401             case u'5':
402             case u'6':
403             case u'7':
404             case u'8':
405             case u'9':
406                 if (result.fractionHashSigns > 0) {
407                     state.toParseException(u"0 cannot follow # after decimal point");
408                     status = U_UNEXPECTED_TOKEN;
409                     return;
410                 }
411                 result.widthExceptAffixes += 1;
412                 result.fractionNumerals += 1;
413                 result.fractionTotal += 1;
414                 if (state.peek() == u'0') {
415                     zeroCounter++;
416                 } else {
417                     result.rounding
418                             .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
419                     zeroCounter = 0;
420                 }
421                 break;
422 
423             default:
424                 return;
425         }
426         state.next(); // consume the symbol
427     }
428 }
429 
consumeExponent(UErrorCode & status)430 void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
431     // Convenience reference:
432     ParsedSubpatternInfo& result = *currentSubpattern;
433 
434     if (state.peek() != u'E') {
435         return;
436     }
437     if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
438         state.toParseException(u"Cannot have grouping separator in scientific notation");
439         status = U_MALFORMED_EXPONENTIAL_PATTERN;
440         return;
441     }
442     state.next(); // consume the E
443     result.widthExceptAffixes++;
444     if (state.peek() == u'+') {
445         state.next(); // consume the +
446         result.exponentHasPlusSign = true;
447         result.widthExceptAffixes++;
448     }
449     while (state.peek() == u'0') {
450         state.next(); // consume the 0
451         result.exponentZeros += 1;
452         result.widthExceptAffixes++;
453     }
454 }
455 
456 ///////////////////////////////////////////////////
457 /// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
458 ///////////////////////////////////////////////////
459 
parseToExistingPropertiesImpl(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)460 void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
461                                                   DecimalFormatProperties& properties,
462                                                   IgnoreRounding ignoreRounding, UErrorCode& status) {
463     if (pattern.length() == 0) {
464         // Backwards compatibility requires that we reset to the default values.
465         // TODO: Only overwrite the properties that "saveToProperties" normally touches?
466         properties.clear();
467         return;
468     }
469 
470     ParsedPatternInfo patternInfo;
471     parseToPatternInfo(pattern, patternInfo, status);
472     if (U_FAILURE(status)) { return; }
473     patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
474 }
475 
476 void
patternInfoToProperties(DecimalFormatProperties & properties,ParsedPatternInfo & patternInfo,IgnoreRounding _ignoreRounding,UErrorCode & status)477 PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
478                                        IgnoreRounding _ignoreRounding, UErrorCode& status) {
479     // Translate from PatternParseResult to Properties.
480     // Note that most data from "negative" is ignored per the specification of DecimalFormat.
481 
482     const ParsedSubpatternInfo& positive = patternInfo.positive;
483 
484     bool ignoreRounding;
485     if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
486         ignoreRounding = false;
487     } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
488         ignoreRounding = positive.hasCurrencySign;
489     } else {
490         U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
491         ignoreRounding = true;
492     }
493 
494     // Grouping settings
495     auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
496     auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
497     auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
498     if (grouping2 != -1) {
499         properties.groupingSize = grouping1;
500         properties.groupingUsed = true;
501     } else {
502         properties.groupingSize = -1;
503         properties.groupingUsed = false;
504     }
505     if (grouping3 != -1) {
506         properties.secondaryGroupingSize = grouping2;
507     } else {
508         properties.secondaryGroupingSize = -1;
509     }
510 
511     // For backwards compatibility, require that the pattern emit at least one min digit.
512     int minInt, minFrac;
513     if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
514         // patterns like ".##"
515         minInt = 0;
516         minFrac = uprv_max(1, positive.fractionNumerals);
517     } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
518         // patterns like "#.##"
519         minInt = 1;
520         minFrac = 0;
521     } else {
522         minInt = positive.integerNumerals;
523         minFrac = positive.fractionNumerals;
524     }
525 
526     // Rounding settings
527     // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
528     if (positive.integerAtSigns > 0) {
529         properties.minimumFractionDigits = -1;
530         properties.maximumFractionDigits = -1;
531         properties.roundingIncrement = 0.0;
532         properties.minimumSignificantDigits = positive.integerAtSigns;
533         properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
534     } else if (!positive.rounding.isZero()) {
535         if (!ignoreRounding) {
536             properties.minimumFractionDigits = minFrac;
537             properties.maximumFractionDigits = positive.fractionTotal;
538             properties.roundingIncrement = positive.rounding.toDouble();
539         } else {
540             properties.minimumFractionDigits = -1;
541             properties.maximumFractionDigits = -1;
542             properties.roundingIncrement = 0.0;
543         }
544         properties.minimumSignificantDigits = -1;
545         properties.maximumSignificantDigits = -1;
546     } else {
547         if (!ignoreRounding) {
548             properties.minimumFractionDigits = minFrac;
549             properties.maximumFractionDigits = positive.fractionTotal;
550             properties.roundingIncrement = 0.0;
551         } else {
552             properties.minimumFractionDigits = -1;
553             properties.maximumFractionDigits = -1;
554             properties.roundingIncrement = 0.0;
555         }
556         properties.minimumSignificantDigits = -1;
557         properties.maximumSignificantDigits = -1;
558     }
559 
560     // If the pattern ends with a '.' then force the decimal point.
561     if (positive.hasDecimal && positive.fractionTotal == 0) {
562         properties.decimalSeparatorAlwaysShown = true;
563     } else {
564         properties.decimalSeparatorAlwaysShown = false;
565     }
566 
567     // Scientific notation settings
568     if (positive.exponentZeros > 0) {
569         properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
570         properties.minimumExponentDigits = positive.exponentZeros;
571         if (positive.integerAtSigns == 0) {
572             // patterns without '@' can define max integer digits, used for engineering notation
573             properties.minimumIntegerDigits = positive.integerNumerals;
574             properties.maximumIntegerDigits = positive.integerTotal;
575         } else {
576             // patterns with '@' cannot define max integer digits
577             properties.minimumIntegerDigits = 1;
578             properties.maximumIntegerDigits = -1;
579         }
580     } else {
581         properties.exponentSignAlwaysShown = false;
582         properties.minimumExponentDigits = -1;
583         properties.minimumIntegerDigits = minInt;
584         properties.maximumIntegerDigits = -1;
585     }
586 
587     // Compute the affix patterns (required for both padding and affixes)
588     UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
589     UnicodeString posSuffix = patternInfo.getString(0);
590 
591     // Padding settings
592     if (positive.hasPadding) {
593         // The width of the positive prefix and suffix templates are included in the padding
594         int paddingWidth = positive.widthExceptAffixes +
595                            AffixUtils::estimateLength(posPrefix, status) +
596                            AffixUtils::estimateLength(posSuffix, status);
597         properties.formatWidth = paddingWidth;
598         UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
599         if (rawPaddingString.length() == 1) {
600             properties.padString = rawPaddingString;
601         } else if (rawPaddingString.length() == 2) {
602             if (rawPaddingString.charAt(0) == u'\'') {
603                 properties.padString.setTo(u"'", -1);
604             } else {
605                 properties.padString = rawPaddingString;
606             }
607         } else {
608             properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
609         }
610         properties.padPosition = positive.paddingLocation;
611     } else {
612         properties.formatWidth = -1;
613         properties.padString.setToBogus();
614         properties.padPosition.nullify();
615     }
616 
617     // Set the affixes
618     // Always call the setter, even if the prefixes are empty, especially in the case of the
619     // negative prefix pattern, to prevent default values from overriding the pattern.
620     properties.positivePrefixPattern = posPrefix;
621     properties.positiveSuffixPattern = posSuffix;
622     if (patternInfo.fHasNegativeSubpattern) {
623         properties.negativePrefixPattern = patternInfo.getString(
624                 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
625         properties.negativeSuffixPattern = patternInfo.getString(
626                 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
627     } else {
628         properties.negativePrefixPattern.setToBogus();
629         properties.negativeSuffixPattern.setToBogus();
630     }
631 
632     // Set the magnitude multiplier
633     if (positive.hasPercentSign) {
634         properties.magnitudeMultiplier = 2;
635     } else if (positive.hasPerMilleSign) {
636         properties.magnitudeMultiplier = 3;
637     } else {
638         properties.magnitudeMultiplier = 0;
639     }
640 }
641 
642 ///////////////////////////////////////////////////////////////////
643 /// End PatternStringParser.java; begin PatternStringUtils.java ///
644 ///////////////////////////////////////////////////////////////////
645 
propertiesToPatternString(const DecimalFormatProperties & properties,UErrorCode & status)646 UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
647                                                             UErrorCode& status) {
648     UnicodeString sb;
649 
650     // Convenience references
651     // The uprv_min() calls prevent DoS
652     int dosMax = 100;
653     int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax);
654     int firstGroupingSize = uprv_min(properties.groupingSize, dosMax);
655     int paddingWidth = uprv_min(properties.formatWidth, dosMax);
656     NullableValue<PadPosition> paddingLocation = properties.padPosition;
657     UnicodeString paddingString = properties.padString;
658     int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0);
659     int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
660     int minFrac = uprv_max(uprv_min(properties.minimumFractionDigits, dosMax), 0);
661     int maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
662     int minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
663     int maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
664     bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
665     int exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
666     bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
667     UnicodeString pp = properties.positivePrefix;
668     UnicodeString ppp = properties.positivePrefixPattern;
669     UnicodeString ps = properties.positiveSuffix;
670     UnicodeString psp = properties.positiveSuffixPattern;
671     UnicodeString np = properties.negativePrefix;
672     UnicodeString npp = properties.negativePrefixPattern;
673     UnicodeString ns = properties.negativeSuffix;
674     UnicodeString nsp = properties.negativeSuffixPattern;
675 
676     // Prefixes
677     if (!ppp.isBogus()) {
678         sb.append(ppp);
679     }
680     sb.append(AffixUtils::escape(pp));
681     int afterPrefixPos = sb.length();
682 
683     // Figure out the grouping sizes.
684     int grouping1, grouping2, grouping;
685     if (groupingSize != uprv_min(dosMax, -1) && firstGroupingSize != uprv_min(dosMax, -1) &&
686         groupingSize != firstGroupingSize) {
687         grouping = groupingSize;
688         grouping1 = groupingSize;
689         grouping2 = firstGroupingSize;
690     } else if (groupingSize != uprv_min(dosMax, -1)) {
691         grouping = groupingSize;
692         grouping1 = 0;
693         grouping2 = groupingSize;
694     } else if (firstGroupingSize != uprv_min(dosMax, -1)) {
695         grouping = groupingSize;
696         grouping1 = 0;
697         grouping2 = firstGroupingSize;
698     } else {
699         grouping = 0;
700         grouping1 = 0;
701         grouping2 = 0;
702     }
703     int groupingLength = grouping1 + grouping2 + 1;
704 
705     // Figure out the digits we need to put in the pattern.
706     double roundingInterval = properties.roundingIncrement;
707     UnicodeString digitsString;
708     int digitsStringScale = 0;
709     if (maxSig != uprv_min(dosMax, -1)) {
710         // Significant Digits.
711         while (digitsString.length() < minSig) {
712             digitsString.append(u'@');
713         }
714         while (digitsString.length() < maxSig) {
715             digitsString.append(u'#');
716         }
717     } else if (roundingInterval != 0.0) {
718         // Rounding Interval.
719         digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval);
720         // TODO: Check for DoS here?
721         DecimalQuantity incrementQuantity;
722         incrementQuantity.setToDouble(roundingInterval);
723         incrementQuantity.adjustMagnitude(-digitsStringScale);
724         incrementQuantity.roundToMagnitude(0, kDefaultMode, status);
725         UnicodeString str = incrementQuantity.toPlainString();
726         if (str.charAt(0) == u'-') {
727             // TODO: Unsupported operation exception or fail silently?
728             digitsString.append(str, 1, str.length() - 1);
729         } else {
730             digitsString.append(str);
731         }
732     }
733     while (digitsString.length() + digitsStringScale < minInt) {
734         digitsString.insert(0, u'0');
735     }
736     while (-digitsStringScale < minFrac) {
737         digitsString.append(u'0');
738         digitsStringScale--;
739     }
740 
741     // Write the digits to the string builder
742     int m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
743     m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
744     int mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
745     for (int magnitude = m0; magnitude >= mN; magnitude--) {
746         int di = digitsString.length() + digitsStringScale - magnitude - 1;
747         if (di < 0 || di >= digitsString.length()) {
748             sb.append(u'#');
749         } else {
750             sb.append(digitsString.charAt(di));
751         }
752         if (magnitude > grouping2 && grouping > 0 && (magnitude - grouping2) % grouping == 0) {
753             sb.append(u',');
754         } else if (magnitude > 0 && magnitude == grouping2) {
755             sb.append(u',');
756         } else if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
757             sb.append(u'.');
758         }
759     }
760 
761     // Exponential notation
762     if (exponentDigits != uprv_min(dosMax, -1)) {
763         sb.append(u'E');
764         if (exponentShowPlusSign) {
765             sb.append(u'+');
766         }
767         for (int i = 0; i < exponentDigits; i++) {
768             sb.append(u'0');
769         }
770     }
771 
772     // Suffixes
773     int beforeSuffixPos = sb.length();
774     if (!psp.isBogus()) {
775         sb.append(psp);
776     }
777     sb.append(AffixUtils::escape(ps));
778 
779     // Resolve Padding
780     if (paddingWidth != -1 && !paddingLocation.isNull()) {
781         while (paddingWidth - sb.length() > 0) {
782             sb.insert(afterPrefixPos, u'#');
783             beforeSuffixPos++;
784         }
785         int addedLength;
786         switch (paddingLocation.get(status)) {
787             case PadPosition::UNUM_PAD_BEFORE_PREFIX:
788                 addedLength = escapePaddingString(paddingString, sb, 0, status);
789                 sb.insert(0, u'*');
790                 afterPrefixPos += addedLength + 1;
791                 beforeSuffixPos += addedLength + 1;
792                 break;
793             case PadPosition::UNUM_PAD_AFTER_PREFIX:
794                 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
795                 sb.insert(afterPrefixPos, u'*');
796                 afterPrefixPos += addedLength + 1;
797                 beforeSuffixPos += addedLength + 1;
798                 break;
799             case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
800                 escapePaddingString(paddingString, sb, beforeSuffixPos, status);
801                 sb.insert(beforeSuffixPos, u'*');
802                 break;
803             case PadPosition::UNUM_PAD_AFTER_SUFFIX:
804                 sb.append(u'*');
805                 escapePaddingString(paddingString, sb, sb.length(), status);
806                 break;
807         }
808         if (U_FAILURE(status)) { return sb; }
809     }
810 
811     // Negative affixes
812     // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
813     if (!np.isBogus() || !ns.isBogus() || (npp.isBogus() && !nsp.isBogus()) ||
814         (!npp.isBogus() && (npp.length() != 1 || npp.charAt(0) != u'-' || nsp.length() != 0))) {
815         sb.append(u';');
816         if (!npp.isBogus()) {
817             sb.append(npp);
818         }
819         sb.append(AffixUtils::escape(np));
820         // Copy the positive digit format into the negative.
821         // This is optional; the pattern is the same as if '#' were appended here instead.
822         // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
823         // See http://bugs.icu-project.org/trac/ticket/13707
824         UnicodeString copy(sb);
825         sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
826         if (!nsp.isBogus()) {
827             sb.append(nsp);
828         }
829         sb.append(AffixUtils::escape(ns));
830     }
831 
832     return sb;
833 }
834 
escapePaddingString(UnicodeString input,UnicodeString & output,int startIndex,UErrorCode & status)835 int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
836                                             UErrorCode& status) {
837     (void) status;
838     if (input.length() == 0) {
839         input.setTo(kFallbackPaddingString, -1);
840     }
841     int startLength = output.length();
842     if (input.length() == 1) {
843         if (input.compare(u"'", -1) == 0) {
844             output.insert(startIndex, u"''", -1);
845         } else {
846             output.insert(startIndex, input);
847         }
848     } else {
849         output.insert(startIndex, u'\'');
850         int offset = 1;
851         for (int i = 0; i < input.length(); i++) {
852             // it's okay to deal in chars here because the quote mark is the only interesting thing.
853             char16_t ch = input.charAt(i);
854             if (ch == u'\'') {
855                 output.insert(startIndex + offset, u"''", -1);
856                 offset += 2;
857             } else {
858                 output.insert(startIndex + offset, ch);
859                 offset += 1;
860             }
861         }
862         output.insert(startIndex + offset, u'\'');
863     }
864     return output.length() - startLength;
865 }
866 
867 UnicodeString
convertLocalized(const UnicodeString & input,const DecimalFormatSymbols & symbols,bool toLocalized,UErrorCode & status)868 PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
869                                      bool toLocalized, UErrorCode& status) {
870     // Construct a table of strings to be converted between localized and standard.
871     static constexpr int32_t LEN = 21;
872     UnicodeString table[LEN][2];
873     int standIdx = toLocalized ? 0 : 1;
874     int localIdx = toLocalized ? 1 : 0;
875     table[0][standIdx] = u"%";
876     table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
877     table[1][standIdx] = u"‰";
878     table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
879     table[2][standIdx] = u".";
880     table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
881     table[3][standIdx] = u",";
882     table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
883     table[4][standIdx] = u"-";
884     table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
885     table[5][standIdx] = u"+";
886     table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
887     table[6][standIdx] = u";";
888     table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
889     table[7][standIdx] = u"@";
890     table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
891     table[8][standIdx] = u"E";
892     table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
893     table[9][standIdx] = u"*";
894     table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
895     table[10][standIdx] = u"#";
896     table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
897     for (int i = 0; i < 10; i++) {
898         table[11 + i][standIdx] = u'0' + i;
899         table[11 + i][localIdx] = symbols.getConstDigitSymbol(i);
900     }
901 
902     // Special case: quotes are NOT allowed to be in any localIdx strings.
903     // Substitute them with '’' instead.
904     for (int32_t i = 0; i < LEN; i++) {
905         table[i][localIdx].findAndReplace(u'\'', u'’');
906     }
907 
908     // Iterate through the string and convert.
909     // State table:
910     // 0 => base state
911     // 1 => first char inside a quoted sequence in input and output string
912     // 2 => inside a quoted sequence in input and output string
913     // 3 => first char after a close quote in input string;
914     // close quote still needs to be written to output string
915     // 4 => base state in input string; inside quoted sequence in output string
916     // 5 => first char inside a quoted sequence in input string;
917     // inside quoted sequence in output string
918     UnicodeString result;
919     int state = 0;
920     for (int offset = 0; offset < input.length(); offset++) {
921         UChar ch = input.charAt(offset);
922 
923         // Handle a quote character (state shift)
924         if (ch == u'\'') {
925             if (state == 0) {
926                 result.append(u'\'');
927                 state = 1;
928                 continue;
929             } else if (state == 1) {
930                 result.append(u'\'');
931                 state = 0;
932                 continue;
933             } else if (state == 2) {
934                 state = 3;
935                 continue;
936             } else if (state == 3) {
937                 result.append(u'\'');
938                 result.append(u'\'');
939                 state = 1;
940                 continue;
941             } else if (state == 4) {
942                 state = 5;
943                 continue;
944             } else {
945                 U_ASSERT(state == 5);
946                 result.append(u'\'');
947                 result.append(u'\'');
948                 state = 4;
949                 continue;
950             }
951         }
952 
953         if (state == 0 || state == 3 || state == 4) {
954             for (auto& pair : table) {
955                 // Perform a greedy match on this symbol string
956                 UnicodeString temp = input.tempSubString(offset, pair[0].length());
957                 if (temp == pair[0]) {
958                     // Skip ahead past this region for the next iteration
959                     offset += pair[0].length() - 1;
960                     if (state == 3 || state == 4) {
961                         result.append(u'\'');
962                         state = 0;
963                     }
964                     result.append(pair[1]);
965                     goto continue_outer;
966                 }
967             }
968             // No replacement found. Check if a special quote is necessary
969             for (auto& pair : table) {
970                 UnicodeString temp = input.tempSubString(offset, pair[1].length());
971                 if (temp == pair[1]) {
972                     if (state == 0) {
973                         result.append(u'\'');
974                         state = 4;
975                     }
976                     result.append(ch);
977                     goto continue_outer;
978                 }
979             }
980             // Still nothing. Copy the char verbatim. (Add a close quote if necessary)
981             if (state == 3 || state == 4) {
982                 result.append(u'\'');
983                 state = 0;
984             }
985             result.append(ch);
986         } else {
987             U_ASSERT(state == 1 || state == 2 || state == 5);
988             result.append(ch);
989             state = 2;
990         }
991         continue_outer:;
992     }
993     // Resolve final quotes
994     if (state == 3 || state == 4) {
995         result.append(u'\'');
996         state = 0;
997     }
998     if (state != 0) {
999         // Malformed localized pattern: unterminated quote
1000         status = U_PATTERN_SYNTAX_ERROR;
1001     }
1002     return result;
1003 }
1004 
patternInfoToStringBuilder(const AffixPatternProvider & patternInfo,bool isPrefix,int8_t signum,UNumberSignDisplay signDisplay,StandardPlural::Form plural,bool perMilleReplacesPercent,UnicodeString & output)1005 void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1006                                                     int8_t signum, UNumberSignDisplay signDisplay,
1007                                                     StandardPlural::Form plural,
1008                                                     bool perMilleReplacesPercent, UnicodeString& output) {
1009 
1010     // Should the output render '+' where '-' would normally appear in the pattern?
1011     bool plusReplacesMinusSign = signum != -1 && (
1012             signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || (
1013                     signum == 1 && (
1014                             signDisplay == UNUM_SIGN_EXCEPT_ZERO ||
1015                             signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) &&
1016                                  patternInfo.positiveHasPlusSign() == false;
1017 
1018     // Should we use the affix from the negative subpattern? (If not, we will use the positive
1019     // subpattern.)
1020     bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (
1021             signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
1022 
1023     // Resolve the flags for the affix pattern.
1024     int flags = 0;
1025     if (useNegativeAffixPattern) {
1026         flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1027     }
1028     if (isPrefix) {
1029         flags |= AffixPatternProvider::AFFIX_PREFIX;
1030     }
1031     if (plural != StandardPlural::Form::COUNT) {
1032         U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1033         flags |= plural;
1034     }
1035 
1036     // Should we prepend a sign to the pattern?
1037     bool prependSign;
1038     if (!isPrefix || useNegativeAffixPattern) {
1039         prependSign = false;
1040     } else if (signum == -1) {
1041         prependSign = signDisplay != UNUM_SIGN_NEVER;
1042     } else {
1043         prependSign = plusReplacesMinusSign;
1044     }
1045 
1046     // Compute the length of the affix pattern.
1047     int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
1048 
1049     // Finally, set the result into the StringBuilder.
1050     output.remove();
1051     for (int index = 0; index < length; index++) {
1052         char16_t candidate;
1053         if (prependSign && index == 0) {
1054             candidate = u'-';
1055         } else if (prependSign) {
1056             candidate = patternInfo.charAt(flags, index - 1);
1057         } else {
1058             candidate = patternInfo.charAt(flags, index);
1059         }
1060         if (plusReplacesMinusSign && candidate == u'-') {
1061             candidate = u'+';
1062         }
1063         if (perMilleReplacesPercent && candidate == u'%') {
1064             candidate = u'‰';
1065         }
1066         output.append(candidate);
1067     }
1068 }
1069 
1070 #endif /* #if !UCONFIG_NO_FORMATTING */
1071