1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12 
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16 
17 namespace {
18 
19 // TODO: This is copied from simpleformatter.cpp
20 const int32_t ARG_NUM_LIMIT = 0x100;
21 
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
26 
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTS = nullptr;
29 
cleanupDefaultCurrencySpacing()30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31     delete UNISET_DIGIT;
32     UNISET_DIGIT = nullptr;
33     delete UNISET_NOTS;
34     UNISET_NOTS = nullptr;
35     gDefaultCurrencySpacingInitOnce.reset();
36     return TRUE;
37 }
38 
initDefaultCurrencySpacing(UErrorCode & status)39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40     ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41     UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42     UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status);
43     if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) {
44         status = U_MEMORY_ALLOCATION_ERROR;
45         return;
46     }
47     UNISET_DIGIT->freeze();
48     UNISET_NOTS->freeze();
49 }
50 
51 }  // namespace
52 
53 
54 Modifier::~Modifier() = default;
55 
Parameters()56 Modifier::Parameters::Parameters()
57         : obj(nullptr) {}
58 
Parameters(const ModifierStore * _obj,int8_t _signum,StandardPlural::Form _plural)59 Modifier::Parameters::Parameters(
60     const ModifierStore* _obj, int8_t _signum, StandardPlural::Form _plural)
61         : obj(_obj), signum(_signum), plural(_plural) {}
62 
63 ModifierStore::~ModifierStore() = default;
64 
~AdoptingModifierStore()65 AdoptingModifierStore::~AdoptingModifierStore()  {
66     for (const Modifier *mod : mods) {
67         delete mod;
68     }
69 }
70 
71 
apply(NumberStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const72 int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
73                                      UErrorCode &status) const {
74     // Insert the suffix first since inserting the prefix will change the rightIndex
75     int length = output.insert(rightIndex, fSuffix, fField, status);
76     length += output.insert(leftIndex, fPrefix, fField, status);
77     return length;
78 }
79 
getPrefixLength() const80 int32_t ConstantAffixModifier::getPrefixLength() const {
81     return fPrefix.length();
82 }
83 
getCodePointCount() const84 int32_t ConstantAffixModifier::getCodePointCount() const {
85     return fPrefix.countChar32() + fSuffix.countChar32();
86 }
87 
isStrong() const88 bool ConstantAffixModifier::isStrong() const {
89     return fStrong;
90 }
91 
containsField(UNumberFormatFields field) const92 bool ConstantAffixModifier::containsField(UNumberFormatFields field) const {
93     (void)field;
94     // This method is not currently used.
95     U_ASSERT(false);
96     return false;
97 }
98 
getParameters(Parameters & output) const99 void ConstantAffixModifier::getParameters(Parameters& output) const {
100     (void)output;
101     // This method is not currently used.
102     U_ASSERT(false);
103 }
104 
semanticallyEquivalent(const Modifier & other) const105 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
106     auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
107     if (_other == nullptr) {
108         return false;
109     }
110     return fPrefix == _other->fPrefix
111         && fSuffix == _other->fSuffix
112         && fField == _other->fField
113         && fStrong == _other->fStrong;
114 }
115 
116 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong)117 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
118         : SimpleModifier(simpleFormatter, field, strong, {}) {}
119 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong,const Modifier::Parameters parameters)120 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
121                                const Modifier::Parameters parameters)
122         : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
123           fParameters(parameters) {
124     int32_t argLimit = SimpleFormatter::getArgumentLimit(
125             fCompiledPattern.getBuffer(), fCompiledPattern.length());
126     if (argLimit == 0) {
127         // No arguments in compiled pattern
128         fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
129         U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
130         // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
131         fSuffixOffset = -1;
132         fSuffixLength = 0;
133     } else {
134         U_ASSERT(argLimit == 1);
135         if (fCompiledPattern.charAt(1) != 0) {
136             // Found prefix
137             fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
138             fSuffixOffset = 3 + fPrefixLength;
139         } else {
140             // No prefix
141             fPrefixLength = 0;
142             fSuffixOffset = 2;
143         }
144         if (3 + fPrefixLength < fCompiledPattern.length()) {
145             // Found suffix
146             fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
147         } else {
148             // No suffix
149             fSuffixLength = 0;
150         }
151     }
152 }
153 
SimpleModifier()154 SimpleModifier::SimpleModifier()
155         : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
156 }
157 
apply(NumberStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const158 int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
159                               UErrorCode &status) const {
160     return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status);
161 }
162 
getPrefixLength() const163 int32_t SimpleModifier::getPrefixLength() const {
164     return fPrefixLength;
165 }
166 
getCodePointCount() const167 int32_t SimpleModifier::getCodePointCount() const {
168     int32_t count = 0;
169     if (fPrefixLength > 0) {
170         count += fCompiledPattern.countChar32(2, fPrefixLength);
171     }
172     if (fSuffixLength > 0) {
173         count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
174     }
175     return count;
176 }
177 
isStrong() const178 bool SimpleModifier::isStrong() const {
179     return fStrong;
180 }
181 
containsField(UNumberFormatFields field) const182 bool SimpleModifier::containsField(UNumberFormatFields field) const {
183     (void)field;
184     // This method is not currently used.
185     U_ASSERT(false);
186     return false;
187 }
188 
getParameters(Parameters & output) const189 void SimpleModifier::getParameters(Parameters& output) const {
190     output = fParameters;
191 }
192 
semanticallyEquivalent(const Modifier & other) const193 bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
194     auto* _other = dynamic_cast<const SimpleModifier*>(&other);
195     if (_other == nullptr) {
196         return false;
197     }
198     if (fParameters.obj != nullptr) {
199         return fParameters.obj == _other->fParameters.obj;
200     }
201     return fCompiledPattern == _other->fCompiledPattern
202         && fField == _other->fField
203         && fStrong == _other->fStrong;
204 }
205 
206 
207 int32_t
formatAsPrefixSuffix(NumberStringBuilder & result,int32_t startIndex,int32_t endIndex,Field field,UErrorCode & status) const208 SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex,
209                                      Field field, UErrorCode &status) const {
210     if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
211         // There is no argument for the inner number; overwrite the entire segment with our string.
212         return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
213     } else {
214         if (fPrefixLength > 0) {
215             result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
216         }
217         if (fSuffixLength > 0) {
218             result.insert(
219                     endIndex + fPrefixLength,
220                     fCompiledPattern,
221                     1 + fSuffixOffset,
222                     1 + fSuffixOffset + fSuffixLength,
223                     field,
224                     status);
225         }
226         return fPrefixLength + fSuffixLength;
227     }
228 }
229 
230 
231 int32_t
formatTwoArgPattern(const SimpleFormatter & compiled,NumberStringBuilder & result,int32_t index,int32_t * outPrefixLength,int32_t * outSuffixLength,Field field,UErrorCode & status)232 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result,
233                                     int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
234                                     Field field, UErrorCode& status) {
235     const UnicodeString& compiledPattern = compiled.compiledPattern;
236     int32_t argLimit = SimpleFormatter::getArgumentLimit(
237             compiledPattern.getBuffer(), compiledPattern.length());
238     if (argLimit != 2) {
239         status = U_INTERNAL_PROGRAM_ERROR;
240         return 0;
241     }
242     int32_t offset = 1; // offset into compiledPattern
243     int32_t length = 0; // chars added to result
244 
245     int32_t prefixLength = compiledPattern.charAt(offset);
246     offset++;
247     if (prefixLength < ARG_NUM_LIMIT) {
248         // No prefix
249         prefixLength = 0;
250     } else {
251         prefixLength -= ARG_NUM_LIMIT;
252         result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
253         offset += prefixLength;
254         length += prefixLength;
255         offset++;
256     }
257 
258     int32_t infixLength = compiledPattern.charAt(offset);
259     offset++;
260     if (infixLength < ARG_NUM_LIMIT) {
261         // No infix
262         infixLength = 0;
263     } else {
264         infixLength -= ARG_NUM_LIMIT;
265         result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
266         offset += infixLength;
267         length += infixLength;
268         offset++;
269     }
270 
271     int32_t suffixLength;
272     if (offset == compiledPattern.length()) {
273         // No suffix
274         suffixLength = 0;
275     } else {
276         suffixLength = compiledPattern.charAt(offset) -  ARG_NUM_LIMIT;
277         offset++;
278         result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
279         length += suffixLength;
280     }
281 
282     *outPrefixLength = prefixLength;
283     *outSuffixLength = suffixLength;
284 
285     return length;
286 }
287 
288 
apply(NumberStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const289 int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
290                                           UErrorCode &status) const {
291     int32_t length = output.insert(leftIndex, fPrefix, status);
292     if (fOverwrite) {
293         length += output.splice(
294             leftIndex + length,
295             rightIndex + length,
296             UnicodeString(), 0, 0,
297             UNUM_FIELD_COUNT, status);
298     }
299     length += output.insert(rightIndex + length, fSuffix, status);
300     return length;
301 }
302 
getPrefixLength() const303 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
304     return fPrefix.length();
305 }
306 
getCodePointCount() const307 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
308     return fPrefix.codePointCount() + fSuffix.codePointCount();
309 }
310 
isStrong() const311 bool ConstantMultiFieldModifier::isStrong() const {
312     return fStrong;
313 }
314 
containsField(UNumberFormatFields field) const315 bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field) const {
316     return fPrefix.containsField(field) || fSuffix.containsField(field);
317 }
318 
getParameters(Parameters & output) const319 void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
320     output = fParameters;
321 }
322 
semanticallyEquivalent(const Modifier & other) const323 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
324     auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
325     if (_other == nullptr) {
326         return false;
327     }
328     if (fParameters.obj != nullptr) {
329         return fParameters.obj == _other->fParameters.obj;
330     }
331     return fPrefix.contentEquals(_other->fPrefix)
332         && fSuffix.contentEquals(_other->fSuffix)
333         && fOverwrite == _other->fOverwrite
334         && fStrong == _other->fStrong;
335 }
336 
337 
CurrencySpacingEnabledModifier(const NumberStringBuilder & prefix,const NumberStringBuilder & suffix,bool overwrite,bool strong,const DecimalFormatSymbols & symbols,UErrorCode & status)338 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix,
339                                                                const NumberStringBuilder &suffix,
340                                                                bool overwrite,
341                                                                bool strong,
342                                                                const DecimalFormatSymbols &symbols,
343                                                                UErrorCode &status)
344         : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
345     // Check for currency spacing. Do not build the UnicodeSets unless there is
346     // a currency code point at a boundary.
347     if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) {
348         int prefixCp = prefix.getLastCodePoint();
349         UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
350         if (prefixUnicodeSet.contains(prefixCp)) {
351             fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
352             fAfterPrefixUnicodeSet.freeze();
353             fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
354         } else {
355             fAfterPrefixUnicodeSet.setToBogus();
356             fAfterPrefixInsert.setToBogus();
357         }
358     } else {
359         fAfterPrefixUnicodeSet.setToBogus();
360         fAfterPrefixInsert.setToBogus();
361     }
362     if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) {
363         int suffixCp = suffix.getLastCodePoint();
364         UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
365         if (suffixUnicodeSet.contains(suffixCp)) {
366             fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
367             fBeforeSuffixUnicodeSet.freeze();
368             fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
369         } else {
370             fBeforeSuffixUnicodeSet.setToBogus();
371             fBeforeSuffixInsert.setToBogus();
372         }
373     } else {
374         fBeforeSuffixUnicodeSet.setToBogus();
375         fBeforeSuffixInsert.setToBogus();
376     }
377 }
378 
apply(NumberStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const379 int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
380                                               UErrorCode &status) const {
381     // Currency spacing logic
382     int length = 0;
383     if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
384         fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
385         // TODO: Should we use the CURRENCY field here?
386         length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status);
387     }
388     if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
389         fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
390         // TODO: Should we use the CURRENCY field here?
391         length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status);
392     }
393 
394     // Call super for the remaining logic
395     length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
396     return length;
397 }
398 
399 int32_t
applyCurrencySpacing(NumberStringBuilder & output,int32_t prefixStart,int32_t prefixLen,int32_t suffixStart,int32_t suffixLen,const DecimalFormatSymbols & symbols,UErrorCode & status)400 CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart,
401                                                      int32_t prefixLen, int32_t suffixStart,
402                                                      int32_t suffixLen,
403                                                      const DecimalFormatSymbols &symbols,
404                                                      UErrorCode &status) {
405     int length = 0;
406     bool hasPrefix = (prefixLen > 0);
407     bool hasSuffix = (suffixLen > 0);
408     bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
409     if (hasPrefix && hasNumber) {
410         length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
411     }
412     if (hasSuffix && hasNumber) {
413         length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
414     }
415     return length;
416 }
417 
418 int32_t
applyCurrencySpacingAffix(NumberStringBuilder & output,int32_t index,EAffix affix,const DecimalFormatSymbols & symbols,UErrorCode & status)419 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index,
420                                                           EAffix affix,
421                                                           const DecimalFormatSymbols &symbols,
422                                                           UErrorCode &status) {
423     // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
424     // This works even if the last code point in the prefix is 2 code units because the
425     // field value gets populated to both indices in the field array.
426     Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
427     if (affixField != UNUM_CURRENCY_FIELD) {
428         return 0;
429     }
430     int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
431     UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
432     if (!affixUniset.contains(affixCp)) {
433         return 0;
434     }
435     int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
436     UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
437     if (!numberUniset.contains(numberCp)) {
438         return 0;
439     }
440     UnicodeString spacingString = getInsertString(symbols, affix, status);
441 
442     // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
443     // It would be more efficient if this could be done before affixes were attached,
444     // so that it could be prepended/appended instead of inserted.
445     // However, the build code path is more efficient, and this is the most natural
446     // place to put currency spacing in the non-build code path.
447     // TODO: Should we use the CURRENCY field here?
448     return output.insert(index, spacingString, UNUM_FIELD_COUNT, status);
449 }
450 
451 UnicodeSet
getUnicodeSet(const DecimalFormatSymbols & symbols,EPosition position,EAffix affix,UErrorCode & status)452 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
453                                               EAffix affix, UErrorCode &status) {
454     // Ensure the static defaults are initialized:
455     umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
456     if (U_FAILURE(status)) {
457         return UnicodeSet();
458     }
459 
460     const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
461             position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
462             affix == SUFFIX,
463             status);
464     if (pattern.compare(u"[:digit:]", -1) == 0) {
465         return *UNISET_DIGIT;
466     } else if (pattern.compare(u"[:^S:]", -1) == 0) {
467         return *UNISET_NOTS;
468     } else {
469         return UnicodeSet(pattern, status);
470     }
471 }
472 
473 UnicodeString
getInsertString(const DecimalFormatSymbols & symbols,EAffix affix,UErrorCode & status)474 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
475                                                 UErrorCode &status) {
476     return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
477 }
478 
479 #endif /* #if !UCONFIG_NO_FORMATTING */
480