1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "pluralranges.h"
16 #include "uresimp.h"
17 #include "util.h"
18 
19 using namespace icu;
20 using namespace icu::number;
21 using namespace icu::number::impl;
22 
23 namespace {
24 
25 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)26 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
27     return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
28 }
29 
30 
31 struct NumberRangeData {
32     SimpleFormatter rangePattern;
33     SimpleFormatter approximatelyPattern;
34 };
35 
36 class NumberRangeDataSink : public ResourceSink {
37   public:
NumberRangeDataSink(NumberRangeData & data)38     NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
39 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)40     void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
41         ResourceTable miscTable = value.getTable(status);
42         if (U_FAILURE(status)) { return; }
43         for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
44             if (uprv_strcmp(key, "range") == 0) {
45                 if (hasRangeData()) {
46                     continue; // have already seen this pattern
47                 }
48                 fData.rangePattern = {value.getUnicodeString(status), status};
49             } else if (uprv_strcmp(key, "approximately") == 0) {
50                 if (hasApproxData()) {
51                     continue; // have already seen this pattern
52                 }
53                 fData.approximatelyPattern = {value.getUnicodeString(status), status};
54             }
55         }
56     }
57 
hasRangeData()58     bool hasRangeData() {
59         return fData.rangePattern.getArgumentLimit() != 0;
60     }
61 
hasApproxData()62     bool hasApproxData() {
63         return fData.approximatelyPattern.getArgumentLimit() != 0;
64     }
65 
isComplete()66     bool isComplete() {
67         return hasRangeData() && hasApproxData();
68     }
69 
fillInDefaults(UErrorCode & status)70     void fillInDefaults(UErrorCode& status) {
71         if (!hasRangeData()) {
72             fData.rangePattern = {u"{0}–{1}", status};
73         }
74         if (!hasApproxData()) {
75             fData.approximatelyPattern = {u"~{0}", status};
76         }
77     }
78 
79   private:
80     NumberRangeData& fData;
81 };
82 
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)83 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
84     if (U_FAILURE(status)) { return; }
85     LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
86     if (U_FAILURE(status)) { return; }
87     NumberRangeDataSink sink(data);
88 
89     CharString dataPath;
90     dataPath.append("NumberElements/", -1, status);
91     dataPath.append(nsName, -1, status);
92     dataPath.append("/miscPatterns", -1, status);
93     if (U_FAILURE(status)) { return; }
94 
95     UErrorCode localStatus = U_ZERO_ERROR;
96     ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
97     if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
98         status = localStatus;
99         return;
100     }
101 
102     // Fall back to latn if necessary
103     if (!sink.isComplete()) {
104         ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
105     }
106 
107     sink.fillInDefaults(status);
108 }
109 
110 } // namespace
111 
112 
113 
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)114 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
115     : formatterImpl1(macros.formatter1.fMacros, status),
116       formatterImpl2(macros.formatter2.fMacros, status),
117       fSameFormatters(macros.singleFormatter),
118       fCollapse(macros.collapse),
119       fIdentityFallback(macros.identityFallback) {
120 
121     const char* nsName = formatterImpl1.getRawMicroProps().nsName;
122     if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
123         status = U_ILLEGAL_ARGUMENT_ERROR;
124         return;
125     }
126 
127     NumberRangeData data;
128     getNumberRangeData(macros.locale.getName(), nsName, data, status);
129     if (U_FAILURE(status)) { return; }
130     fRangeFormatter = data.rangePattern;
131     fApproximatelyModifier = {data.approximatelyPattern, kUndefinedField, false};
132 
133     // TODO: Get locale from PluralRules instead?
134     fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status);
135     if (U_FAILURE(status)) { return; }
136 }
137 
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const138 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
139     if (U_FAILURE(status)) {
140         return;
141     }
142 
143     MicroProps micros1;
144     MicroProps micros2;
145     formatterImpl1.preProcess(data.quantity1, micros1, status);
146     if (fSameFormatters) {
147         formatterImpl1.preProcess(data.quantity2, micros2, status);
148     } else {
149         formatterImpl2.preProcess(data.quantity2, micros2, status);
150     }
151     if (U_FAILURE(status)) {
152         return;
153     }
154 
155     // If any of the affixes are different, an identity is not possible
156     // and we must use formatRange().
157     // TODO: Write this as MicroProps operator==() ?
158     // TODO: Avoid the redundancy of these equality operations with the
159     // ones in formatRange?
160     if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
161             || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
162             || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
163         formatRange(data, micros1, micros2, status);
164         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
165         return;
166     }
167 
168     // Check for identity
169     if (equalBeforeRounding) {
170         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
171     } else if (data.quantity1 == data.quantity2) {
172         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
173     } else {
174         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
175     }
176 
177     switch (identity2d(fIdentityFallback, data.identityResult)) {
178         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
179                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
180         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
181                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
182         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
183                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
184         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
185                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
186         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
187                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
188         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
189                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
190             formatRange(data, micros1, micros2, status);
191             break;
192 
193         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
194                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
195         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
196                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
197         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
198                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
199             formatApproximately(data, micros1, micros2, status);
200             break;
201 
202         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
203                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
204         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
205                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
206         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
207                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
208             formatSingleValue(data, micros1, micros2, status);
209             break;
210 
211         default:
212             UPRV_UNREACHABLE;
213     }
214 }
215 
216 
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const217 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
218                                                  MicroProps& micros1, MicroProps& micros2,
219                                                  UErrorCode& status) const {
220     if (U_FAILURE(status)) { return; }
221     if (fSameFormatters) {
222         int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
223         NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
224     } else {
225         formatRange(data, micros1, micros2, status);
226     }
227 }
228 
229 
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const230 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
231                                                     MicroProps& micros1, MicroProps& micros2,
232                                                     UErrorCode& status) const {
233     if (U_FAILURE(status)) { return; }
234     if (fSameFormatters) {
235         int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
236         // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
237         length += micros1.modInner->apply(data.getStringRef(), 0, length, status);
238         length += micros1.modMiddle->apply(data.getStringRef(), 0, length, status);
239         length += fApproximatelyModifier.apply(data.getStringRef(), 0, length, status);
240         micros1.modOuter->apply(data.getStringRef(), 0, length, status);
241     } else {
242         formatRange(data, micros1, micros2, status);
243     }
244 }
245 
246 
formatRange(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const247 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
248                                            MicroProps& micros1, MicroProps& micros2,
249                                            UErrorCode& status) const {
250     if (U_FAILURE(status)) { return; }
251 
252     // modInner is always notation (scientific); collapsable in ALL.
253     // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
254     // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
255     // Never collapse an outer mod but not an inner mod.
256     bool collapseOuter, collapseMiddle, collapseInner;
257     switch (fCollapse) {
258         case UNUM_RANGE_COLLAPSE_ALL:
259         case UNUM_RANGE_COLLAPSE_AUTO:
260         case UNUM_RANGE_COLLAPSE_UNIT:
261         {
262             // OUTER MODIFIER
263             collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
264 
265             if (!collapseOuter) {
266                 // Never collapse inner mods if outer mods are not collapsable
267                 collapseMiddle = false;
268                 collapseInner = false;
269                 break;
270             }
271 
272             // MIDDLE MODIFIER
273             collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
274 
275             if (!collapseMiddle) {
276                 // Never collapse inner mods if outer mods are not collapsable
277                 collapseInner = false;
278                 break;
279             }
280 
281             // MIDDLE MODIFIER HEURISTICS
282             // (could disable collapsing of the middle modifier)
283             // The modifiers are equal by this point, so we can look at just one of them.
284             const Modifier* mm = micros1.modMiddle;
285             if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
286                 // Only collapse if the modifier is a unit.
287                 // TODO: Make a better way to check for a unit?
288                 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
289                 if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
290                         && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
291                     collapseMiddle = false;
292                 }
293             } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
294                 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
295                 if (mm->getCodePointCount() <= 1) {
296                     collapseMiddle = false;
297                 }
298             }
299 
300             if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
301                 collapseInner = false;
302                 break;
303             }
304 
305             // INNER MODIFIER
306             collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
307 
308             // All done checking for collapsability.
309             break;
310         }
311 
312         default:
313             collapseOuter = false;
314             collapseMiddle = false;
315             collapseInner = false;
316             break;
317     }
318 
319     FormattedStringBuilder& string = data.getStringRef();
320     int32_t lengthPrefix = 0;
321     int32_t length1 = 0;
322     int32_t lengthInfix = 0;
323     int32_t length2 = 0;
324     int32_t lengthSuffix = 0;
325 
326     // Use #define so that these are evaluated at the call site.
327     #define UPRV_INDEX_0 (lengthPrefix)
328     #define UPRV_INDEX_1 (lengthPrefix + length1)
329     #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
330     #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
331 
332     int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
333         fRangeFormatter,
334         string,
335         0,
336         &lengthPrefix,
337         &lengthSuffix,
338         kUndefinedField,
339         status);
340     if (U_FAILURE(status)) { return; }
341     lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
342     U_ASSERT(lengthInfix > 0);
343 
344     // SPACING HEURISTIC
345     // Add spacing unless all modifiers are collapsed.
346     // TODO: add API to control this?
347     // TODO: Use a data-driven heuristic like currency spacing?
348     // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
349     {
350         bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
351         bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
352         bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
353         if (repeatInner || repeatMiddle || repeatOuter) {
354             // Add spacing if there is not already spacing
355             if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
356                 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
357             }
358             if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
359                 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
360             }
361         }
362     }
363 
364     length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
365     length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
366 
367     // TODO: Support padding?
368 
369     if (collapseInner) {
370         // Note: this is actually a mix of prefix and suffix, but adding to infix length works
371         const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
372         lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
373     } else {
374         length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
375         length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
376     }
377 
378     if (collapseMiddle) {
379         // Note: this is actually a mix of prefix and suffix, but adding to infix length works
380         const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
381         lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
382     } else {
383         length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
384         length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
385     }
386 
387     if (collapseOuter) {
388         // Note: this is actually a mix of prefix and suffix, but adding to infix length works
389         const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
390         lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
391     } else {
392         length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
393         length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
394     }
395 }
396 
397 
398 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const399 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
400     Modifier::Parameters parameters;
401     first.getParameters(parameters);
402     if (parameters.obj == nullptr) {
403         // No plural form; return a fallback (e.g., the first)
404         return first;
405     }
406     StandardPlural::Form firstPlural = parameters.plural;
407 
408     second.getParameters(parameters);
409     if (parameters.obj == nullptr) {
410         // No plural form; return a fallback (e.g., the first)
411         return first;
412     }
413     StandardPlural::Form secondPlural = parameters.plural;
414 
415     // Get the required plural form from data
416     StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
417 
418     // Get and return the new Modifier
419     const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
420     U_ASSERT(mod != nullptr);
421     return *mod;
422 }
423 
424 
425 
426 #endif /* #if !UCONFIG_NO_FORMATTING */
427