1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "pluralranges.h"
16 #include "uresimp.h"
17 #include "util.h"
18
19 using namespace icu;
20 using namespace icu::number;
21 using namespace icu::number::impl;
22
23 namespace {
24
25 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)26 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
27 return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
28 }
29
30
31 struct NumberRangeData {
32 SimpleFormatter rangePattern;
33 SimpleFormatter approximatelyPattern;
34 };
35
36 class NumberRangeDataSink : public ResourceSink {
37 public:
NumberRangeDataSink(NumberRangeData & data)38 NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
39
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)40 void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
41 ResourceTable miscTable = value.getTable(status);
42 if (U_FAILURE(status)) { return; }
43 for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
44 if (uprv_strcmp(key, "range") == 0) {
45 if (hasRangeData()) {
46 continue; // have already seen this pattern
47 }
48 fData.rangePattern = {value.getUnicodeString(status), status};
49 } else if (uprv_strcmp(key, "approximately") == 0) {
50 if (hasApproxData()) {
51 continue; // have already seen this pattern
52 }
53 fData.approximatelyPattern = {value.getUnicodeString(status), status};
54 }
55 }
56 }
57
hasRangeData()58 bool hasRangeData() {
59 return fData.rangePattern.getArgumentLimit() != 0;
60 }
61
hasApproxData()62 bool hasApproxData() {
63 return fData.approximatelyPattern.getArgumentLimit() != 0;
64 }
65
isComplete()66 bool isComplete() {
67 return hasRangeData() && hasApproxData();
68 }
69
fillInDefaults(UErrorCode & status)70 void fillInDefaults(UErrorCode& status) {
71 if (!hasRangeData()) {
72 fData.rangePattern = {u"{0}–{1}", status};
73 }
74 if (!hasApproxData()) {
75 fData.approximatelyPattern = {u"~{0}", status};
76 }
77 }
78
79 private:
80 NumberRangeData& fData;
81 };
82
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)83 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
84 if (U_FAILURE(status)) { return; }
85 LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
86 if (U_FAILURE(status)) { return; }
87 NumberRangeDataSink sink(data);
88
89 CharString dataPath;
90 dataPath.append("NumberElements/", -1, status);
91 dataPath.append(nsName, -1, status);
92 dataPath.append("/miscPatterns", -1, status);
93 if (U_FAILURE(status)) { return; }
94
95 UErrorCode localStatus = U_ZERO_ERROR;
96 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
97 if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
98 status = localStatus;
99 return;
100 }
101
102 // Fall back to latn if necessary
103 if (!sink.isComplete()) {
104 ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
105 }
106
107 sink.fillInDefaults(status);
108 }
109
110 } // namespace
111
112
113
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)114 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
115 : formatterImpl1(macros.formatter1.fMacros, status),
116 formatterImpl2(macros.formatter2.fMacros, status),
117 fSameFormatters(macros.singleFormatter),
118 fCollapse(macros.collapse),
119 fIdentityFallback(macros.identityFallback) {
120
121 const char* nsName = formatterImpl1.getRawMicroProps().nsName;
122 if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
123 status = U_ILLEGAL_ARGUMENT_ERROR;
124 return;
125 }
126
127 NumberRangeData data;
128 getNumberRangeData(macros.locale.getName(), nsName, data, status);
129 if (U_FAILURE(status)) { return; }
130 fRangeFormatter = data.rangePattern;
131 fApproximatelyModifier = {data.approximatelyPattern, kUndefinedField, false};
132
133 // TODO: Get locale from PluralRules instead?
134 fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status);
135 if (U_FAILURE(status)) { return; }
136 }
137
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const138 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
139 if (U_FAILURE(status)) {
140 return;
141 }
142
143 MicroProps micros1;
144 MicroProps micros2;
145 formatterImpl1.preProcess(data.quantity1, micros1, status);
146 if (fSameFormatters) {
147 formatterImpl1.preProcess(data.quantity2, micros2, status);
148 } else {
149 formatterImpl2.preProcess(data.quantity2, micros2, status);
150 }
151 if (U_FAILURE(status)) {
152 return;
153 }
154
155 // If any of the affixes are different, an identity is not possible
156 // and we must use formatRange().
157 // TODO: Write this as MicroProps operator==() ?
158 // TODO: Avoid the redundancy of these equality operations with the
159 // ones in formatRange?
160 if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
161 || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
162 || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
163 formatRange(data, micros1, micros2, status);
164 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
165 return;
166 }
167
168 // Check for identity
169 if (equalBeforeRounding) {
170 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
171 } else if (data.quantity1 == data.quantity2) {
172 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
173 } else {
174 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
175 }
176
177 switch (identity2d(fIdentityFallback, data.identityResult)) {
178 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
179 UNUM_IDENTITY_RESULT_NOT_EQUAL):
180 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
181 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
182 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
183 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
184 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
185 UNUM_IDENTITY_RESULT_NOT_EQUAL):
186 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
187 UNUM_IDENTITY_RESULT_NOT_EQUAL):
188 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
189 UNUM_IDENTITY_RESULT_NOT_EQUAL):
190 formatRange(data, micros1, micros2, status);
191 break;
192
193 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
194 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
195 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
196 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
197 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
198 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
199 formatApproximately(data, micros1, micros2, status);
200 break;
201
202 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
203 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
204 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
205 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
206 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
207 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
208 formatSingleValue(data, micros1, micros2, status);
209 break;
210
211 default:
212 UPRV_UNREACHABLE;
213 }
214 }
215
216
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const217 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
218 MicroProps& micros1, MicroProps& micros2,
219 UErrorCode& status) const {
220 if (U_FAILURE(status)) { return; }
221 if (fSameFormatters) {
222 int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
223 NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
224 } else {
225 formatRange(data, micros1, micros2, status);
226 }
227 }
228
229
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const230 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
231 MicroProps& micros1, MicroProps& micros2,
232 UErrorCode& status) const {
233 if (U_FAILURE(status)) { return; }
234 if (fSameFormatters) {
235 int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
236 // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
237 length += micros1.modInner->apply(data.getStringRef(), 0, length, status);
238 length += micros1.modMiddle->apply(data.getStringRef(), 0, length, status);
239 length += fApproximatelyModifier.apply(data.getStringRef(), 0, length, status);
240 micros1.modOuter->apply(data.getStringRef(), 0, length, status);
241 } else {
242 formatRange(data, micros1, micros2, status);
243 }
244 }
245
246
formatRange(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const247 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
248 MicroProps& micros1, MicroProps& micros2,
249 UErrorCode& status) const {
250 if (U_FAILURE(status)) { return; }
251
252 // modInner is always notation (scientific); collapsable in ALL.
253 // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
254 // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
255 // Never collapse an outer mod but not an inner mod.
256 bool collapseOuter, collapseMiddle, collapseInner;
257 switch (fCollapse) {
258 case UNUM_RANGE_COLLAPSE_ALL:
259 case UNUM_RANGE_COLLAPSE_AUTO:
260 case UNUM_RANGE_COLLAPSE_UNIT:
261 {
262 // OUTER MODIFIER
263 collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
264
265 if (!collapseOuter) {
266 // Never collapse inner mods if outer mods are not collapsable
267 collapseMiddle = false;
268 collapseInner = false;
269 break;
270 }
271
272 // MIDDLE MODIFIER
273 collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
274
275 if (!collapseMiddle) {
276 // Never collapse inner mods if outer mods are not collapsable
277 collapseInner = false;
278 break;
279 }
280
281 // MIDDLE MODIFIER HEURISTICS
282 // (could disable collapsing of the middle modifier)
283 // The modifiers are equal by this point, so we can look at just one of them.
284 const Modifier* mm = micros1.modMiddle;
285 if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
286 // Only collapse if the modifier is a unit.
287 // TODO: Make a better way to check for a unit?
288 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
289 if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
290 && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
291 collapseMiddle = false;
292 }
293 } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
294 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
295 if (mm->getCodePointCount() <= 1) {
296 collapseMiddle = false;
297 }
298 }
299
300 if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
301 collapseInner = false;
302 break;
303 }
304
305 // INNER MODIFIER
306 collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
307
308 // All done checking for collapsability.
309 break;
310 }
311
312 default:
313 collapseOuter = false;
314 collapseMiddle = false;
315 collapseInner = false;
316 break;
317 }
318
319 FormattedStringBuilder& string = data.getStringRef();
320 int32_t lengthPrefix = 0;
321 int32_t length1 = 0;
322 int32_t lengthInfix = 0;
323 int32_t length2 = 0;
324 int32_t lengthSuffix = 0;
325
326 // Use #define so that these are evaluated at the call site.
327 #define UPRV_INDEX_0 (lengthPrefix)
328 #define UPRV_INDEX_1 (lengthPrefix + length1)
329 #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
330 #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
331
332 int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
333 fRangeFormatter,
334 string,
335 0,
336 &lengthPrefix,
337 &lengthSuffix,
338 kUndefinedField,
339 status);
340 if (U_FAILURE(status)) { return; }
341 lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
342 U_ASSERT(lengthInfix > 0);
343
344 // SPACING HEURISTIC
345 // Add spacing unless all modifiers are collapsed.
346 // TODO: add API to control this?
347 // TODO: Use a data-driven heuristic like currency spacing?
348 // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
349 {
350 bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
351 bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
352 bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
353 if (repeatInner || repeatMiddle || repeatOuter) {
354 // Add spacing if there is not already spacing
355 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
356 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
357 }
358 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
359 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
360 }
361 }
362 }
363
364 length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
365 length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
366
367 // TODO: Support padding?
368
369 if (collapseInner) {
370 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
371 const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
372 lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
373 } else {
374 length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
375 length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
376 }
377
378 if (collapseMiddle) {
379 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
380 const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
381 lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
382 } else {
383 length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
384 length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
385 }
386
387 if (collapseOuter) {
388 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
389 const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
390 lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
391 } else {
392 length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
393 length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
394 }
395 }
396
397
398 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const399 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
400 Modifier::Parameters parameters;
401 first.getParameters(parameters);
402 if (parameters.obj == nullptr) {
403 // No plural form; return a fallback (e.g., the first)
404 return first;
405 }
406 StandardPlural::Form firstPlural = parameters.plural;
407
408 second.getParameters(parameters);
409 if (parameters.obj == nullptr) {
410 // No plural form; return a fallback (e.g., the first)
411 return first;
412 }
413 StandardPlural::Form secondPlural = parameters.plural;
414
415 // Get the required plural form from data
416 StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
417
418 // Get and return the new Modifier
419 const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
420 U_ASSERT(mod != nullptr);
421 return *mod;
422 }
423
424
425
426 #endif /* #if !UCONFIG_NO_FORMATTING */
427