1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "cstring.h"
9 #include "number_decimalquantity.h"
10 #include "resource.h"
11 #include "uassert.h"
12 #include "unicode/unistr.h"
13 #include "unicode/ures.h"
14 #include "units_data.h"
15 #include "uresimp.h"
16 #include "util.h"
17 #include <utility>
18 
19 U_NAMESPACE_BEGIN
20 namespace units {
21 
22 namespace {
23 
24 using icu::number::impl::DecimalQuantity;
25 
trimSpaces(CharString & factor,UErrorCode & status)26 void trimSpaces(CharString& factor, UErrorCode& status){
27    CharString trimmed;
28    for (int i = 0 ; i < factor.length(); i++) {
29        if (factor[i] == ' ') continue;
30 
31        trimmed.append(factor[i], status);
32    }
33 
34    factor = std::move(trimmed);
35 }
36 
37 /**
38  * A ResourceSink that collects conversion rate information.
39  *
40  * This class is for use by ures_getAllItemsWithFallback.
41  */
42 class ConversionRateDataSink : public ResourceSink {
43   public:
44     /**
45      * Constructor.
46      * @param out The vector to which ConversionRateInfo instances are to be
47      * added. This vector must outlive the use of the ResourceSink.
48      */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)49     explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
50 
51     /**
52      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
53      * conversion rates that are found in `value` to the output vector.
54      *
55      * @param source This string must be "convertUnits": the resource that this
56      * class supports reading.
57      * @param value The "convertUnits" resource, containing unit conversion rate
58      * information.
59      * @param noFallback Ignored.
60      * @param status The standard ICU error code output parameter.
61      */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)62     void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
63         if (U_FAILURE(status)) { return; }
64         if (uprv_strcmp(source, "convertUnits") != 0) {
65             // This is very strict, however it is the cheapest way to be sure
66             // that with `value`, we're looking at the convertUnits table.
67             status = U_ILLEGAL_ARGUMENT_ERROR;
68             return;
69         }
70         ResourceTable conversionRateTable = value.getTable(status);
71         const char *srcUnit;
72         // We're reusing `value`, which seems to be a common pattern:
73         for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
74             ResourceTable unitTable = value.getTable(status);
75             const char *key;
76             UnicodeString baseUnit = ICU_Utility::makeBogusString();
77             UnicodeString factor = ICU_Utility::makeBogusString();
78             UnicodeString offset = ICU_Utility::makeBogusString();
79             for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
80                 if (uprv_strcmp(key, "target") == 0) {
81                     baseUnit = value.getUnicodeString(status);
82                 } else if (uprv_strcmp(key, "factor") == 0) {
83                     factor = value.getUnicodeString(status);
84                 } else if (uprv_strcmp(key, "offset") == 0) {
85                     offset = value.getUnicodeString(status);
86                 }
87             }
88             if (U_FAILURE(status)) { return; }
89             if (baseUnit.isBogus() || factor.isBogus()) {
90                 // We could not find a usable conversion rate: bad resource.
91                 status = U_MISSING_RESOURCE_ERROR;
92                 return;
93             }
94 
95             // We don't have this ConversionRateInfo yet: add it.
96             ConversionRateInfo *cr = outVector->emplaceBack();
97             if (!cr) {
98                 status = U_MEMORY_ALLOCATION_ERROR;
99                 return;
100             } else {
101                 cr->sourceUnit.append(srcUnit, status);
102                 cr->baseUnit.appendInvariantChars(baseUnit, status);
103                 cr->factor.appendInvariantChars(factor, status);
104                 trimSpaces(cr->factor, status);
105                 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
106             }
107         }
108         return;
109     }
110 
111   private:
112     MaybeStackVector<ConversionRateInfo> *outVector;
113 };
114 
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)115 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
116     return a.compareTo(b) < 0;
117 }
118 
119 /**
120  * A ResourceSink that collects unit preferences information.
121  *
122  * This class is for use by ures_getAllItemsWithFallback.
123  */
124 class UnitPreferencesSink : public ResourceSink {
125   public:
126     /**
127      * Constructor.
128      * @param outPrefs The vector to which UnitPreference instances are to be
129      * added. This vector must outlive the use of the ResourceSink.
130      * @param outMetadata  The vector to which UnitPreferenceMetadata instances
131      * are to be added. This vector must outlive the use of the ResourceSink.
132      */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)133     explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
134                                  MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
135         : preferences(outPrefs), metadata(outMetadata) {}
136 
137     /**
138      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
139      * preferences info that are found in `value` to the output vector.
140      *
141      * @param source This string must be "unitPreferenceData": the resource that
142      * this class supports reading.
143      * @param value The "unitPreferenceData" resource, containing unit
144      * preferences data.
145      * @param noFallback Ignored.
146      * @param status The standard ICU error code output parameter. Note: if an
147      * error is returned, outPrefs and outMetadata may be inconsistent.
148      */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)149     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
150         if (U_FAILURE(status)) { return; }
151         if (uprv_strcmp(key, "unitPreferenceData") != 0) {
152             // This is very strict, however it is the cheapest way to be sure
153             // that with `value`, we're looking at the convertUnits table.
154             status = U_ILLEGAL_ARGUMENT_ERROR;
155             return;
156         }
157         // The unitPreferenceData structure (see data/misc/units.txt) contains a
158         // hierarchy of category/usage/region, within which are a set of
159         // preferences. Hence three for-loops and another loop for the
160         // preferences themselves:
161         ResourceTable unitPreferenceDataTable = value.getTable(status);
162         const char *category;
163         for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
164             ResourceTable categoryTable = value.getTable(status);
165             const char *usage;
166             for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
167                 ResourceTable regionTable = value.getTable(status);
168                 const char *region;
169                 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
170                     // `value` now contains the set of preferences for
171                     // category/usage/region.
172                     ResourceArray unitPrefs = value.getArray(status);
173                     if (U_FAILURE(status)) { return; }
174                     int32_t prefLen = unitPrefs.getSize();
175 
176                     // Update metadata for this set of preferences.
177                     UnitPreferenceMetadata *meta = metadata->emplaceBack(
178                         category, usage, region, preferences->length(), prefLen, status);
179                     if (!meta) {
180                         status = U_MEMORY_ALLOCATION_ERROR;
181                         return;
182                     }
183                     if (U_FAILURE(status)) { return; }
184                     if (metadata->length() > 1) {
185                         // Verify that unit preferences are sorted and
186                         // without duplicates.
187                         if (!(*(*metadata)[metadata->length() - 2] <
188                               *(*metadata)[metadata->length() - 1])) {
189                             status = U_INVALID_FORMAT_ERROR;
190                             return;
191                         }
192                     }
193 
194                     // Collect the individual preferences.
195                     for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
196                         UnitPreference *up = preferences->emplaceBack();
197                         if (!up) {
198                             status = U_MEMORY_ALLOCATION_ERROR;
199                             return;
200                         }
201                         ResourceTable unitPref = value.getTable(status);
202                         if (U_FAILURE(status)) { return; }
203                         for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
204                             if (uprv_strcmp(key, "unit") == 0) {
205                                 int32_t length;
206                                 const UChar *u = value.getString(length, status);
207                                 up->unit.appendInvariantChars(u, length, status);
208                             } else if (uprv_strcmp(key, "geq") == 0) {
209                                 int32_t length;
210                                 const UChar *g = value.getString(length, status);
211                                 CharString geq;
212                                 geq.appendInvariantChars(g, length, status);
213                                 DecimalQuantity dq;
214                                 dq.setToDecNumber(geq.data(), status);
215                                 up->geq = dq.toDouble();
216                             } else if (uprv_strcmp(key, "skeleton") == 0) {
217                                 up->skeleton = value.getUnicodeString(status);
218                             }
219                         }
220                     }
221                 }
222             }
223         }
224     }
225 
226   private:
227     MaybeStackVector<UnitPreference> *preferences;
228     MaybeStackVector<UnitPreferenceMetadata> *metadata;
229 };
230 
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)231 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
232                      const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
233                      bool *foundRegion, UErrorCode &status) {
234     if (U_FAILURE(status)) { return -1; }
235     int32_t start = 0;
236     int32_t end = metadata->length();
237     *foundCategory = false;
238     *foundUsage = false;
239     *foundRegion = false;
240     while (start < end) {
241         int32_t mid = (start + end) / 2;
242         int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
243         if (cmp < 0) {
244             start = mid + 1;
245         } else if (cmp > 0) {
246             end = mid;
247         } else {
248             return mid;
249         }
250     }
251     return -1;
252 }
253 
254 /**
255  * Finds the UnitPreferenceMetadata instance that matches the given category,
256  * usage and region: if missing, region falls back to "001", and usage
257  * repeatedly drops tailing components, eventually trying "default"
258  * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
259  *
260  * @param metadata The full list of UnitPreferenceMetadata instances.
261  * @param category The category to search for. See getUnitCategory().
262  * @param usage The usage for which formatting preferences is needed. If the
263  * given usage is not known, automatic fallback occurs, see function description
264  * above.
265  * @param region The region for which preferences are needed. If there are no
266  * region-specific preferences, this function automatically falls back to the
267  * "001" region (global).
268  * @param status The standard ICU error code output parameter.
269  *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
270  *   * If fallback to "default" or "001" didn't resolve, status will be
271  *     U_MISSING_RESOURCE.
272  * @return The index into the metadata vector which represents the appropriate
273  * preferences. If appropriate preferences are not found, -1 is returned.
274  */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)275 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
276                                    StringPiece category, StringPiece usage, StringPiece region,
277                                    UErrorCode &status) {
278     if (U_FAILURE(status)) { return -1; }
279     bool foundCategory, foundUsage, foundRegion;
280     UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
281     int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
282     if (U_FAILURE(status)) { return -1; }
283     if (idx >= 0) { return idx; }
284     if (!foundCategory) {
285         status = U_ILLEGAL_ARGUMENT_ERROR;
286         return -1;
287     }
288     U_ASSERT(foundCategory);
289     while (!foundUsage) {
290         int32_t lastDashIdx = desired.usage.lastIndexOf('-');
291         if (lastDashIdx > 0) {
292             desired.usage.truncate(lastDashIdx);
293         } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
294             desired.usage.truncate(0).append("default", status);
295         } else {
296             // "default" is not supposed to be missing for any valid category.
297             status = U_MISSING_RESOURCE_ERROR;
298             return -1;
299         }
300         idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
301         if (U_FAILURE(status)) { return -1; }
302     }
303     U_ASSERT(foundCategory);
304     U_ASSERT(foundUsage);
305     if (!foundRegion) {
306         if (uprv_strcmp(desired.region.data(), "001") != 0) {
307             desired.region.truncate(0).append("001", status);
308             idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
309         }
310         if (!foundRegion) {
311             // "001" is not supposed to be missing for any valid usage.
312             status = U_MISSING_RESOURCE_ERROR;
313             return -1;
314         }
315     }
316     U_ASSERT(foundCategory);
317     U_ASSERT(foundUsage);
318     U_ASSERT(foundRegion);
319     U_ASSERT(idx >= 0);
320     return idx;
321 }
322 
323 } // namespace
324 
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)325 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
326                                                StringPiece region, int32_t prefsOffset,
327                                                int32_t prefsCount, UErrorCode &status) {
328     this->category.append(category, status);
329     this->usage.append(usage, status);
330     this->region.append(region, status);
331     this->prefsOffset = prefsOffset;
332     this->prefsCount = prefsCount;
333 }
334 
compareTo(const UnitPreferenceMetadata & other) const335 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
336     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
337     if (cmp == 0) {
338         cmp = uprv_strcmp(usage.data(), other.usage.data());
339     }
340     if (cmp == 0) {
341         cmp = uprv_strcmp(region.data(), other.region.data());
342     }
343     return cmp;
344 }
345 
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const346 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
347                                           bool *foundUsage, bool *foundRegion) const {
348     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
349     if (cmp == 0) {
350         *foundCategory = true;
351         cmp = uprv_strcmp(usage.data(), other.usage.data());
352     }
353     if (cmp == 0) {
354         *foundUsage = true;
355         cmp = uprv_strcmp(region.data(), other.region.data());
356     }
357     if (cmp == 0) {
358         *foundRegion = true;
359     }
360     return cmp;
361 }
362 
getUnitCategory(const char * baseUnitIdentifier,UErrorCode & status)363 CharString U_I18N_API getUnitCategory(const char *baseUnitIdentifier, UErrorCode &status) {
364     CharString result;
365     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
366     LocalUResourceBundlePointer unitQuantities(
367         ures_getByKey(unitsBundle.getAlias(), "unitQuantities", NULL, &status));
368     int32_t categoryLength;
369     if (U_FAILURE(status)) { return result; }
370     const UChar *uCategory =
371         ures_getStringByKey(unitQuantities.getAlias(), baseUnitIdentifier, &categoryLength, &status);
372     if (U_FAILURE(status)) {
373         // TODO(CLDR-13787,hugovdm): special-casing the consumption-inverse
374         // case. Once CLDR-13787 is clarified, this should be generalised (or
375         // possibly removed):
376         if (uprv_strcmp(baseUnitIdentifier, "meter-per-cubic-meter") == 0) {
377             status = U_ZERO_ERROR;
378             result.append("consumption-inverse", status);
379             return result;
380         }
381     }
382     result.appendInvariantChars(uCategory, categoryLength, status);
383     return result;
384 }
385 
386 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)387 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
388     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
389     ConversionRateDataSink sink(&result);
390     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
391 }
392 
extractConversionInfo(StringPiece source,UErrorCode & status) const393 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
394                                                                  UErrorCode &status) const {
395     for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
396         if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
397     }
398 
399     status = U_INTERNAL_PROGRAM_ERROR;
400     return nullptr;
401 }
402 
UnitPreferences(UErrorCode & status)403 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
404     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
405     UnitPreferencesSink sink(&unitPrefs_, &metadata_);
406     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
407 }
408 
409 // TODO: make outPreferences const?
410 //
411 // TODO: consider replacing `UnitPreference **&outPreferences` with slice class
412 // of some kind.
getPreferencesFor(StringPiece category,StringPiece usage,StringPiece region,const UnitPreference * const * & outPreferences,int32_t & preferenceCount,UErrorCode & status) const413 void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
414                                                    StringPiece region,
415                                                    const UnitPreference *const *&outPreferences,
416                                                    int32_t &preferenceCount, UErrorCode &status) const {
417     int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
418     if (U_FAILURE(status)) { return; }
419     U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
420     const UnitPreferenceMetadata *m = metadata_[idx];
421     outPreferences = unitPrefs_.getAlias() + m->prefsOffset;
422     preferenceCount = m->prefsCount;
423 }
424 
425 } // namespace units
426 U_NAMESPACE_END
427 
428 #endif /* #if !UCONFIG_NO_FORMATTING */
429