1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // Extra functions for MeasureUnit not needed for all clients.
5 // Separate .o file so that it can be removed for modularity.
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_FORMATTING
10 
11 // Allow implicit conversion from char16_t* to UnicodeString for this file:
12 // Helpful in toString methods and elsewhere.
13 #define UNISTR_FROM_STRING_EXPLICIT
14 
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "measunit_impl.h"
19 #include "resource.h"
20 #include "uarrsort.h"
21 #include "uassert.h"
22 #include "ucln_in.h"
23 #include "umutex.h"
24 #include "unicode/bytestrie.h"
25 #include "unicode/bytestriebuilder.h"
26 #include "unicode/localpointer.h"
27 #include "unicode/measunit.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/stringtriebuilder.h"
30 #include "unicode/ures.h"
31 #include "unicode/ustringtrie.h"
32 #include "uresimp.h"
33 #include <cstdlib>
34 
35 U_NAMESPACE_BEGIN
36 
37 
38 namespace {
39 
// Error code set by the parser and serializer in this file when a unit
// identifier is not syntactically valid.
// TODO: Propose a new error code for this?
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;

// Trie value offset for SI Prefixes. This is big enough to ensure we only
// insert positive integers into the trie. initUnitExtras() adds this offset to
// each UMeasureSIPrefix value before inserting it, and Token::getSIPrefix()
// subtracts it again.
constexpr int32_t kSIPrefixOffset = 64;
46 
// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
constexpr int32_t kCompoundPartOffset = 128;

// Trie values for the separators that join two single units in an identifier.
// The enumerators already include kCompoundPartOffset, so initUnitExtras()
// inserts them into the trie unchanged. Token::getType() reports any value in
// [kCompoundPartOffset, kInitialCompoundPartOffset) as TYPE_COMPOUND_PART.
enum CompoundPart {
    // Represents "-per-"
    COMPOUND_PART_PER = kCompoundPartOffset,
    // Represents "-"
    COMPOUND_PART_TIMES,
    // Represents "-and-"
    COMPOUND_PART_AND,
};
58 
// Trie value offset for "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;

// Trie values for parts that are only accepted as the very first token of an
// identifier. Token::getType() reports any value in
// [kInitialCompoundPartOffset, kPowerPartOffset) as TYPE_INITIAL_COMPOUND_PART.
enum InitialCompoundPart {
    // Represents "per-", the only compound part that can appear at the start of
    // an identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};
67 
// Trie value offset for powers like "square-", "cubic-", "pow2-" etc.
constexpr int32_t kPowerPartOffset = 256;

// Trie values for power prefixes. POWER_PART_Pn equals kPowerPartOffset + n,
// so Token::getPower() recovers the exponent n by subtracting the offset.
// initUnitExtras() maps "square-" and "pow2-" to POWER_PART_P2, and "cubic-"
// and "pow3-" to POWER_PART_P3.
enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3,
    POWER_PART_P4,
    POWER_PART_P5,
    POWER_PART_P6,
    POWER_PART_P7,
    POWER_PART_P8,
    POWER_PART_P9,
    POWER_PART_P10,
    POWER_PART_P11,
    POWER_PART_P12,
    POWER_PART_P13,
    POWER_PART_P14,
    POWER_PART_P15,
};
87 
// Trie value offset for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial". The trie value of a simple unit is this offset plus
// the unit's index in gSimpleUnits; Token::getSimpleUnitIndex() subtracts the
// offset to get the index back.
constexpr int32_t kSimpleUnitOffset = 512;
91 
// Maps the identifier spelling of each SI prefix to its UMeasureSIPrefix value.
// initUnitExtras() iterates this table to add the prefixes to the parsing trie,
// and serializeSingle() searches it to find the spelling for a given prefix.
// UMEASURE_SI_PREFIX_ONE has no entry: serializeSingle() emits nothing for it.
const struct SIPrefixStrings {
    const char* const string;
    UMeasureSIPrefix value;
} gSIPrefixStrings[] = {
    { "yotta", UMEASURE_SI_PREFIX_YOTTA },
    { "zetta", UMEASURE_SI_PREFIX_ZETTA },
    { "exa", UMEASURE_SI_PREFIX_EXA },
    { "peta", UMEASURE_SI_PREFIX_PETA },
    { "tera", UMEASURE_SI_PREFIX_TERA },
    { "giga", UMEASURE_SI_PREFIX_GIGA },
    { "mega", UMEASURE_SI_PREFIX_MEGA },
    { "kilo", UMEASURE_SI_PREFIX_KILO },
    { "hecto", UMEASURE_SI_PREFIX_HECTO },
    { "deka", UMEASURE_SI_PREFIX_DEKA },
    { "deci", UMEASURE_SI_PREFIX_DECI },
    { "centi", UMEASURE_SI_PREFIX_CENTI },
    { "milli", UMEASURE_SI_PREFIX_MILLI },
    { "micro", UMEASURE_SI_PREFIX_MICRO },
    { "nano", UMEASURE_SI_PREFIX_NANO },
    { "pico", UMEASURE_SI_PREFIX_PICO },
    { "femto", UMEASURE_SI_PREFIX_FEMTO },
    { "atto", UMEASURE_SI_PREFIX_ATTO },
    { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
    { "yocto", UMEASURE_SI_PREFIX_YOCTO },
};
117 
118 /**
119  * A ResourceSink that collects simple unit identifiers from the keys of the
120  * convertUnits table into an array, and adds these values to a TrieBuilder,
121  * with associated values being their index into this array plus a specified
122  * offset, to a trie.
123  *
124  * Example code:
125  *
126  *     UErrorCode status = U_ZERO_ERROR;
127  *     BytesTrieBuilder b(status);
128  *     const char *unitIdentifiers[200];
129  *     SimpleUnitIdentifiersSink identifierSink(unitIdentifiers, 200, b, kTrieValueOffset);
130  *     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
131  *     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
132  */
133 class SimpleUnitIdentifiersSink : public icu::ResourceSink {
134   public:
135     /**
136      * Constructor.
137      * @param out Array of char* to which the simple unit identifiers will be
138      *     saved.
139      * @param outSize The size of `out`.
140      * @param trieBuilder The trie builder to which the simple unit identifier
141      *     should be added. The trie builder must outlive this resource sink.
142      * @param trieValueOffset This is added to the index of the identifier in
143      *     the `out` array, before adding to `trieBuilder` as the value
144      *     associated with the identifier.
145      */
SimpleUnitIdentifiersSink(const char ** out,int32_t outSize,BytesTrieBuilder & trieBuilder,int32_t trieValueOffset)146     explicit SimpleUnitIdentifiersSink(const char **out, int32_t outSize, BytesTrieBuilder &trieBuilder,
147                                        int32_t trieValueOffset)
148         : outArray(out), outSize(outSize), trieBuilder(trieBuilder), trieValueOffset(trieValueOffset),
149           outIndex(0) {
150     }
151 
152     /**
153      * Adds the table keys found in value to the output vector.
154      * @param key The key of the resource passed to `value`: the second
155      *     parameter of the ures_getAllItemsWithFallback() call.
156      * @param value Should be a ResourceTable value, if
157      *     ures_getAllItemsWithFallback() was called correctly for this sink.
158      * @param noFallback Ignored.
159      * @param status The standard ICU error code output parameter.
160      */
put(const char *,ResourceValue & value,UBool,UErrorCode & status)161     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
162         ResourceTable table = value.getTable(status);
163         if (U_FAILURE(status)) return;
164 
165         if (outIndex + table.getSize() > outSize) {
166             status = U_INDEX_OUTOFBOUNDS_ERROR;
167             return;
168         }
169 
170         // Collect keys from the table resource.
171         const char *key;
172         for (int32_t i = 0; table.getKeyAndValue(i, key, value); ++i) {
173             U_ASSERT(i < table.getSize());
174             U_ASSERT(outIndex < outSize);
175             if (uprv_strcmp(key, "kilogram") == 0) {
176                 // For parsing, we use "gram", the prefixless metric mass unit. We
177                 // thus ignore the SI Base Unit of Mass: it exists due to being the
178                 // mass conversion target unit, but not needed for MeasureUnit
179                 // parsing.
180                 continue;
181             }
182             outArray[outIndex] = key;
183             trieBuilder.add(key, trieValueOffset + outIndex, status);
184             outIndex++;
185         }
186     }
187 
188   private:
189     const char **outArray;
190     int32_t outSize;
191     BytesTrieBuilder &trieBuilder;
192     int32_t trieValueOffset;
193 
194     int32_t outIndex;
195 };
196 
// Guards the one-time execution of initUnitExtras() (see Parser::from()).
// Reset by cleanupUnitExtras().
icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;

// Array of simple unit IDs.
//
// The array memory itself is owned by this pointer, but the individual char* in
// that array point at static memory. (Note that these char* are also returned
// by SingleUnitImpl::getSimpleUnitID().)
const char **gSimpleUnits = nullptr;

// Serialized BytesTrie used by Parser to tokenize unit identifiers. The bytes
// are copied into uprv_malloc'ed memory by initUnitExtras() and released by
// cleanupUnitExtras().
char *gSerializedUnitExtrasStemTrie = nullptr;
207 
cleanupUnitExtras()208 UBool U_CALLCONV cleanupUnitExtras() {
209     uprv_free(gSerializedUnitExtrasStemTrie);
210     gSerializedUnitExtrasStemTrie = nullptr;
211     uprv_free(gSimpleUnits);
212     gSimpleUnits = nullptr;
213     gUnitExtrasInitOnce.reset();
214     return TRUE;
215 }
216 
initUnitExtras(UErrorCode & status)217 void U_CALLCONV initUnitExtras(UErrorCode& status) {
218     ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
219 
220     BytesTrieBuilder b(status);
221     if (U_FAILURE(status)) { return; }
222 
223     // Add SI prefixes
224     for (const auto& siPrefixInfo : gSIPrefixStrings) {
225         b.add(siPrefixInfo.string, siPrefixInfo.value + kSIPrefixOffset, status);
226     }
227     if (U_FAILURE(status)) { return; }
228 
229     // Add syntax parts (compound, power prefixes)
230     b.add("-per-", COMPOUND_PART_PER, status);
231     b.add("-", COMPOUND_PART_TIMES, status);
232     b.add("-and-", COMPOUND_PART_AND, status);
233     b.add("per-", INITIAL_COMPOUND_PART_PER, status);
234     b.add("square-", POWER_PART_P2, status);
235     b.add("cubic-", POWER_PART_P3, status);
236     b.add("pow2-", POWER_PART_P2, status);
237     b.add("pow3-", POWER_PART_P3, status);
238     b.add("pow4-", POWER_PART_P4, status);
239     b.add("pow5-", POWER_PART_P5, status);
240     b.add("pow6-", POWER_PART_P6, status);
241     b.add("pow7-", POWER_PART_P7, status);
242     b.add("pow8-", POWER_PART_P8, status);
243     b.add("pow9-", POWER_PART_P9, status);
244     b.add("pow10-", POWER_PART_P10, status);
245     b.add("pow11-", POWER_PART_P11, status);
246     b.add("pow12-", POWER_PART_P12, status);
247     b.add("pow13-", POWER_PART_P13, status);
248     b.add("pow14-", POWER_PART_P14, status);
249     b.add("pow15-", POWER_PART_P15, status);
250     if (U_FAILURE(status)) { return; }
251 
252     // Add sanctioned simple units by offset: simple units all have entries in
253     // units/convertUnits resources.
254     // TODO(ICU-21059): confirm whether this is clean enough, or whether we need to
255     // filter units' validity list instead.
256     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
257     LocalUResourceBundlePointer convertUnits(
258         ures_getByKey(unitsBundle.getAlias(), "convertUnits", NULL, &status));
259     if (U_FAILURE(status)) { return; }
260 
261     // Allocate enough space: with identifierSink below skipping kilogram, we're
262     // probably allocating one more than needed.
263     int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
264     int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
265     gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
266     if (gSimpleUnits == nullptr) {
267         status = U_MEMORY_ALLOCATION_ERROR;
268         return;
269     }
270     uprv_memset(gSimpleUnits, 0, arrayMallocSize);
271 
272     // Populate gSimpleUnits and build the associated trie.
273     SimpleUnitIdentifiersSink identifierSink(gSimpleUnits, simpleUnitsCount, b, kSimpleUnitOffset);
274     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
275 
276     // Build the CharsTrie
277     // TODO: Use SLOW or FAST here?
278     StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
279     if (U_FAILURE(status)) { return; }
280 
281     // Copy the result into the global constant pointer
282     size_t numBytes = result.length();
283     gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
284     if (gSerializedUnitExtrasStemTrie == nullptr) {
285         status = U_MEMORY_ALLOCATION_ERROR;
286         return;
287     }
288     uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);
289 }
290 
291 class Token {
292 public:
Token(int32_t match)293     Token(int32_t match) : fMatch(match) {}
294 
295     enum Type {
296         TYPE_UNDEFINED,
297         TYPE_SI_PREFIX,
298         // Token type for "-per-", "-", and "-and-".
299         TYPE_COMPOUND_PART,
300         // Token type for "per-".
301         TYPE_INITIAL_COMPOUND_PART,
302         TYPE_POWER_PART,
303         TYPE_SIMPLE_UNIT,
304     };
305 
306     // Calling getType() is invalid, resulting in an assertion failure, if Token
307     // value isn't positive.
getType() const308     Type getType() const {
309         U_ASSERT(fMatch > 0);
310         if (fMatch < kCompoundPartOffset) {
311             return TYPE_SI_PREFIX;
312         }
313         if (fMatch < kInitialCompoundPartOffset) {
314             return TYPE_COMPOUND_PART;
315         }
316         if (fMatch < kPowerPartOffset) {
317             return TYPE_INITIAL_COMPOUND_PART;
318         }
319         if (fMatch < kSimpleUnitOffset) {
320             return TYPE_POWER_PART;
321         }
322         return TYPE_SIMPLE_UNIT;
323     }
324 
getSIPrefix() const325     UMeasureSIPrefix getSIPrefix() const {
326         U_ASSERT(getType() == TYPE_SI_PREFIX);
327         return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
328     }
329 
330     // Valid only for tokens with type TYPE_COMPOUND_PART.
getMatch() const331     int32_t getMatch() const {
332         U_ASSERT(getType() == TYPE_COMPOUND_PART);
333         return fMatch;
334     }
335 
getInitialCompoundPart() const336     int32_t getInitialCompoundPart() const {
337         // Even if there is only one InitialCompoundPart value, we have this
338         // function for the simplicity of code consistency.
339         U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
340         // Defensive: if this assert fails, code using this function also needs
341         // to change.
342         U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
343         return fMatch;
344     }
345 
getPower() const346     int8_t getPower() const {
347         U_ASSERT(getType() == TYPE_POWER_PART);
348         return static_cast<int8_t>(fMatch - kPowerPartOffset);
349     }
350 
getSimpleUnitIndex() const351     int32_t getSimpleUnitIndex() const {
352         U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
353         return fMatch - kSimpleUnitOffset;
354     }
355 
356 private:
357     int32_t fMatch;
358 };
359 
360 class Parser {
361 public:
362     /**
363      * Factory function for parsing the given identifier.
364      *
365      * @param source The identifier to parse. This function does not make a copy
366      * of source: the underlying string that source points at, must outlive the
367      * parser.
368      * @param status ICU error code.
369      */
from(StringPiece source,UErrorCode & status)370     static Parser from(StringPiece source, UErrorCode& status) {
371         if (U_FAILURE(status)) {
372             return Parser();
373         }
374         umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
375         if (U_FAILURE(status)) {
376             return Parser();
377         }
378         return Parser(source);
379     }
380 
parse(UErrorCode & status)381     MeasureUnitImpl parse(UErrorCode& status) {
382         MeasureUnitImpl result;
383         parseImpl(result, status);
384         return result;
385     }
386 
387 private:
388     // Tracks parser progress: the offset into fSource.
389     int32_t fIndex = 0;
390 
391     // Since we're not owning this memory, whatever is passed to the constructor
392     // should live longer than this Parser - and the parser shouldn't return any
393     // references to that string.
394     StringPiece fSource;
395     BytesTrie fTrie;
396 
397     // Set to true when we've seen a "-per-" or a "per-", after which all units
398     // are in the denominator. Until we find an "-and-", at which point the
399     // identifier is invalid pending TODO(CLDR-13700).
400     bool fAfterPer = false;
401 
Parser()402     Parser() : fSource(""), fTrie(u"") {}
403 
Parser(StringPiece source)404     Parser(StringPiece source)
405         : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}
406 
hasNext() const407     inline bool hasNext() const {
408         return fIndex < fSource.length();
409     }
410 
411     // Returns the next Token parsed from fSource, advancing fIndex to the end
412     // of that token in fSource. In case of U_FAILURE(status), the token
413     // returned will cause an abort if getType() is called on it.
nextToken(UErrorCode & status)414     Token nextToken(UErrorCode& status) {
415         fTrie.reset();
416         int32_t match = -1;
417         // Saves the position in the fSource string for the end of the most
418         // recent matching token.
419         int32_t previ = -1;
420         // Find the longest token that matches a value in the trie:
421         while (fIndex < fSource.length()) {
422             auto result = fTrie.next(fSource.data()[fIndex++]);
423             if (result == USTRINGTRIE_NO_MATCH) {
424                 break;
425             } else if (result == USTRINGTRIE_NO_VALUE) {
426                 continue;
427             }
428             U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
429             match = fTrie.getValue();
430             previ = fIndex;
431             if (result == USTRINGTRIE_FINAL_VALUE) {
432                 break;
433             }
434             U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
435             // continue;
436         }
437 
438         if (match < 0) {
439             status = kUnitIdentifierSyntaxError;
440         } else {
441             fIndex = previ;
442         }
443         return Token(match);
444     }
445 
446     /**
447      * Returns the next "single unit" via result.
448      *
449      * If a "-per-" was parsed, the result will have appropriate negative
450      * dimensionality.
451      *
452      * Returns an error if we parse both compound units and "-and-", since mixed
453      * compound units are not yet supported - TODO(CLDR-13700).
454      *
455      * @param result Will be overwritten by the result, if status shows success.
456      * @param sawAnd If an "-and-" was parsed prior to finding the "single
457      * unit", sawAnd is set to true. If not, it is left as is.
458      * @param status ICU error code.
459      */
nextSingleUnit(SingleUnitImpl & result,bool & sawAnd,UErrorCode & status)460     void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
461         if (U_FAILURE(status)) {
462             return;
463         }
464 
465         // state:
466         // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
467         // 1 = power token seen (will not accept another power token)
468         // 2 = SI prefix token seen (will not accept a power or SI prefix token)
469         int32_t state = 0;
470 
471         bool atStart = fIndex == 0;
472         Token token = nextToken(status);
473         if (U_FAILURE(status)) { return; }
474 
475         if (atStart) {
476             // Identifiers optionally start with "per-".
477             if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
478                 U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
479                 fAfterPer = true;
480                 result.dimensionality = -1;
481 
482                 token = nextToken(status);
483                 if (U_FAILURE(status)) { return; }
484             }
485         } else {
486             // All other SingleUnit's are separated from previous SingleUnit's
487             // via a compound part:
488             if (token.getType() != Token::TYPE_COMPOUND_PART) {
489                 status = kUnitIdentifierSyntaxError;
490                 return;
491             }
492 
493             switch (token.getMatch()) {
494             case COMPOUND_PART_PER:
495                 if (sawAnd) {
496                     // Mixed compound units not yet supported,
497                     // TODO(CLDR-13700).
498                     status = kUnitIdentifierSyntaxError;
499                     return;
500                 }
501                 fAfterPer = true;
502                 result.dimensionality = -1;
503                 break;
504 
505             case COMPOUND_PART_TIMES:
506                 if (fAfterPer) {
507                     result.dimensionality = -1;
508                 }
509                 break;
510 
511             case COMPOUND_PART_AND:
512                 if (fAfterPer) {
513                     // Can't start with "-and-", and mixed compound units
514                     // not yet supported, TODO(CLDR-13700).
515                     status = kUnitIdentifierSyntaxError;
516                     return;
517                 }
518                 sawAnd = true;
519                 break;
520             }
521 
522             token = nextToken(status);
523             if (U_FAILURE(status)) { return; }
524         }
525 
526         // Read tokens until we have a complete SingleUnit or we reach the end.
527         while (true) {
528             switch (token.getType()) {
529                 case Token::TYPE_POWER_PART:
530                     if (state > 0) {
531                         status = kUnitIdentifierSyntaxError;
532                         return;
533                     }
534                     result.dimensionality *= token.getPower();
535                     state = 1;
536                     break;
537 
538                 case Token::TYPE_SI_PREFIX:
539                     if (state > 1) {
540                         status = kUnitIdentifierSyntaxError;
541                         return;
542                     }
543                     result.siPrefix = token.getSIPrefix();
544                     state = 2;
545                     break;
546 
547                 case Token::TYPE_SIMPLE_UNIT:
548                     result.index = token.getSimpleUnitIndex();
549                     return;
550 
551                 default:
552                     status = kUnitIdentifierSyntaxError;
553                     return;
554             }
555 
556             if (!hasNext()) {
557                 // We ran out of tokens before finding a complete single unit.
558                 status = kUnitIdentifierSyntaxError;
559                 return;
560             }
561             token = nextToken(status);
562             if (U_FAILURE(status)) {
563                 return;
564             }
565         }
566     }
567 
568     /// @param result is modified, not overridden. Caller must pass in a
569     /// default-constructed (empty) MeasureUnitImpl instance.
parseImpl(MeasureUnitImpl & result,UErrorCode & status)570     void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
571         if (U_FAILURE(status)) {
572             return;
573         }
574         if (fSource.empty()) {
575             // The dimenionless unit: nothing to parse. leave result as is.
576             return;
577         }
578         int32_t unitNum = 0;
579         while (hasNext()) {
580             bool sawAnd = false;
581             SingleUnitImpl singleUnit;
582             nextSingleUnit(singleUnit, sawAnd, status);
583             if (U_FAILURE(status)) {
584                 return;
585             }
586             U_ASSERT(!singleUnit.isDimensionless());
587             bool added = result.append(singleUnit, status);
588             if (sawAnd && !added) {
589                 // Two similar units are not allowed in a mixed unit
590                 status = kUnitIdentifierSyntaxError;
591                 return;
592             }
593             if ((++unitNum) >= 2) {
594                 // nextSingleUnit fails appropriately for "per" and "and" in the
595                 // same identifier. It doesn't fail for other compound units
596                 // (COMPOUND_PART_TIMES). Consequently we take care of that
597                 // here.
598                 UMeasureUnitComplexity complexity =
599                     sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
600                 if (unitNum == 2) {
601                     U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
602                     result.complexity = complexity;
603                 } else if (result.complexity != complexity) {
604                     // Can't have mixed compound units
605                     status = kUnitIdentifierSyntaxError;
606                     return;
607                 }
608             }
609         }
610     }
611 };
612 
613 int32_t U_CALLCONV
compareSingleUnits(const void *,const void * left,const void * right)614 compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
615     auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
616     auto realRight = static_cast<const SingleUnitImpl* const*>(right);
617     return (*realLeft)->compareTo(**realRight);
618 }
619 
620 /**
621  * Generate the identifier string for a single unit in place.
622  *
623  * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
624  * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
625  *
626  * @param first If singleUnit is part of a compound unit, and not its first
627  * single unit, set this to false. Otherwise: set to true.
628  */
serializeSingle(const SingleUnitImpl & singleUnit,bool first,CharString & output,UErrorCode & status)629 void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
630     if (first && singleUnit.dimensionality < 0) {
631         // Essentially the "unary per". For compound units with a numerator, the
632         // caller takes care of the "binary per".
633         output.append("per-", status);
634     }
635 
636     if (singleUnit.isDimensionless()) {
637         status = U_INTERNAL_PROGRAM_ERROR;
638         return;
639     }
640     int8_t posPower = std::abs(singleUnit.dimensionality);
641     if (posPower == 0) {
642         status = U_INTERNAL_PROGRAM_ERROR;
643     } else if (posPower == 1) {
644         // no-op
645     } else if (posPower == 2) {
646         output.append("square-", status);
647     } else if (posPower == 3) {
648         output.append("cubic-", status);
649     } else if (posPower < 10) {
650         output.append("pow", status);
651         output.append(posPower + '0', status);
652         output.append('-', status);
653     } else if (posPower <= 15) {
654         output.append("pow1", status);
655         output.append('0' + (posPower % 10), status);
656         output.append('-', status);
657     } else {
658         status = kUnitIdentifierSyntaxError;
659     }
660     if (U_FAILURE(status)) {
661         return;
662     }
663 
664     if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
665         for (const auto& siPrefixInfo : gSIPrefixStrings) {
666             if (siPrefixInfo.value == singleUnit.siPrefix) {
667                 output.append(siPrefixInfo.string, status);
668                 break;
669             }
670         }
671     }
672     if (U_FAILURE(status)) {
673         return;
674     }
675 
676     output.append(singleUnit.getSimpleUnitID(), status);
677 }
678 
679 /**
680  * Normalize a MeasureUnitImpl and generate the identifier string in place.
681  */
serialize(MeasureUnitImpl & impl,UErrorCode & status)682 void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
683     if (U_FAILURE(status)) {
684         return;
685     }
686     U_ASSERT(impl.identifier.isEmpty());
687     if (impl.units.length() == 0) {
688         // Dimensionless, constructed by the default constructor: no appending
689         // to impl.identifier, we wish it to contain the zero-length string.
690         return;
691     }
692     if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
693         // Note: don't sort a MIXED unit
694         uprv_sortArray(
695             impl.units.getAlias(),
696             impl.units.length(),
697             sizeof(impl.units[0]),
698             compareSingleUnits,
699             nullptr,
700             false,
701             &status);
702         if (U_FAILURE(status)) {
703             return;
704         }
705     }
706     serializeSingle(*impl.units[0], true, impl.identifier, status);
707     if (impl.units.length() == 1) {
708         return;
709     }
710     for (int32_t i = 1; i < impl.units.length(); i++) {
711         const SingleUnitImpl& prev = *impl.units[i-1];
712         const SingleUnitImpl& curr = *impl.units[i];
713         if (impl.complexity == UMEASURE_UNIT_MIXED) {
714             impl.identifier.append("-and-", status);
715             serializeSingle(curr, true, impl.identifier, status);
716         } else {
717             if (prev.dimensionality > 0 && curr.dimensionality < 0) {
718                 impl.identifier.append("-per-", status);
719             } else {
720                 impl.identifier.append('-', status);
721             }
722             serializeSingle(curr, false, impl.identifier, status);
723         }
724     }
725 
726 }
727 
728 /**
729  * Appends a SingleUnitImpl to a MeasureUnitImpl.
730  *
731  * @return true if a new item was added. If unit is the dimensionless unit, it
732  * is never added: the return value will always be false.
733  */
appendImpl(MeasureUnitImpl & impl,const SingleUnitImpl & unit,UErrorCode & status)734 bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
735     if (unit.isDimensionless()) {
736         // We don't append dimensionless units.
737         return false;
738     }
739     // Find a similar unit that already exists, to attempt to coalesce
740     SingleUnitImpl* oldUnit = nullptr;
741     for (int32_t i = 0; i < impl.units.length(); i++) {
742         auto* candidate = impl.units[i];
743         if (candidate->isCompatibleWith(unit)) {
744             oldUnit = candidate;
745         }
746     }
747     if (oldUnit) {
748         // Both dimensionalities will be positive, or both will be negative, by
749         // virtue of isCompatibleWith().
750         oldUnit->dimensionality += unit.dimensionality;
751     } else {
752         SingleUnitImpl* destination = impl.units.emplaceBack();
753         if (!destination) {
754             status = U_MEMORY_ALLOCATION_ERROR;
755             return false;
756         }
757         *destination = unit;
758     }
759     return (oldUnit == nullptr);
760 }
761 
762 } // namespace
763 
764 
forMeasureUnit(const MeasureUnit & measureUnit,UErrorCode & status)765 SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
766     MeasureUnitImpl temp;
767     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
768     if (U_FAILURE(status)) {
769         return {};
770     }
771     if (impl.units.length() == 0) {
772         return {};
773     }
774     if (impl.units.length() == 1) {
775         return *impl.units[0];
776     }
777     status = U_ILLEGAL_ARGUMENT_ERROR;
778     return {};
779 }
780 
build(UErrorCode & status) const781 MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
782     MeasureUnitImpl temp;
783     temp.append(*this, status);
784     return std::move(temp).build(status);
785 }
786 
getSimpleUnitID() const787 const char *SingleUnitImpl::getSimpleUnitID() const {
788     return gSimpleUnits[index];
789 }
790 
// Copying constructor with error reporting: takes over the contents of a copy
// made by other.copy(status).
MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) {
    MeasureUnitImpl duplicate = other.copy(status);
    *this = std::move(duplicate);
}
794 
MeasureUnitImpl(const SingleUnitImpl & singleUnit,UErrorCode & status)795 MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
796     this->append(singleUnit, status);
797 }
798 
forIdentifier(StringPiece identifier,UErrorCode & status)799 MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
800     return Parser::from(identifier, status).parse(status);
801 }
802 
forMeasureUnit(const MeasureUnit & measureUnit,MeasureUnitImpl & memory,UErrorCode & status)803 const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
804         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
805     if (measureUnit.fImpl) {
806         return *measureUnit.fImpl;
807     } else {
808         memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
809         return memory;
810     }
811 }
812 
forMeasureUnitMaybeCopy(const MeasureUnit & measureUnit,UErrorCode & status)813 MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
814         const MeasureUnit& measureUnit, UErrorCode& status) {
815     if (measureUnit.fImpl) {
816         return measureUnit.fImpl->copy(status);
817     } else {
818         return Parser::from(measureUnit.getIdentifier(), status).parse(status);
819     }
820 }
821 
takeReciprocal(UErrorCode &)822 void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
823     identifier.clear();
824     for (int32_t i = 0; i < units.length(); i++) {
825         units[i]->dimensionality *= -1;
826     }
827 }
828 
append(const SingleUnitImpl & singleUnit,UErrorCode & status)829 bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) {
830     identifier.clear();
831     return appendImpl(*this, singleUnit, status);
832 }
833 
extractIndividualUnits(UErrorCode & status) const834 MaybeStackVector<MeasureUnitImpl> MeasureUnitImpl::extractIndividualUnits(UErrorCode &status) const {
835     MaybeStackVector<MeasureUnitImpl> result;
836 
837     if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
838         result.emplaceBackAndCheckErrorCode(status, *this, status);
839         return result;
840     }
841 
842     for (int32_t i = 0; i < units.length(); i++) {
843         result.emplaceBackAndCheckErrorCode(status, *units[i], status);
844     }
845 
846     return result;
847 }
848 
build(UErrorCode & status)849 MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
850     serialize(*this, status);
851     return MeasureUnit(std::move(*this));
852 }
853 
forIdentifier(StringPiece identifier,UErrorCode & status)854 MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
855     return Parser::from(identifier, status).parse(status).build(status);
856 }
857 
getComplexity(UErrorCode & status) const858 UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
859     MeasureUnitImpl temp;
860     return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
861 }
862 
getSIPrefix(UErrorCode & status) const863 UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const {
864     return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix;
865 }
866 
withSIPrefix(UMeasureSIPrefix prefix,UErrorCode & status) const867 MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const {
868     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
869     singleUnit.siPrefix = prefix;
870     return singleUnit.build(status);
871 }
872 
getDimensionality(UErrorCode & status) const873 int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
874     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
875     if (U_FAILURE(status)) { return 0; }
876     if (singleUnit.isDimensionless()) {
877         return 0;
878     }
879     return singleUnit.dimensionality;
880 }
881 
withDimensionality(int32_t dimensionality,UErrorCode & status) const882 MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
883     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
884     singleUnit.dimensionality = dimensionality;
885     return singleUnit.build(status);
886 }
887 
reciprocal(UErrorCode & status) const888 MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
889     MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
890     impl.takeReciprocal(status);
891     return std::move(impl).build(status);
892 }
893 
// Returns the product of this unit and `other`.
//
// If either operand is a MIXED unit, status is set to
// U_ILLEGAL_ARGUMENT_ERROR and a default-constructed MeasureUnit is returned.
// Otherwise every single unit of `other` is appended to a private copy of
// this unit, and the result is marked COMPOUND when it ends up holding more
// than one single unit.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    // Backing storage, used only if `other` has to be parsed.
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& rhs = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool anyMixed =
        combined.complexity == UMEASURE_UNIT_MIXED || rhs.complexity == UMEASURE_UNIT_MIXED;
    if (anyMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t rhsCount = rhs.units.length();
    for (int32_t pos = 0; pos < rhsCount; ++pos) {
        combined.append(*rhs.units[pos], status);
    }
    if (combined.units.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
910 
splitToSingleUnitsImpl(int32_t & outCount,UErrorCode & status) const911 LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
912     MeasureUnitImpl temp;
913     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
914     outCount = impl.units.length();
915     MeasureUnit* arr = new MeasureUnit[outCount];
916     if (arr == nullptr) {
917         status = U_MEMORY_ALLOCATION_ERROR;
918         return LocalArray<MeasureUnit>();
919     }
920     for (int32_t i = 0; i < outCount; i++) {
921         arr[i] = impl.units[i]->build(status);
922     }
923     return LocalArray<MeasureUnit>(arr, status);
924 }
925 
926 
927 U_NAMESPACE_END
928 
#endif /* !UCONFIG_NO_FORMATTING */
930