1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2007-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File plurrule.cpp
10 */
11 
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "unicode/utypes.h"
16 #include "unicode/localpointer.h"
17 #include "unicode/plurrule.h"
18 #include "unicode/upluralrules.h"
19 #include "unicode/ures.h"
20 #include "unicode/numfmt.h"
21 #include "unicode/decimfmt.h"
22 #include "charstr.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "hash.h"
26 #include "locutil.h"
27 #include "mutex.h"
28 #include "patternprops.h"
29 #include "plurrule_impl.h"
30 #include "putilimp.h"
31 #include "ucln_in.h"
32 #include "ustrfmt.h"
33 #include "uassert.h"
34 #include "uvectr32.h"
35 #include "sharedpluralrules.h"
36 #include "unifiedcache.h"
37 #include "number_decimalquantity.h"
38 
39 #if !UCONFIG_NO_FORMATTING
40 
41 U_NAMESPACE_BEGIN
42 
43 using namespace icu::pluralimpl;
44 using icu::number::impl::DecimalQuantity;
45 
46 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
47 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
48 static const UChar PK_IN[]={LOW_I,LOW_N,0};
49 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
50 static const UChar PK_IS[]={LOW_I,LOW_S,0};
51 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
52 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
53 static const UChar PK_OR[]={LOW_O,LOW_R,0};
54 static const UChar PK_VAR_N[]={LOW_N,0};
55 static const UChar PK_VAR_I[]={LOW_I,0};
56 static const UChar PK_VAR_F[]={LOW_F,0};
57 static const UChar PK_VAR_T[]={LOW_T,0};
58 static const UChar PK_VAR_V[]={LOW_V,0};
59 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
60 static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
61 static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
62 
63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)64 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
65 
66 PluralRules::PluralRules(UErrorCode& /*status*/)
67 :   UObject(),
68     mRules(nullptr),
69     mInternalStatus(U_ZERO_ERROR)
70 {
71 }
72 
// Copy constructor: starts from an empty, valid state and then delegates the
// deep copy (and any error bookkeeping) to the assignment operator.
PluralRules::PluralRules(const PluralRules& other)
    : UObject(other), mRules(nullptr), mInternalStatus(U_ZERO_ERROR) {
    operator=(other);
}
80 
~PluralRules()81 PluralRules::~PluralRules() {
82     delete mRules;
83 }
84 
~SharedPluralRules()85 SharedPluralRules::~SharedPluralRules() {
86     delete ptr;
87 }
88 
89 PluralRules*
clone() const90 PluralRules::clone() const {
91     PluralRules* newObj = new PluralRules(*this);
92     // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr if
93     // the newly created object was not fully constructed properly (an error occurred).
94     if (newObj != nullptr && U_FAILURE(newObj->mInternalStatus)) {
95         delete newObj;
96         newObj = nullptr;
97     }
98     return newObj;
99 }
100 
101 PluralRules&
operator =(const PluralRules & other)102 PluralRules::operator=(const PluralRules& other) {
103     if (this != &other) {
104         delete mRules;
105         mRules = nullptr;
106         mInternalStatus = other.mInternalStatus;
107         if (U_FAILURE(mInternalStatus)) {
108             // bail out early if the object we were copying from was already 'invalid'.
109             return *this;
110         }
111         if (other.mRules != nullptr) {
112             mRules = new RuleChain(*other.mRules);
113             if (mRules == nullptr) {
114                 mInternalStatus = U_MEMORY_ALLOCATION_ERROR;
115             }
116             else if (U_FAILURE(mRules->fInternalStatus)) {
117                 // If the RuleChain wasn't fully copied, then set our status to failure as well.
118                 mInternalStatus = mRules->fInternalStatus;
119             }
120         }
121     }
122     return *this;
123 }
124 
getAvailableLocales(UErrorCode & status)125 StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
126     if (U_FAILURE(status)) {
127         return nullptr;
128     }
129     LocalPointer<StringEnumeration> result(new PluralAvailableLocalesEnumeration(status), status);
130     if (U_FAILURE(status)) {
131         return nullptr;
132     }
133     return result.orphan();
134 }
135 
136 
137 PluralRules* U_EXPORT2
createRules(const UnicodeString & description,UErrorCode & status)138 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
139     if (U_FAILURE(status)) {
140         return nullptr;
141     }
142     PluralRuleParser parser;
143     LocalPointer<PluralRules> newRules(new PluralRules(status), status);
144     if (U_FAILURE(status)) {
145         return nullptr;
146     }
147     parser.parse(description, newRules.getAlias(), status);
148     if (U_FAILURE(status)) {
149         newRules.adoptInstead(nullptr);
150     }
151     return newRules.orphan();
152 }
153 
154 
155 PluralRules* U_EXPORT2
createDefaultRules(UErrorCode & status)156 PluralRules::createDefaultRules(UErrorCode& status) {
157     return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
158 }
159 
160 /******************************************************************************/
161 /* Create PluralRules cache */
162 
163 template<> U_I18N_API
createObject(const void *,UErrorCode & status) const164 const SharedPluralRules *LocaleCacheKey<SharedPluralRules>::createObject(
165         const void * /*unused*/, UErrorCode &status) const {
166     const char *localeId = fLoc.getName();
167     LocalPointer<PluralRules> pr(PluralRules::internalForLocale(localeId, UPLURAL_TYPE_CARDINAL, status), status);
168     if (U_FAILURE(status)) {
169         return nullptr;
170     }
171     LocalPointer<SharedPluralRules> result(new SharedPluralRules(pr.getAlias()), status);
172     if (U_FAILURE(status)) {
173         return nullptr;
174     }
175     pr.orphan(); // result was successfully created so it nows pr.
176     result->addRef();
177     return result.orphan();
178 }
179 
180 /* end plural rules cache */
181 /******************************************************************************/
182 
183 const SharedPluralRules* U_EXPORT2
createSharedInstance(const Locale & locale,UPluralType type,UErrorCode & status)184 PluralRules::createSharedInstance(
185         const Locale& locale, UPluralType type, UErrorCode& status) {
186     if (U_FAILURE(status)) {
187         return nullptr;
188     }
189     if (type != UPLURAL_TYPE_CARDINAL) {
190         status = U_UNSUPPORTED_ERROR;
191         return nullptr;
192     }
193     const SharedPluralRules *result = nullptr;
194     UnifiedCache::getByLocale(locale, result, status);
195     return result;
196 }
197 
198 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UErrorCode & status)199 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
200     return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
201 }
202 
203 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UPluralType type,UErrorCode & status)204 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
205     if (type != UPLURAL_TYPE_CARDINAL) {
206         return internalForLocale(locale, type, status);
207     }
208     const SharedPluralRules *shared = createSharedInstance(
209             locale, type, status);
210     if (U_FAILURE(status)) {
211         return nullptr;
212     }
213     PluralRules *result = (*shared)->clone();
214     shared->removeRef();
215     if (result == nullptr) {
216         status = U_MEMORY_ALLOCATION_ERROR;
217     }
218     return result;
219 }
220 
221 PluralRules* U_EXPORT2
internalForLocale(const Locale & locale,UPluralType type,UErrorCode & status)222 PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
223     if (U_FAILURE(status)) {
224         return nullptr;
225     }
226     if (type >= UPLURAL_TYPE_COUNT) {
227         status = U_ILLEGAL_ARGUMENT_ERROR;
228         return nullptr;
229     }
230     LocalPointer<PluralRules> newObj(new PluralRules(status), status);
231     if (U_FAILURE(status)) {
232         return nullptr;
233     }
234     UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
235     // TODO: which other errors, if any, should be returned?
236     if (locRule.length() == 0) {
237         // If an out-of-memory error occurred, then stop and report the failure.
238         if (status == U_MEMORY_ALLOCATION_ERROR) {
239             return nullptr;
240         }
241         // Locales with no specific rules (all numbers have the "other" category
242         //   will return a U_MISSING_RESOURCE_ERROR at this point. This is not
243         //   an error.
244         locRule =  UnicodeString(PLURAL_DEFAULT_RULE);
245         status = U_ZERO_ERROR;
246     }
247     PluralRuleParser parser;
248     parser.parse(locRule, newObj.getAlias(), status);
249         //  TODO: should rule parse errors be returned, or
250         //        should we silently use default rules?
251         //        Original impl used default rules.
252         //        Ask the question to ICU Core.
253 
254     return newObj.orphan();
255 }
256 
257 UnicodeString
select(int32_t number) const258 PluralRules::select(int32_t number) const {
259     return select(FixedDecimal(number));
260 }
261 
262 UnicodeString
select(double number) const263 PluralRules::select(double number) const {
264     return select(FixedDecimal(number));
265 }
266 
267 UnicodeString
select(const IFixedDecimal & number) const268 PluralRules::select(const IFixedDecimal &number) const {
269     if (mRules == nullptr) {
270         return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
271     }
272     else {
273         return mRules->select(number);
274     }
275 }
276 
277 
278 
279 StringEnumeration*
getKeywords(UErrorCode & status) const280 PluralRules::getKeywords(UErrorCode& status) const {
281     if (U_FAILURE(status)) {
282         return nullptr;
283     }
284     if (U_FAILURE(mInternalStatus)) {
285         status = mInternalStatus;
286         return nullptr;
287     }
288     LocalPointer<StringEnumeration> nameEnumerator(new PluralKeywordEnumeration(mRules, status), status);
289     if (U_FAILURE(status)) {
290         return nullptr;
291     }
292     return nameEnumerator.orphan();
293 }
294 
295 double
getUniqueKeywordValue(const UnicodeString &)296 PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
297   // Not Implemented.
298   return UPLRULES_NO_UNIQUE_VALUE;
299 }
300 
301 int32_t
getAllKeywordValues(const UnicodeString &,double *,int32_t,UErrorCode & error)302 PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
303                                  int32_t /* destCapacity */, UErrorCode& error) {
304     error = U_UNSUPPORTED_ERROR;
305     return 0;
306 }
307 
308 
// Returns the smallest power of ten (1, 10, 100, ...) that this loop finds
// turns d into a whole number when d is repeatedly multiplied by ten.
//
// NaN never compares equal to its own floor, so without the guard below the
// loop would spin forever; for NaN the neutral scale 1.0 is returned instead.
static double scaleForInt(double d) {
    if (d != d) {   // true only for NaN
        return 1.0;
    }
    double scale = 1.0;
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
317 
318 static int32_t
getSamplesFromString(const UnicodeString & samples,double * dest,int32_t destCapacity,UErrorCode & status)319 getSamplesFromString(const UnicodeString &samples, double *dest,
320                         int32_t destCapacity, UErrorCode& status) {
321     int32_t sampleCount = 0;
322     int32_t sampleStartIdx = 0;
323     int32_t sampleEndIdx = 0;
324 
325     //std::string ss;  // TODO: debugging.
326     // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
327     for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
328         sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
329         if (sampleEndIdx == -1) {
330             sampleEndIdx = samples.length();
331         }
332         const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
333         // ss.erase();
334         // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
335         int32_t tildeIndex = sampleRange.indexOf(TILDE);
336         if (tildeIndex < 0) {
337             FixedDecimal fixed(sampleRange, status);
338             double sampleValue = fixed.source;
339             if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
340                 dest[sampleCount++] = sampleValue;
341             }
342         } else {
343 
344             FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
345             FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
346             double rangeLo = fixedLo.source;
347             double rangeHi = fixedHi.source;
348             if (U_FAILURE(status)) {
349                 break;
350             }
351             if (rangeHi < rangeLo) {
352                 status = U_INVALID_FORMAT_ERROR;
353                 break;
354             }
355 
356             // For ranges of samples with fraction decimal digits, scale the number up so that we
357             //   are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
358 
359             double scale = scaleForInt(rangeLo);
360             double t = scaleForInt(rangeHi);
361             if (t > scale) {
362                 scale = t;
363             }
364             rangeLo *= scale;
365             rangeHi *= scale;
366             for (double n=rangeLo; n<=rangeHi; n+=1) {
367                 // Hack Alert: don't return any decimal samples with integer values that
368                 //    originated from a format with trailing decimals.
369                 //    This API is returning doubles, which can't distinguish having displayed
370                 //    zeros to the right of the decimal.
371                 //    This results in test failures with values mapping back to a different keyword.
372                 double sampleValue = n/scale;
373                 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
374                     dest[sampleCount++] = sampleValue;
375                 }
376                 if (sampleCount >= destCapacity) {
377                     break;
378                 }
379             }
380         }
381         sampleStartIdx = sampleEndIdx + 1;
382     }
383     return sampleCount;
384 }
385 
386 
387 int32_t
getSamples(const UnicodeString & keyword,double * dest,int32_t destCapacity,UErrorCode & status)388 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
389                         int32_t destCapacity, UErrorCode& status) {
390     if (destCapacity == 0 || U_FAILURE(status)) {
391         return 0;
392     }
393     if (U_FAILURE(mInternalStatus)) {
394         status = mInternalStatus;
395         return 0;
396     }
397     RuleChain *rc = rulesForKeyword(keyword);
398     if (rc == nullptr) {
399         return 0;
400     }
401     int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status);
402     if (numSamples == 0) {
403         numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status);
404     }
405     return numSamples;
406 }
407 
408 
rulesForKeyword(const UnicodeString & keyword) const409 RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
410     RuleChain *rc;
411     for (rc = mRules; rc != nullptr; rc = rc->fNext) {
412         if (rc->fKeyword == keyword) {
413             break;
414         }
415     }
416     return rc;
417 }
418 
419 
420 UBool
isKeyword(const UnicodeString & keyword) const421 PluralRules::isKeyword(const UnicodeString& keyword) const {
422     if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
423         return true;
424     }
425     return rulesForKeyword(keyword) != nullptr;
426 }
427 
428 UnicodeString
getKeywordOther() const429 PluralRules::getKeywordOther() const {
430     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
431 }
432 
433 UBool
operator ==(const PluralRules & other) const434 PluralRules::operator==(const PluralRules& other) const  {
435     const UnicodeString *ptrKeyword;
436     UErrorCode status= U_ZERO_ERROR;
437 
438     if ( this == &other ) {
439         return TRUE;
440     }
441     LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
442     LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
443     if (U_FAILURE(status)) {
444         return FALSE;
445     }
446 
447     if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
448         return FALSE;
449     }
450     myKeywordList->reset(status);
451     while ((ptrKeyword=myKeywordList->snext(status))!=nullptr) {
452         if (!other.isKeyword(*ptrKeyword)) {
453             return FALSE;
454         }
455     }
456     otherKeywordList->reset(status);
457     while ((ptrKeyword=otherKeywordList->snext(status))!=nullptr) {
458         if (!this->isKeyword(*ptrKeyword)) {
459             return FALSE;
460         }
461     }
462     if (U_FAILURE(status)) {
463         return FALSE;
464     }
465 
466     return TRUE;
467 }
468 
469 
470 void
parse(const UnicodeString & ruleData,PluralRules * prules,UErrorCode & status)471 PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
472 {
473     if (U_FAILURE(status)) {
474         return;
475     }
476     U_ASSERT(ruleIndex == 0);    // Parsers are good for a single use only!
477     ruleSrc = &ruleData;
478 
479     while (ruleIndex< ruleSrc->length()) {
480         getNextToken(status);
481         if (U_FAILURE(status)) {
482             return;
483         }
484         checkSyntax(status);
485         if (U_FAILURE(status)) {
486             return;
487         }
488         switch (type) {
489         case tAnd:
490             U_ASSERT(curAndConstraint != nullptr);
491             curAndConstraint = curAndConstraint->add(status);
492             break;
493         case tOr:
494             {
495                 U_ASSERT(currentChain != nullptr);
496                 OrConstraint *orNode=currentChain->ruleHeader;
497                 while (orNode->next != nullptr) {
498                     orNode = orNode->next;
499                 }
500                 orNode->next= new OrConstraint();
501                 if (orNode->next == nullptr) {
502                     status = U_MEMORY_ALLOCATION_ERROR;
503                     break;
504                 }
505                 orNode=orNode->next;
506                 orNode->next=nullptr;
507                 curAndConstraint = orNode->add(status);
508             }
509             break;
510         case tIs:
511             U_ASSERT(curAndConstraint != nullptr);
512             U_ASSERT(curAndConstraint->value == -1);
513             U_ASSERT(curAndConstraint->rangeList == nullptr);
514             break;
515         case tNot:
516             U_ASSERT(curAndConstraint != nullptr);
517             curAndConstraint->negated=TRUE;
518             break;
519 
520         case tNotEqual:
521             curAndConstraint->negated=TRUE;
522             U_FALLTHROUGH;
523         case tIn:
524         case tWithin:
525         case tEqual:
526             {
527                 U_ASSERT(curAndConstraint != nullptr);
528                 LocalPointer<UVector32> newRangeList(new UVector32(status), status);
529                 if (U_FAILURE(status)) {
530                     break;
531                 }
532                 curAndConstraint->rangeList = newRangeList.orphan();
533                 curAndConstraint->rangeList->addElement(-1, status);  // range Low
534                 curAndConstraint->rangeList->addElement(-1, status);  // range Hi
535                 rangeLowIdx = 0;
536                 rangeHiIdx  = 1;
537                 curAndConstraint->value=PLURAL_RANGE_HIGH;
538                 curAndConstraint->integerOnly = (type != tWithin);
539             }
540             break;
541         case tNumber:
542             U_ASSERT(curAndConstraint != nullptr);
543             if ( (curAndConstraint->op==AndConstraint::MOD)&&
544                  (curAndConstraint->opNum == -1 ) ) {
545                 curAndConstraint->opNum=getNumberValue(token);
546             }
547             else {
548                 if (curAndConstraint->rangeList == nullptr) {
549                     // this is for an 'is' rule
550                     curAndConstraint->value = getNumberValue(token);
551                 } else {
552                     // this is for an 'in' or 'within' rule
553                     if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
554                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
555                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
556                     }
557                     else {
558                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
559                         if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
560                                 curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
561                             // Range Lower bound > Range Upper bound.
562                             // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
563                             // used for all plural rule parse errors.
564                             status = U_UNEXPECTED_TOKEN;
565                             break;
566                         }
567                     }
568                 }
569             }
570             break;
571         case tComma:
572             // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
573             //       Catch cases like "n mod 10, is 1" here instead.
574             if (curAndConstraint == nullptr || curAndConstraint->rangeList == nullptr) {
575                 status = U_UNEXPECTED_TOKEN;
576                 break;
577             }
578             U_ASSERT(curAndConstraint->rangeList->size() >= 2);
579             rangeLowIdx = curAndConstraint->rangeList->size();
580             curAndConstraint->rangeList->addElement(-1, status);  // range Low
581             rangeHiIdx = curAndConstraint->rangeList->size();
582             curAndConstraint->rangeList->addElement(-1, status);  // range Hi
583             break;
584         case tMod:
585             U_ASSERT(curAndConstraint != nullptr);
586             curAndConstraint->op=AndConstraint::MOD;
587             break;
588         case tVariableN:
589         case tVariableI:
590         case tVariableF:
591         case tVariableT:
592         case tVariableV:
593             U_ASSERT(curAndConstraint != nullptr);
594             curAndConstraint->digitsType = type;
595             break;
596         case tKeyword:
597             {
598             RuleChain *newChain = new RuleChain;
599             if (newChain == nullptr) {
600                 status = U_MEMORY_ALLOCATION_ERROR;
601                 break;
602             }
603             newChain->fKeyword = token;
604             if (prules->mRules == nullptr) {
605                 prules->mRules = newChain;
606             } else {
607                 // The new rule chain goes at the end of the linked list of rule chains,
608                 //   unless there is an "other" keyword & chain. "other" must remain last.
609                 RuleChain *insertAfter = prules->mRules;
610                 while (insertAfter->fNext!=nullptr &&
611                        insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
612                     insertAfter=insertAfter->fNext;
613                 }
614                 newChain->fNext = insertAfter->fNext;
615                 insertAfter->fNext = newChain;
616             }
617             OrConstraint *orNode = new OrConstraint();
618             if (orNode == nullptr) {
619                 status = U_MEMORY_ALLOCATION_ERROR;
620                 break;
621             }
622             newChain->ruleHeader = orNode;
623             curAndConstraint = orNode->add(status);
624             currentChain = newChain;
625             }
626             break;
627 
628         case tInteger:
629             for (;;) {
630                 getNextToken(status);
631                 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
632                     break;
633                 }
634                 if (type == tEllipsis) {
635                     currentChain->fIntegerSamplesUnbounded = TRUE;
636                     continue;
637                 }
638                 currentChain->fIntegerSamples.append(token);
639             }
640             break;
641 
642         case tDecimal:
643             for (;;) {
644                 getNextToken(status);
645                 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
646                     break;
647                 }
648                 if (type == tEllipsis) {
649                     currentChain->fDecimalSamplesUnbounded = TRUE;
650                     continue;
651                 }
652                 currentChain->fDecimalSamples.append(token);
653             }
654             break;
655 
656         default:
657             break;
658         }
659         prevType=type;
660         if (U_FAILURE(status)) {
661             break;
662         }
663     }
664 }
665 
666 UnicodeString
getRuleFromResource(const Locale & locale,UPluralType type,UErrorCode & errCode)667 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
668     UnicodeString emptyStr;
669 
670     if (U_FAILURE(errCode)) {
671         return emptyStr;
672     }
673     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &errCode));
674     if(U_FAILURE(errCode)) {
675         return emptyStr;
676     }
677     const char *typeKey;
678     switch (type) {
679     case UPLURAL_TYPE_CARDINAL:
680         typeKey = "locales";
681         break;
682     case UPLURAL_TYPE_ORDINAL:
683         typeKey = "locales_ordinals";
684         break;
685     default:
686         // Must not occur: The caller should have checked for valid types.
687         errCode = U_ILLEGAL_ARGUMENT_ERROR;
688         return emptyStr;
689     }
690     LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, nullptr, &errCode));
691     if(U_FAILURE(errCode)) {
692         return emptyStr;
693     }
694     int32_t resLen=0;
695     const char *curLocaleName=locale.getName();
696     const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
697 
698     if (s == nullptr) {
699         // Check parent locales.
700         UErrorCode status = U_ZERO_ERROR;
701         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
702         const char *curLocaleName2=locale.getName();
703         uprv_strcpy(parentLocaleName, curLocaleName2);
704 
705         while (uloc_getParent(parentLocaleName, parentLocaleName,
706                                        ULOC_FULLNAME_CAPACITY, &status) > 0) {
707             resLen=0;
708             s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
709             if (s != nullptr) {
710                 errCode = U_ZERO_ERROR;
711                 break;
712             }
713             status = U_ZERO_ERROR;
714         }
715     }
716     if (s==nullptr) {
717         return emptyStr;
718     }
719 
720     char setKey[256];
721     u_UCharsToChars(s, setKey, resLen + 1);
722     // printf("\n PluralRule: %s\n", setKey);
723 
724     LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", nullptr, &errCode));
725     if(U_FAILURE(errCode)) {
726         return emptyStr;
727     }
728     LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, nullptr, &errCode));
729     if (U_FAILURE(errCode)) {
730         return emptyStr;
731     }
732 
733     int32_t numberKeys = ures_getSize(setRes.getAlias());
734     UnicodeString result;
735     const char *key=nullptr;
736     for(int32_t i=0; i<numberKeys; ++i) {   // Keys are zero, one, few, ...
737         UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
738         UnicodeString uKey(key, -1, US_INV);
739         result.append(uKey);
740         result.append(COLON);
741         result.append(rules);
742         result.append(SEMI_COLON);
743     }
744     return result;
745 }
746 
747 
748 UnicodeString
getRules() const749 PluralRules::getRules() const {
750     UnicodeString rules;
751     if (mRules != nullptr) {
752         mRules->dumpRules(rules);
753     }
754     return rules;
755 }
756 
// Copy constructor: deep-copies the range list and the rest of the AND list.
//
// Any failure is recorded in fInternalStatus. A failure inside the copied
// `next` node is now propagated to this node as well, matching what the
// OrConstraint copy constructor does for its own `next`; previously a
// partially copied tail went unnoticed.
AndConstraint::AndConstraint(const AndConstraint& other) {
    this->fInternalStatus = other.fInternalStatus;
    if (U_FAILURE(fInternalStatus)) {
        return; // stop early if the object we are copying from is invalid.
    }
    this->op = other.op;
    this->opNum=other.opNum;
    this->value=other.value;
    if (other.rangeList != nullptr) {
        LocalPointer<UVector32> newRangeList(new UVector32(fInternalStatus), fInternalStatus);
        if (U_FAILURE(fInternalStatus)) {
            return;
        }
        this->rangeList = newRangeList.orphan();
        this->rangeList->assign(*other.rangeList, fInternalStatus);
    }
    this->integerOnly=other.integerOnly;
    this->negated=other.negated;
    this->digitsType = other.digitsType;
    if (other.next != nullptr) {
        this->next = new AndConstraint(*other.next);
        if (this->next == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
        } else if (U_FAILURE(this->next->fInternalStatus)) {
            // The tail was not fully copied, so this node is incomplete too.
            fInternalStatus = this->next->fInternalStatus;
        }
    }
}
783 
~AndConstraint()784 AndConstraint::~AndConstraint() {
785     delete rangeList;
786     rangeList = nullptr;
787     delete next;
788     next = nullptr;
789 }
790 
791 UBool
isFulfilled(const IFixedDecimal & number)792 AndConstraint::isFulfilled(const IFixedDecimal &number) {
793     UBool result = TRUE;
794     if (digitsType == none) {
795         // An empty AndConstraint, created by a rule with a keyword but no following expression.
796         return TRUE;
797     }
798 
799     PluralOperand operand = tokenTypeToPluralOperand(digitsType);
800     double n = number.getPluralOperand(operand);     // pulls n | i | v | f value for the number.
801                                                      // Will always be positive.
802                                                      // May be non-integer (n option only)
803     do {
804         if (integerOnly && n != uprv_floor(n)) {
805             result = FALSE;
806             break;
807         }
808 
809         if (op == MOD) {
810             n = fmod(n, opNum);
811         }
812         if (rangeList == nullptr) {
813             result = value == -1 ||    // empty rule
814                      n == value;       //  'is' rule
815             break;
816         }
817         result = FALSE;                // 'in' or 'within' rule
818         for (int32_t r=0; r<rangeList->size(); r+=2) {
819             if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) {
820                 result = TRUE;
821                 break;
822             }
823         }
824     } while (FALSE);
825 
826     if (negated) {
827         result = !result;
828     }
829     return result;
830 }
831 
832 AndConstraint*
add(UErrorCode & status)833 AndConstraint::add(UErrorCode& status) {
834     if (U_FAILURE(fInternalStatus)) {
835         status = fInternalStatus;
836         return nullptr;
837     }
838     this->next = new AndConstraint();
839     if (this->next == nullptr) {
840         status = U_MEMORY_ALLOCATION_ERROR;
841     }
842     return this->next;
843 }
844 
845 
// Copy constructor: deep-copies the AND chain (childNode) and the rest of
// the OR chain (next).
//
// Constructors cannot report errors through a UErrorCode parameter, so any
// problem is recorded in fInternalStatus instead:
//   - if `other` is already in a failed state, its status is copied and
//     nothing else is done;
//   - an allocation failure sets U_MEMORY_ALLOCATION_ERROR;
//   - a failure recorded by the copied childNode or by the copied `next`
//     is propagated, so that a partially copied tree is never reported
//     as a success to whoever inspects this object's status.
OrConstraint::OrConstraint(const OrConstraint& other) {
    this->fInternalStatus = other.fInternalStatus;
    if (U_FAILURE(fInternalStatus)) {
        return; // stop early if the object we are copying from is invalid.
    }
    if ( other.childNode != nullptr ) {
        this->childNode = new AndConstraint(*(other.childNode));
        if (this->childNode == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(this->childNode->fInternalStatus)) {
            // The AND chain was not fully copied; do not claim success.
            this->fInternalStatus = this->childNode->fInternalStatus;
            return;
        }
    }
    if (other.next != nullptr ) {
        this->next = new OrConstraint(*(other.next));
        if (this->next == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(this->next->fInternalStatus)) {
            this->fInternalStatus = this->next->fInternalStatus;
        }
    }
}
869 
~OrConstraint()870 OrConstraint::~OrConstraint() {
871     delete childNode;
872     childNode = nullptr;
873     delete next;
874     next = nullptr;
875 }
876 
877 AndConstraint*
add(UErrorCode & status)878 OrConstraint::add(UErrorCode& status) {
879     if (U_FAILURE(fInternalStatus)) {
880         status = fInternalStatus;
881         return nullptr;
882     }
883     OrConstraint *curOrConstraint=this;
884     {
885         while (curOrConstraint->next!=nullptr) {
886             curOrConstraint = curOrConstraint->next;
887         }
888         U_ASSERT(curOrConstraint->childNode == nullptr);
889         curOrConstraint->childNode = new AndConstraint();
890         if (curOrConstraint->childNode == nullptr) {
891             status = U_MEMORY_ALLOCATION_ERROR;
892         }
893     }
894     return curOrConstraint->childNode;
895 }
896 
897 UBool
isFulfilled(const IFixedDecimal & number)898 OrConstraint::isFulfilled(const IFixedDecimal &number) {
899     OrConstraint* orRule=this;
900     UBool result=FALSE;
901 
902     while (orRule!=nullptr && !result) {
903         result=TRUE;
904         AndConstraint* andRule = orRule->childNode;
905         while (andRule!=nullptr && result) {
906             result = andRule->isFulfilled(number);
907             andRule=andRule->next;
908         }
909         orRule = orRule->next;
910     }
911 
912     return result;
913 }
914 
915 
// Copy constructor: copies the keyword and sample data, then deep-copies
// the constraint tree (ruleHeader) and the remainder of the chain (fNext).
//
// Errors are recorded in fInternalStatus: the status of `other` is
// inherited, allocation failures set U_MEMORY_ALLOCATION_ERROR, and a
// failure reported by the copied OrConstraint or by the copied next
// RuleChain is propagated to this object.
RuleChain::RuleChain(const RuleChain& other) :
        fKeyword(other.fKeyword),
        fDecimalSamples(other.fDecimalSamples),
        fIntegerSamples(other.fIntegerSamples),
        fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
        fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded),
        fInternalStatus(other.fInternalStatus) {
    if (U_FAILURE(this->fInternalStatus)) {
        // The object we are copying from is invalid; nothing more to do.
        return;
    }

    if (other.ruleHeader != nullptr) {
        this->ruleHeader = new OrConstraint(*(other.ruleHeader));
        if (this->ruleHeader == nullptr) {
            this->fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
        } else if (U_FAILURE(this->ruleHeader->fInternalStatus)) {
            // The OrConstraint was not fully copied: take over its failure and stop.
            this->fInternalStatus = this->ruleHeader->fInternalStatus;
            return;
        }
    }

    if (other.fNext == nullptr) {
        return;
    }
    this->fNext = new RuleChain(*other.fNext);
    if (this->fNext == nullptr) {
        this->fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
    } else if (U_FAILURE(this->fNext->fInternalStatus)) {
        // The rest of the chain was not fully copied: take over its failure.
        this->fInternalStatus = this->fNext->fInternalStatus;
    }
}
945 
~RuleChain()946 RuleChain::~RuleChain() {
947     delete fNext;
948     delete ruleHeader;
949 }
950 
951 UnicodeString
select(const IFixedDecimal & number) const952 RuleChain::select(const IFixedDecimal &number) const {
953     if (!number.isNaN() && !number.isInfinite()) {
954         for (const RuleChain *rules = this; rules != nullptr; rules = rules->fNext) {
955              if (rules->ruleHeader->isFulfilled(number)) {
956                  return rules->fKeyword;
957              }
958         }
959     }
960     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
961 }
962 
tokenString(tokenType tok)963 static UnicodeString tokenString(tokenType tok) {
964     UnicodeString s;
965     switch (tok) {
966       case tVariableN:
967         s.append(LOW_N); break;
968       case tVariableI:
969         s.append(LOW_I); break;
970       case tVariableF:
971         s.append(LOW_F); break;
972       case tVariableV:
973         s.append(LOW_V); break;
974       case tVariableT:
975         s.append(LOW_T); break;
976       default:
977         s.append(TILDE);
978     }
979     return s;
980 }
981 
982 void
dumpRules(UnicodeString & result)983 RuleChain::dumpRules(UnicodeString& result) {
984     UChar digitString[16];
985 
986     if ( ruleHeader != nullptr ) {
987         result +=  fKeyword;
988         result += COLON;
989         result += SPACE;
990         OrConstraint* orRule=ruleHeader;
991         while ( orRule != nullptr ) {
992             AndConstraint* andRule=orRule->childNode;
993             while ( andRule != nullptr ) {
994                 if ((andRule->op==AndConstraint::NONE) &&  (andRule->rangeList==nullptr) && (andRule->value == -1)) {
995                     // Empty Rules.
996                 } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) ) {
997                     result += tokenString(andRule->digitsType);
998                     result += UNICODE_STRING_SIMPLE(" is ");
999                     if (andRule->negated) {
1000                         result += UNICODE_STRING_SIMPLE("not ");
1001                     }
1002                     uprv_itou(digitString,16, andRule->value,10,0);
1003                     result += UnicodeString(digitString);
1004                 }
1005                 else {
1006                     result += tokenString(andRule->digitsType);
1007                     result += SPACE;
1008                     if (andRule->op==AndConstraint::MOD) {
1009                         result += UNICODE_STRING_SIMPLE("mod ");
1010                         uprv_itou(digitString,16, andRule->opNum,10,0);
1011                         result += UnicodeString(digitString);
1012                     }
1013                     if (andRule->rangeList==nullptr) {
1014                         if (andRule->negated) {
1015                             result += UNICODE_STRING_SIMPLE(" is not ");
1016                             uprv_itou(digitString,16, andRule->value,10,0);
1017                             result += UnicodeString(digitString);
1018                         }
1019                         else {
1020                             result += UNICODE_STRING_SIMPLE(" is ");
1021                             uprv_itou(digitString,16, andRule->value,10,0);
1022                             result += UnicodeString(digitString);
1023                         }
1024                     }
1025                     else {
1026                         if (andRule->negated) {
1027                             if ( andRule->integerOnly ) {
1028                                 result += UNICODE_STRING_SIMPLE(" not in ");
1029                             }
1030                             else {
1031                                 result += UNICODE_STRING_SIMPLE(" not within ");
1032                             }
1033                         }
1034                         else {
1035                             if ( andRule->integerOnly ) {
1036                                 result += UNICODE_STRING_SIMPLE(" in ");
1037                             }
1038                             else {
1039                                 result += UNICODE_STRING_SIMPLE(" within ");
1040                             }
1041                         }
1042                         for (int32_t r=0; r<andRule->rangeList->size(); r+=2) {
1043                             int32_t rangeLo = andRule->rangeList->elementAti(r);
1044                             int32_t rangeHi = andRule->rangeList->elementAti(r+1);
1045                             uprv_itou(digitString,16, rangeLo, 10, 0);
1046                             result += UnicodeString(digitString);
1047                             result += UNICODE_STRING_SIMPLE("..");
1048                             uprv_itou(digitString,16, rangeHi, 10,0);
1049                             result += UnicodeString(digitString);
1050                             if (r+2 < andRule->rangeList->size()) {
1051                                 result += UNICODE_STRING_SIMPLE(", ");
1052                             }
1053                         }
1054                     }
1055                 }
1056                 if ( (andRule=andRule->next) != nullptr) {
1057                     result += UNICODE_STRING_SIMPLE(" and ");
1058                 }
1059             }
1060             if ( (orRule = orRule->next) != nullptr ) {
1061                 result += UNICODE_STRING_SIMPLE(" or ");
1062             }
1063         }
1064     }
1065     if ( fNext != nullptr ) {
1066         result += UNICODE_STRING_SIMPLE("; ");
1067         fNext->dumpRules(result);
1068     }
1069 }
1070 
1071 
1072 UErrorCode
getKeywords(int32_t capacityOfKeywords,UnicodeString * keywords,int32_t & arraySize) const1073 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1074     if (U_FAILURE(fInternalStatus)) {
1075         return fInternalStatus;
1076     }
1077     if ( arraySize < capacityOfKeywords-1 ) {
1078         keywords[arraySize++]=fKeyword;
1079     }
1080     else {
1081         return U_BUFFER_OVERFLOW_ERROR;
1082     }
1083 
1084     if ( fNext != nullptr ) {
1085         return fNext->getKeywords(capacityOfKeywords, keywords, arraySize);
1086     }
1087     else {
1088         return U_ZERO_ERROR;
1089     }
1090 }
1091 
1092 UBool
isKeyword(const UnicodeString & keywordParam) const1093 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1094     if ( fKeyword == keywordParam ) {
1095         return TRUE;
1096     }
1097 
1098     if ( fNext != nullptr ) {
1099         return fNext->isKeyword(keywordParam);
1100     }
1101     else {
1102         return FALSE;
1103     }
1104 }
1105 
1106 
PluralRuleParser()1107 PluralRuleParser::PluralRuleParser() :
1108         ruleIndex(0), token(), type(none), prevType(none),
1109         curAndConstraint(nullptr), currentChain(nullptr), rangeLowIdx(-1), rangeHiIdx(-1)
1110 {
1111 }
1112 
~PluralRuleParser()1113 PluralRuleParser::~PluralRuleParser() {
1114 }
1115 
1116 
1117 int32_t
getNumberValue(const UnicodeString & token)1118 PluralRuleParser::getNumberValue(const UnicodeString& token) {
1119     int32_t i;
1120     char digits[128];
1121 
1122     i = token.extract(0, token.length(), digits, UPRV_LENGTHOF(digits), US_INV);
1123     digits[i]='\0';
1124 
1125     return((int32_t)atoi(digits));
1126 }
1127 
1128 
1129 void
checkSyntax(UErrorCode & status)1130 PluralRuleParser::checkSyntax(UErrorCode &status)
1131 {
1132     if (U_FAILURE(status)) {
1133         return;
1134     }
1135     if (!(prevType==none || prevType==tSemiColon)) {
1136         type = getKeyType(token, type);  // Switch token type from tKeyword if we scanned a reserved word,
1137                                                //   and we are not at the start of a rule, where a
1138                                                //   keyword is expected.
1139     }
1140 
1141     switch(prevType) {
1142     case none:
1143     case tSemiColon:
1144         if (type!=tKeyword && type != tEOF) {
1145             status = U_UNEXPECTED_TOKEN;
1146         }
1147         break;
1148     case tVariableN:
1149     case tVariableI:
1150     case tVariableF:
1151     case tVariableT:
1152     case tVariableV:
1153         if (type != tIs && type != tMod && type != tIn &&
1154             type != tNot && type != tWithin && type != tEqual && type != tNotEqual) {
1155             status = U_UNEXPECTED_TOKEN;
1156         }
1157         break;
1158     case tKeyword:
1159         if (type != tColon) {
1160             status = U_UNEXPECTED_TOKEN;
1161         }
1162         break;
1163     case tColon:
1164         if (!(type == tVariableN ||
1165               type == tVariableI ||
1166               type == tVariableF ||
1167               type == tVariableT ||
1168               type == tVariableV ||
1169               type == tAt)) {
1170             status = U_UNEXPECTED_TOKEN;
1171         }
1172         break;
1173     case tIs:
1174         if ( type != tNumber && type != tNot) {
1175             status = U_UNEXPECTED_TOKEN;
1176         }
1177         break;
1178     case tNot:
1179         if (type != tNumber && type != tIn && type != tWithin) {
1180             status = U_UNEXPECTED_TOKEN;
1181         }
1182         break;
1183     case tMod:
1184     case tDot2:
1185     case tIn:
1186     case tWithin:
1187     case tEqual:
1188     case tNotEqual:
1189         if (type != tNumber) {
1190             status = U_UNEXPECTED_TOKEN;
1191         }
1192         break;
1193     case tAnd:
1194     case tOr:
1195         if ( type != tVariableN &&
1196              type != tVariableI &&
1197              type != tVariableF &&
1198              type != tVariableT &&
1199              type != tVariableV) {
1200             status = U_UNEXPECTED_TOKEN;
1201         }
1202         break;
1203     case tComma:
1204         if (type != tNumber) {
1205             status = U_UNEXPECTED_TOKEN;
1206         }
1207         break;
1208     case tNumber:
1209         if (type != tDot2  && type != tSemiColon && type != tIs       && type != tNot    &&
1210             type != tIn    && type != tEqual     && type != tNotEqual && type != tWithin &&
1211             type != tAnd   && type != tOr        && type != tComma    && type != tAt     &&
1212             type != tEOF)
1213         {
1214             status = U_UNEXPECTED_TOKEN;
1215         }
1216         // TODO: a comma following a number that is not part of a range will be allowed.
1217         //       It's not the only case of this sort of thing. Parser needs a re-write.
1218         break;
1219     case tAt:
1220         if (type != tDecimal && type != tInteger) {
1221             status = U_UNEXPECTED_TOKEN;
1222         }
1223         break;
1224     default:
1225         status = U_UNEXPECTED_TOKEN;
1226         break;
1227     }
1228 }
1229 
1230 
1231 /*
1232  *  Scan the next token from the input rules.
1233  *     rules and returned token type are in the parser state variables.
1234  */
1235 void
getNextToken(UErrorCode & status)1236 PluralRuleParser::getNextToken(UErrorCode &status)
1237 {
1238     if (U_FAILURE(status)) {
1239         return;
1240     }
1241 
1242     UChar ch;
1243     while (ruleIndex < ruleSrc->length()) {
1244         ch = ruleSrc->charAt(ruleIndex);
1245         type = charType(ch);
1246         if (type != tSpace) {
1247             break;
1248         }
1249         ++(ruleIndex);
1250     }
1251     if (ruleIndex >= ruleSrc->length()) {
1252         type = tEOF;
1253         return;
1254     }
1255     int32_t curIndex= ruleIndex;
1256 
1257     switch (type) {
1258       case tColon:
1259       case tSemiColon:
1260       case tComma:
1261       case tEllipsis:
1262       case tTilde:   // scanned '~'
1263       case tAt:      // scanned '@'
1264       case tEqual:   // scanned '='
1265       case tMod:     // scanned '%'
1266         // Single character tokens.
1267         ++curIndex;
1268         break;
1269 
1270       case tNotEqual:  // scanned '!'
1271         if (ruleSrc->charAt(curIndex+1) == EQUALS) {
1272             curIndex += 2;
1273         } else {
1274             type = none;
1275             curIndex += 1;
1276         }
1277         break;
1278 
1279       case tKeyword:
1280          while (type == tKeyword && ++curIndex < ruleSrc->length()) {
1281              ch = ruleSrc->charAt(curIndex);
1282              type = charType(ch);
1283          }
1284          type = tKeyword;
1285          break;
1286 
1287       case tNumber:
1288          while (type == tNumber && ++curIndex < ruleSrc->length()) {
1289              ch = ruleSrc->charAt(curIndex);
1290              type = charType(ch);
1291          }
1292          type = tNumber;
1293          break;
1294 
1295        case tDot:
1296          // We could be looking at either ".." in a range, or "..." at the end of a sample.
1297          if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) {
1298              ++curIndex;
1299              break; // Single dot
1300          }
1301          if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) {
1302              curIndex += 2;
1303              type = tDot2;
1304              break; // double dot
1305          }
1306          type = tEllipsis;
1307          curIndex += 3;
1308          break;     // triple dot
1309 
1310        default:
1311          status = U_UNEXPECTED_TOKEN;
1312          ++curIndex;
1313          break;
1314     }
1315 
1316     U_ASSERT(ruleIndex <= ruleSrc->length());
1317     U_ASSERT(curIndex <= ruleSrc->length());
1318     token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex);
1319     ruleIndex = curIndex;
1320 }
1321 
1322 tokenType
charType(UChar ch)1323 PluralRuleParser::charType(UChar ch) {
1324     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1325         return tNumber;
1326     }
1327     if (ch>=LOW_A && ch<=LOW_Z) {
1328         return tKeyword;
1329     }
1330     switch (ch) {
1331     case COLON:
1332         return tColon;
1333     case SPACE:
1334         return tSpace;
1335     case SEMI_COLON:
1336         return tSemiColon;
1337     case DOT:
1338         return tDot;
1339     case COMMA:
1340         return tComma;
1341     case EXCLAMATION:
1342         return tNotEqual;
1343     case EQUALS:
1344         return tEqual;
1345     case PERCENT_SIGN:
1346         return tMod;
1347     case AT:
1348         return tAt;
1349     case ELLIPSIS:
1350         return tEllipsis;
1351     case TILDE:
1352         return tTilde;
1353     default :
1354         return none;
1355     }
1356 }
1357 
1358 
1359 //  Set token type for reserved words in the Plural Rule syntax.
1360 
1361 tokenType
getKeyType(const UnicodeString & token,tokenType keyType)1362 PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
1363 {
1364     if (keyType != tKeyword) {
1365         return keyType;
1366     }
1367 
1368     if (0 == token.compare(PK_VAR_N, 1)) {
1369         keyType = tVariableN;
1370     } else if (0 == token.compare(PK_VAR_I, 1)) {
1371         keyType = tVariableI;
1372     } else if (0 == token.compare(PK_VAR_F, 1)) {
1373         keyType = tVariableF;
1374     } else if (0 == token.compare(PK_VAR_T, 1)) {
1375         keyType = tVariableT;
1376     } else if (0 == token.compare(PK_VAR_V, 1)) {
1377         keyType = tVariableV;
1378     } else if (0 == token.compare(PK_IS, 2)) {
1379         keyType = tIs;
1380     } else if (0 == token.compare(PK_AND, 3)) {
1381         keyType = tAnd;
1382     } else if (0 == token.compare(PK_IN, 2)) {
1383         keyType = tIn;
1384     } else if (0 == token.compare(PK_WITHIN, 6)) {
1385         keyType = tWithin;
1386     } else if (0 == token.compare(PK_NOT, 3)) {
1387         keyType = tNot;
1388     } else if (0 == token.compare(PK_MOD, 3)) {
1389         keyType = tMod;
1390     } else if (0 == token.compare(PK_OR, 2)) {
1391         keyType = tOr;
1392     } else if (0 == token.compare(PK_DECIMAL, 7)) {
1393         keyType = tDecimal;
1394     } else if (0 == token.compare(PK_INTEGER, 7)) {
1395         keyType = tInteger;
1396     }
1397     return keyType;
1398 }
1399 
1400 
PluralKeywordEnumeration(RuleChain * header,UErrorCode & status)1401 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1402         : pos(0), fKeywordNames(status) {
1403     if (U_FAILURE(status)) {
1404         return;
1405     }
1406     fKeywordNames.setDeleter(uprv_deleteUObject);
1407     UBool  addKeywordOther = TRUE;
1408     RuleChain *node = header;
1409     while (node != nullptr) {
1410         auto newElem = new UnicodeString(node->fKeyword);
1411         if (newElem == nullptr) {
1412             status = U_MEMORY_ALLOCATION_ERROR;
1413             return;
1414         }
1415         fKeywordNames.addElement(newElem, status);
1416         if (U_FAILURE(status)) {
1417             delete newElem;
1418             return;
1419         }
1420         if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1421             addKeywordOther = FALSE;
1422         }
1423         node = node->fNext;
1424     }
1425 
1426     if (addKeywordOther) {
1427         auto newElem = new UnicodeString(PLURAL_KEYWORD_OTHER);
1428         if (newElem == nullptr) {
1429             status = U_MEMORY_ALLOCATION_ERROR;
1430             return;
1431         }
1432         fKeywordNames.addElement(newElem, status);
1433         if (U_FAILURE(status)) {
1434             delete newElem;
1435             return;
1436         }
1437     }
1438 }
1439 
1440 const UnicodeString*
snext(UErrorCode & status)1441 PluralKeywordEnumeration::snext(UErrorCode& status) {
1442     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1443         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1444     }
1445     return nullptr;
1446 }
1447 
1448 void
reset(UErrorCode &)1449 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1450     pos=0;
1451 }
1452 
1453 int32_t
count(UErrorCode &) const1454 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1455     return fKeywordNames.size();
1456 }
1457 
~PluralKeywordEnumeration()1458 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1459 }
1460 
tokenTypeToPluralOperand(tokenType tt)1461 PluralOperand tokenTypeToPluralOperand(tokenType tt) {
1462     switch(tt) {
1463     case tVariableN:
1464         return PLURAL_OPERAND_N;
1465     case tVariableI:
1466         return PLURAL_OPERAND_I;
1467     case tVariableF:
1468         return PLURAL_OPERAND_F;
1469     case tVariableV:
1470         return PLURAL_OPERAND_V;
1471     case tVariableT:
1472         return PLURAL_OPERAND_T;
1473     default:
1474         U_ASSERT(FALSE);  // unexpected.
1475         return PLURAL_OPERAND_N;
1476     }
1477 }
1478 
FixedDecimal(double n,int32_t v,int64_t f)1479 FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
1480     init(n, v, f);
1481     // check values. TODO make into unit test.
1482     //
1483     //            long visiblePower = (int) Math.pow(10, v);
1484     //            if (decimalDigits > visiblePower) {
1485     //                throw new IllegalArgumentException();
1486     //            }
1487     //            double fraction = intValue + (decimalDigits / (double) visiblePower);
1488     //            if (fraction != source) {
1489     //                double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1490     //                if (diff > 0.00000001d) {
1491     //                    throw new IllegalArgumentException();
1492     //                }
1493     //            }
1494 }
1495 
FixedDecimal(double n,int32_t v)1496 FixedDecimal::FixedDecimal(double n, int32_t v) {
1497     // Ugly, but for samples we don't care.
1498     init(n, v, getFractionalDigits(n, v));
1499 }
1500 
FixedDecimal(double n)1501 FixedDecimal::FixedDecimal(double n) {
1502     init(n);
1503 }
1504 
FixedDecimal()1505 FixedDecimal::FixedDecimal() {
1506     init(0, 0, 0);
1507 }
1508 
1509 
1510 // Create a FixedDecimal from a UnicodeString containing a number.
1511 //    Inefficient, but only used for samples, so simplicity trumps efficiency.
1512 
FixedDecimal(const UnicodeString & num,UErrorCode & status)1513 FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
1514     CharString cs;
1515     cs.appendInvariantChars(num, status);
1516     DecimalQuantity dl;
1517     dl.setToDecNumber(cs.toStringPiece(), status);
1518     if (U_FAILURE(status)) {
1519         init(0, 0, 0);
1520         return;
1521     }
1522     int32_t decimalPoint = num.indexOf(DOT);
1523     double n = dl.toDouble();
1524     if (decimalPoint == -1) {
1525         init(n, 0, 0);
1526     } else {
1527         int32_t v = num.length() - decimalPoint - 1;
1528         init(n, v, getFractionalDigits(n, v));
1529     }
1530 }
1531 
1532 
// Copy constructor: copies every field of `other` one by one.
FixedDecimal::FixedDecimal(const FixedDecimal &other) {
    // Numeric value and its fraction-digit description.
    this->source = other.source;
    this->visibleDecimalDigitCount = other.visibleDecimalDigitCount;
    this->decimalDigits = other.decimalDigits;
    this->decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;
    this->intValue = other.intValue;

    // Flags.
    this->_hasIntegerValue = other._hasIntegerValue;
    this->isNegative = other.isNegative;
    this->_isNaN = other._isNaN;
    this->_isInfinite = other._isInfinite;
}
1544 
1545 FixedDecimal::~FixedDecimal() = default;
1546 
1547 
init(double n)1548 void FixedDecimal::init(double n) {
1549     int32_t numFractionDigits = decimals(n);
1550     init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1551 }
1552 
1553 
init(double n,int32_t v,int64_t f)1554 void FixedDecimal::init(double n, int32_t v, int64_t f) {
1555     isNegative = n < 0.0;
1556     source = fabs(n);
1557     _isNaN = uprv_isNaN(source);
1558     _isInfinite = uprv_isInfinite(source);
1559     if (_isNaN || _isInfinite) {
1560         v = 0;
1561         f = 0;
1562         intValue = 0;
1563         _hasIntegerValue = FALSE;
1564     } else {
1565         intValue = (int64_t)source;
1566         _hasIntegerValue = (source == intValue);
1567     }
1568 
1569     visibleDecimalDigitCount = v;
1570     decimalDigits = f;
1571     if (f == 0) {
1572          decimalDigitsWithoutTrailingZeros = 0;
1573     } else {
1574         int64_t fdwtz = f;
1575         while ((fdwtz%10) == 0) {
1576             fdwtz /= 10;
1577         }
1578         decimalDigitsWithoutTrailingZeros = fdwtz;
1579     }
1580 }
1581 
1582 
1583 //  Fast path only exact initialization. Return true if successful.
1584 //     Note: Do not multiply by 10 each time through loop, rounding cruft can build
1585 //           up that makes the check for an integer result fail.
1586 //           A single multiply of the original number works more reliably.
1587 static int32_t p10[] = {1, 10, 100, 1000, 10000};
quickInit(double n)1588 UBool FixedDecimal::quickInit(double n) {
1589     UBool success = FALSE;
1590     n = fabs(n);
1591     int32_t numFractionDigits;
1592     for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
1593         double scaledN = n * p10[numFractionDigits];
1594         if (scaledN == floor(scaledN)) {
1595             success = TRUE;
1596             break;
1597         }
1598     }
1599     if (success) {
1600         init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1601     }
1602     return success;
1603 }
1604 
1605 
1606 
decimals(double n)1607 int32_t FixedDecimal::decimals(double n) {
1608     // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1609     // fastpath the common cases, integers or fractions with 3 or fewer digits
1610     n = fabs(n);
1611     for (int ndigits=0; ndigits<=3; ndigits++) {
1612         double scaledN = n * p10[ndigits];
1613         if (scaledN == floor(scaledN)) {
1614             return ndigits;
1615         }
1616     }
1617 
1618     // Slow path, convert with sprintf, parse converted output.
1619     char  buf[30] = {0};
1620     sprintf(buf, "%1.15e", n);
1621     // formatted number looks like this: 1.234567890123457e-01
1622     int exponent = atoi(buf+18);
1623     int numFractionDigits = 15;
1624     for (int i=16; ; --i) {
1625         if (buf[i] != '0') {
1626             break;
1627         }
1628         --numFractionDigits;
1629     }
1630     numFractionDigits -= exponent;   // Fraction part of fixed point representation.
1631     return numFractionDigits;
1632 }
1633 
1634 
1635 // Get the fraction digits of a double, represented as an integer.
1636 //    v is the number of visible fraction digits in the displayed form of the number.
1637 //       Example: n = 1001.234, v = 6, result = 234000
1638 //    TODO: need to think through how this is used in the plural rule context.
1639 //          This function can easily encounter integer overflow,
1640 //          and can easily return noise digits when the precision of a double is exceeded.
1641 
getFractionalDigits(double n,int32_t v)1642 int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
1643     if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
1644         return 0;
1645     }
1646     n = fabs(n);
1647     double fract = n - floor(n);
1648     switch (v) {
1649       case 1: return (int64_t)(fract*10.0 + 0.5);
1650       case 2: return (int64_t)(fract*100.0 + 0.5);
1651       case 3: return (int64_t)(fract*1000.0 + 0.5);
1652       default:
1653           double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
1654           if (scaled > U_INT64_MAX) {
1655               return U_INT64_MAX;
1656           } else {
1657               return (int64_t)scaled;
1658           }
1659       }
1660 }
1661 
1662 
adjustForMinFractionDigits(int32_t minFractionDigits)1663 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
1664     int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount;
1665     if (numTrailingFractionZeros > 0) {
1666         for (int32_t i=0; i<numTrailingFractionZeros; i++) {
1667             // Do not let the decimalDigits value overflow if there are many trailing zeros.
1668             // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1669             if (decimalDigits >= 100000000000000000LL) {
1670                 break;
1671             }
1672             decimalDigits *= 10;
1673         }
1674         visibleDecimalDigitCount += numTrailingFractionZeros;
1675     }
1676 }
1677 
1678 
getPluralOperand(PluralOperand operand) const1679 double FixedDecimal::getPluralOperand(PluralOperand operand) const {
1680     switch(operand) {
1681         case PLURAL_OPERAND_N: return source;
1682         case PLURAL_OPERAND_I: return static_cast<double>(intValue);
1683         case PLURAL_OPERAND_F: return static_cast<double>(decimalDigits);
1684         case PLURAL_OPERAND_T: return static_cast<double>(decimalDigitsWithoutTrailingZeros);
1685         case PLURAL_OPERAND_V: return visibleDecimalDigitCount;
1686         default:
1687              U_ASSERT(FALSE);  // unexpected.
1688              return source;
1689     }
1690 }
1691 
isNaN() const1692 bool FixedDecimal::isNaN() const {
1693     return _isNaN;
1694 }
1695 
isInfinite() const1696 bool FixedDecimal::isInfinite() const {
1697     return _isInfinite;
1698 }
1699 
hasIntegerValue() const1700 bool FixedDecimal::hasIntegerValue() const {
1701     return _hasIntegerValue;
1702 }
1703 
isNanOrInfinity() const1704 bool FixedDecimal::isNanOrInfinity() const {
1705     return _isNaN || _isInfinite;
1706 }
1707 
getVisibleFractionDigitCount() const1708 int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1709     return visibleDecimalDigitCount;
1710 }
1711 
1712 
1713 
PluralAvailableLocalesEnumeration(UErrorCode & status)1714 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
1715     fOpenStatus = status;
1716     if (U_FAILURE(status)) {
1717         return;
1718     }
1719     fOpenStatus = U_ZERO_ERROR; // clear any warnings.
1720     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &fOpenStatus));
1721     fLocales = ures_getByKey(rb.getAlias(), "locales", nullptr, &fOpenStatus);
1722 }
1723 
~PluralAvailableLocalesEnumeration()1724 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1725     ures_close(fLocales);
1726     ures_close(fRes);
1727     fLocales = nullptr;
1728     fRes = nullptr;
1729 }
1730 
next(int32_t * resultLength,UErrorCode & status)1731 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
1732     if (U_FAILURE(status)) {
1733         return nullptr;
1734     }
1735     if (U_FAILURE(fOpenStatus)) {
1736         status = fOpenStatus;
1737         return nullptr;
1738     }
1739     fRes = ures_getNextResource(fLocales, fRes, &status);
1740     if (fRes == nullptr || U_FAILURE(status)) {
1741         if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
1742             status = U_ZERO_ERROR;
1743         }
1744         return nullptr;
1745     }
1746     const char *result = ures_getKey(fRes);
1747     if (resultLength != nullptr) {
1748         *resultLength = static_cast<int32_t>(uprv_strlen(result));
1749     }
1750     return result;
1751 }
1752 
1753 
reset(UErrorCode & status)1754 void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
1755     if (U_FAILURE(status)) {
1756        return;
1757     }
1758     if (U_FAILURE(fOpenStatus)) {
1759         status = fOpenStatus;
1760         return;
1761     }
1762     ures_resetIterator(fLocales);
1763 }
1764 
count(UErrorCode & status) const1765 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
1766     if (U_FAILURE(status)) {
1767         return 0;
1768     }
1769     if (U_FAILURE(fOpenStatus)) {
1770         status = fOpenStatus;
1771         return 0;
1772     }
1773     return ures_getSize(fLocales);
1774 }
1775 
1776 U_NAMESPACE_END
1777 
1778 
1779 #endif /* #if !UCONFIG_NO_FORMATTING */
1780 
1781 //eof
1782