1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *   Copyright (C) 1997-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 ******************************************************************************
8 *   file name:  nfrs.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 * Modification history
14 * Date        Name      Comments
15 * 10/11/2001  Doug      Ported from ICU4J
16 */
17 
18 #include "nfrs.h"
19 
20 #if U_HAVE_RBNF
21 
22 #include "unicode/uchar.h"
23 #include "nfrule.h"
24 #include "nfrlist.h"
25 #include "patternprops.h"
26 
27 #ifdef RBNF_DEBUG
28 #include "cmemory.h"
29 #endif
30 
31 enum {
32     /** -x */
33     NEGATIVE_RULE_INDEX = 0,
34     /** x.x */
35     IMPROPER_FRACTION_RULE_INDEX = 1,
36     /** 0.x */
37     PROPER_FRACTION_RULE_INDEX = 2,
38     /** x.0 */
39     MASTER_RULE_INDEX = 3,
40     /** Inf */
41     INFINITY_RULE_INDEX = 4,
42     /** NaN */
43     NAN_RULE_INDEX = 5,
44     NON_NUMERICAL_RULE_LENGTH = 6
45 };
46 
47 U_NAMESPACE_BEGIN
48 
49 #if 0
50 // euclid's algorithm works with doubles
51 // note, doubles only get us up to one quadrillion or so, which
52 // isn't as much range as we get with longs.  We probably still
53 // want either 64-bit math, or BigInteger.
54 
55 static int64_t
56 util_lcm(int64_t x, int64_t y)
57 {
58     x.abs();
59     y.abs();
60 
61     if (x == 0 || y == 0) {
62         return 0;
63     } else {
64         do {
65             if (x < y) {
66                 int64_t t = x; x = y; y = t;
67             }
68             x -= y * (x/y);
69         } while (x != 0);
70 
71         return y;
72     }
73 }
74 
75 #else
76 /**
77  * Calculates the least common multiple of x and y.
78  */
79 static int64_t
util_lcm(int64_t x,int64_t y)80 util_lcm(int64_t x, int64_t y)
81 {
82     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
83     // vol. 2, 1st ed., pp. 298-299
84     int64_t x1 = x;
85     int64_t y1 = y;
86 
87     int p2 = 0;
88     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
89         ++p2;
90         x1 >>= 1;
91         y1 >>= 1;
92     }
93 
94     int64_t t;
95     if ((x1 & 1) == 1) {
96         t = -y1;
97     } else {
98         t = x1;
99     }
100 
101     while (t != 0) {
102         while ((t & 1) == 0) {
103             t = t >> 1;
104         }
105         if (t > 0) {
106             x1 = t;
107         } else {
108             y1 = -t;
109         }
110         t = x1 - y1;
111     }
112 
113     int64_t gcd = x1 << p2;
114 
115     // x * y == gcd(x, y) * lcm(x, y)
116     return x / gcd * y;
117 }
118 #endif
119 
120 static const UChar gPercent = 0x0025;
121 static const UChar gColon = 0x003a;
122 static const UChar gSemicolon = 0x003b;
123 static const UChar gLineFeed = 0x000a;
124 
125 static const UChar gPercentPercent[] =
126 {
127     0x25, 0x25, 0
128 }; /* "%%" */
129 
130 static const UChar gNoparse[] =
131 {
132     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
133 }; /* "@noparse" */
134 
NFRuleSet(RuleBasedNumberFormat * _owner,UnicodeString * descriptions,int32_t index,UErrorCode & status)135 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
136   : name()
137   , rules(0)
138   , owner(_owner)
139   , fractionRules()
140   , fIsFractionRuleSet(FALSE)
141   , fIsPublic(FALSE)
142   , fIsParseable(TRUE)
143 {
144     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
145         nonNumericalRules[i] = NULL;
146     }
147 
148     if (U_FAILURE(status)) {
149         return;
150     }
151 
152     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
153 
154     if (description.length() == 0) {
155         // throw new IllegalArgumentException("Empty rule set description");
156         status = U_PARSE_ERROR;
157         return;
158     }
159 
160     // if the description begins with a rule set name (the rule set
161     // name can be omitted in formatter descriptions that consist
162     // of only one rule set), copy it out into our "name" member
163     // and delete it from the description
164     if (description.charAt(0) == gPercent) {
165         int32_t pos = description.indexOf(gColon);
166         if (pos == -1) {
167             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
168             status = U_PARSE_ERROR;
169         } else {
170             name.setTo(description, 0, pos);
171             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
172             }
173             description.remove(0, pos);
174         }
175     } else {
176         name.setTo(UNICODE_STRING_SIMPLE("%default"));
177     }
178 
179     if (description.length() == 0) {
180         // throw new IllegalArgumentException("Empty rule set description");
181         status = U_PARSE_ERROR;
182     }
183 
184     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
185 
186     if ( name.endsWith(gNoparse,8) ) {
187         fIsParseable = FALSE;
188         name.truncate(name.length()-8); // remove the @noparse from the name
189     }
190 
191     // all of the other members of NFRuleSet are initialized
192     // by parseRules()
193 }
194 
195 void
parseRules(UnicodeString & description,UErrorCode & status)196 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
197 {
198     // start by creating a Vector whose elements are Strings containing
199     // the descriptions of the rules (one rule per element).  The rules
200     // are separated by semicolons (there's no escape facility: ALL
201     // semicolons are rule delimiters)
202 
203     if (U_FAILURE(status)) {
204         return;
205     }
206 
207     // ensure we are starting with an empty rule list
208     rules.deleteAll();
209 
210     // dlf - the original code kept a separate description array for no reason,
211     // so I got rid of it.  The loop was too complex so I simplified it.
212 
213     UnicodeString currentDescription;
214     int32_t oldP = 0;
215     while (oldP < description.length()) {
216         int32_t p = description.indexOf(gSemicolon, oldP);
217         if (p == -1) {
218             p = description.length();
219         }
220         currentDescription.setTo(description, oldP, p - oldP);
221         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
222         oldP = p + 1;
223     }
224 
225     // for rules that didn't specify a base value, their base values
226     // were initialized to 0.  Make another pass through the list and
227     // set all those rules' base values.  We also remove any special
228     // rules from the list and put them into their own member variables
229     int64_t defaultBaseValue = 0;
230 
231     // (this isn't a for loop because we might be deleting items from
232     // the vector-- we want to make sure we only increment i when
233     // we _didn't_ delete aything from the vector)
234     int32_t rulesSize = rules.size();
235     for (int32_t i = 0; i < rulesSize; i++) {
236         NFRule* rule = rules[i];
237         int64_t baseValue = rule->getBaseValue();
238 
239         if (baseValue == 0) {
240             // if the rule's base value is 0, fill in a default
241             // base value (this will be 1 plus the preceding
242             // rule's base value for regular rule sets, and the
243             // same as the preceding rule's base value in fraction
244             // rule sets)
245             rule->setBaseValue(defaultBaseValue, status);
246         }
247         else {
248             // if it's a regular rule that already knows its base value,
249             // check to make sure the rules are in order, and update
250             // the default base value for the next rule
251             if (baseValue < defaultBaseValue) {
252                 // throw new IllegalArgumentException("Rules are not in order");
253                 status = U_PARSE_ERROR;
254                 return;
255             }
256             defaultBaseValue = baseValue;
257         }
258         if (!fIsFractionRuleSet) {
259             ++defaultBaseValue;
260         }
261     }
262 }
263 
264 /**
265  * Set one of the non-numerical rules.
266  * @param rule The rule to set.
267  */
setNonNumericalRule(NFRule * rule)268 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
269     int64_t baseValue = rule->getBaseValue();
270     if (baseValue == NFRule::kNegativeNumberRule) {
271         delete nonNumericalRules[NEGATIVE_RULE_INDEX];
272         nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
273     }
274     else if (baseValue == NFRule::kImproperFractionRule) {
275         setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
276     }
277     else if (baseValue == NFRule::kProperFractionRule) {
278         setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
279     }
280     else if (baseValue == NFRule::kMasterRule) {
281         setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
282     }
283     else if (baseValue == NFRule::kInfinityRule) {
284         delete nonNumericalRules[INFINITY_RULE_INDEX];
285         nonNumericalRules[INFINITY_RULE_INDEX] = rule;
286     }
287     else if (baseValue == NFRule::kNaNRule) {
288         delete nonNumericalRules[NAN_RULE_INDEX];
289         nonNumericalRules[NAN_RULE_INDEX] = rule;
290     }
291 }
292 
293 /**
294  * Determine the best fraction rule to use. Rules matching the decimal point from
295  * DecimalFormatSymbols become the main set of rules to use.
296  * @param originalIndex The index into nonNumericalRules
297  * @param newRule The new rule to consider
298  * @param rememberRule Should the new rule be added to fractionRules.
299  */
setBestFractionRule(int32_t originalIndex,NFRule * newRule,UBool rememberRule)300 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
301     if (rememberRule) {
302         fractionRules.add(newRule);
303     }
304     NFRule *bestResult = nonNumericalRules[originalIndex];
305     if (bestResult == NULL) {
306         nonNumericalRules[originalIndex] = newRule;
307     }
308     else {
309         // We have more than one. Which one is better?
310         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
311         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
312             == newRule->getDecimalPoint())
313         {
314             nonNumericalRules[originalIndex] = newRule;
315         }
316         // else leave it alone
317     }
318 }
319 
~NFRuleSet()320 NFRuleSet::~NFRuleSet()
321 {
322     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
323         if (i != IMPROPER_FRACTION_RULE_INDEX
324             && i != PROPER_FRACTION_RULE_INDEX
325             && i != MASTER_RULE_INDEX)
326         {
327             delete nonNumericalRules[i];
328         }
329         // else it will be deleted via NFRuleList fractionRules
330     }
331 }
332 
333 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)334 util_equalRules(const NFRule* rule1, const NFRule* rule2)
335 {
336     if (rule1) {
337         if (rule2) {
338             return *rule1 == *rule2;
339         }
340     } else if (!rule2) {
341         return TRUE;
342     }
343     return FALSE;
344 }
345 
346 UBool
operator ==(const NFRuleSet & rhs) const347 NFRuleSet::operator==(const NFRuleSet& rhs) const
348 {
349     if (rules.size() == rhs.rules.size() &&
350         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
351         name == rhs.name) {
352 
353         // ...then compare the non-numerical rule lists...
354         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
355             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
356                 return FALSE;
357             }
358         }
359 
360         // ...then compare the rule lists...
361         for (uint32_t i = 0; i < rules.size(); ++i) {
362             if (*rules[i] != *rhs.rules[i]) {
363                 return FALSE;
364             }
365         }
366         return TRUE;
367     }
368     return FALSE;
369 }
370 
371 void
setDecimalFormatSymbols(const DecimalFormatSymbols & newSymbols,UErrorCode & status)372 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
373     for (uint32_t i = 0; i < rules.size(); ++i) {
374         rules[i]->setDecimalFormatSymbols(newSymbols, status);
375     }
376     // Switch the fraction rules to mirror the DecimalFormatSymbols.
377     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
378         if (nonNumericalRules[nonNumericalIdx]) {
379             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
380                 NFRule *fractionRule = fractionRules[fIdx];
381                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
382                     setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
383                 }
384             }
385         }
386     }
387 
388     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
389         NFRule *rule = nonNumericalRules[nnrIdx];
390         if (rule) {
391             rule->setDecimalFormatSymbols(newSymbols, status);
392         }
393     }
394 }
395 
396 #define RECURSION_LIMIT 64
397 
398 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const399 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
400 {
401     if (recursionCount >= RECURSION_LIMIT) {
402         // stop recursion
403         status = U_INVALID_STATE_ERROR;
404         return;
405     }
406     const NFRule *rule = findNormalRule(number);
407     if (rule) { // else error, but can't report it
408         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
409     }
410 }
411 
412 void
format(double number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const413 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
414 {
415     if (recursionCount >= RECURSION_LIMIT) {
416         // stop recursion
417         status = U_INVALID_STATE_ERROR;
418         return;
419     }
420     const NFRule *rule = findDoubleRule(number);
421     if (rule) { // else error, but can't report it
422         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
423     }
424 }
425 
426 const NFRule*
findDoubleRule(double number) const427 NFRuleSet::findDoubleRule(double number) const
428 {
429     // if this is a fraction rule set, use findFractionRuleSetRule()
430     if (isFractionRuleSet()) {
431         return findFractionRuleSetRule(number);
432     }
433 
434     if (uprv_isNaN(number)) {
435         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
436         if (!rule) {
437             rule = owner->getDefaultNaNRule();
438         }
439         return rule;
440     }
441 
442     // if the number is negative, return the negative number rule
443     // (if there isn't a negative-number rule, we pretend it's a
444     // positive number)
445     if (number < 0) {
446         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
447             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
448         } else {
449             number = -number;
450         }
451     }
452 
453     if (uprv_isInfinite(number)) {
454         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
455         if (!rule) {
456             rule = owner->getDefaultInfinityRule();
457         }
458         return rule;
459     }
460 
461     // if the number isn't an integer, we use one of the fraction rules...
462     if (number != uprv_floor(number)) {
463         // if the number is between 0 and 1, return the proper
464         // fraction rule
465         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
466             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
467         }
468         // otherwise, return the improper fraction rule
469         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
470             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
471         }
472     }
473 
474     // if there's a master rule, use it to format the number
475     if (nonNumericalRules[MASTER_RULE_INDEX]) {
476         return nonNumericalRules[MASTER_RULE_INDEX];
477     }
478 
479     // and if we haven't yet returned a rule, use findNormalRule()
480     // to find the applicable rule
481     int64_t r = util64_fromDouble(number + 0.5);
482     return findNormalRule(r);
483 }
484 
485 const NFRule *
findNormalRule(int64_t number) const486 NFRuleSet::findNormalRule(int64_t number) const
487 {
488     // if this is a fraction rule set, use findFractionRuleSetRule()
489     // to find the rule (we should only go into this clause if the
490     // value is 0)
491     if (fIsFractionRuleSet) {
492         return findFractionRuleSetRule((double)number);
493     }
494 
495     // if the number is negative, return the negative-number rule
496     // (if there isn't one, pretend the number is positive)
497     if (number < 0) {
498         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
499             return nonNumericalRules[NEGATIVE_RULE_INDEX];
500         } else {
501             number = -number;
502         }
503     }
504 
505     // we have to repeat the preceding two checks, even though we
506     // do them in findRule(), because the version of format() that
507     // takes a long bypasses findRule() and goes straight to this
508     // function.  This function does skip the fraction rules since
509     // we know the value is an integer (it also skips the master
510     // rule, since it's considered a fraction rule.  Skipping the
511     // master rule in this function is also how we avoid infinite
512     // recursion)
513 
514     // {dlf} unfortunately this fails if there are no rules except
515     // special rules.  If there are no rules, use the master rule.
516 
517     // binary-search the rule list for the applicable rule
518     // (a rule is used for all values from its base value to
519     // the next rule's base value)
520     int32_t hi = rules.size();
521     if (hi > 0) {
522         int32_t lo = 0;
523 
524         while (lo < hi) {
525             int32_t mid = (lo + hi) / 2;
526             if (rules[mid]->getBaseValue() == number) {
527                 return rules[mid];
528             }
529             else if (rules[mid]->getBaseValue() > number) {
530                 hi = mid;
531             }
532             else {
533                 lo = mid + 1;
534             }
535         }
536         if (hi == 0) { // bad rule set, minimum base > 0
537             return NULL; // want to throw exception here
538         }
539 
540         NFRule *result = rules[hi - 1];
541 
542         // use shouldRollBack() to see whether we need to invoke the
543         // rollback rule (see shouldRollBack()'s documentation for
544         // an explanation of the rollback rule).  If we do, roll back
545         // one rule and return that one instead of the one we'd normally
546         // return
547         if (result->shouldRollBack((double)number)) {
548             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
549                 return NULL;
550             }
551             result = rules[hi - 2];
552         }
553         return result;
554     }
555     // else use the master rule
556     return nonNumericalRules[MASTER_RULE_INDEX];
557 }
558 
559 /**
560  * If this rule is a fraction rule set, this function is used by
561  * findRule() to select the most appropriate rule for formatting
562  * the number.  Basically, the base value of each rule in the rule
563  * set is treated as the denominator of a fraction.  Whichever
564  * denominator can produce the fraction closest in value to the
565  * number passed in is the result.  If there's a tie, the earlier
566  * one in the list wins.  (If there are two rules in a row with the
567  * same base value, the first one is used when the numerator of the
568  * fraction would be 1, and the second rule is used the rest of the
569  * time.
570  * @param number The number being formatted (which will always be
571  * a number between 0 and 1)
572  * @return The rule to use to format this number
573  */
574 const NFRule*
findFractionRuleSetRule(double number) const575 NFRuleSet::findFractionRuleSetRule(double number) const
576 {
577     // the obvious way to do this (multiply the value being formatted
578     // by each rule's base value until you get an integral result)
579     // doesn't work because of rounding error.  This method is more
580     // accurate
581 
582     // find the least common multiple of the rules' base values
583     // and multiply this by the number being formatted.  This is
584     // all the precision we need, and we can do all of the rest
585     // of the math using integer arithmetic
586     int64_t leastCommonMultiple = rules[0]->getBaseValue();
587     int64_t numerator;
588     {
589         for (uint32_t i = 1; i < rules.size(); ++i) {
590             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
591         }
592         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
593     }
594     // for each rule, do the following...
595     int64_t tempDifference;
596     int64_t difference = util64_fromDouble(uprv_maxMantissa());
597     int32_t winner = 0;
598     for (uint32_t i = 0; i < rules.size(); ++i) {
599         // "numerator" is the numerator of the fraction if the
600         // denominator is the LCD.  The numerator if the rule's
601         // base value is the denominator is "numerator" times the
602         // base value divided bythe LCD.  Here we check to see if
603         // that's an integer, and if not, how close it is to being
604         // an integer.
605         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
606 
607 
608         // normalize the result of the above calculation: we want
609         // the numerator's distance from the CLOSEST multiple
610         // of the LCD
611         if (leastCommonMultiple - tempDifference < tempDifference) {
612             tempDifference = leastCommonMultiple - tempDifference;
613         }
614 
615         // if this is as close as we've come, keep track of how close
616         // that is, and the line number of the rule that did it.  If
617         // we've scored a direct hit, we don't have to look at any more
618         // rules
619         if (tempDifference < difference) {
620             difference = tempDifference;
621             winner = i;
622             if (difference == 0) {
623                 break;
624             }
625         }
626     }
627 
628     // if we have two successive rules that both have the winning base
629     // value, then the first one (the one we found above) is used if
630     // the numerator of the fraction is 1 and the second one is used if
631     // the numerator of the fraction is anything else (this lets us
632     // do things like "one third"/"two thirds" without haveing to define
633     // a whole bunch of extra rule sets)
634     if ((unsigned)(winner + 1) < rules.size() &&
635         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
636         double n = ((double)rules[winner]->getBaseValue()) * number;
637         if (n < 0.5 || n >= 2) {
638             ++winner;
639         }
640     }
641 
642     // finally, return the winning rule
643     return rules[winner];
644 }
645 
646 /**
647  * Parses a string.  Matches the string to be parsed against each
648  * of its rules (with a base value less than upperBound) and returns
649  * the value produced by the rule that matched the most charcters
650  * in the source string.
651  * @param text The string to parse
652  * @param parsePosition The initial position is ignored and assumed
653  * to be 0.  On exit, this object has been updated to point to the
654  * first character position this rule set didn't consume.
655  * @param upperBound Limits the rules that can be allowed to match.
656  * Only rules whose base values are strictly less than upperBound
657  * are considered.
658  * @return The numerical result of parsing this string.  This will
659  * be the matching rule's base value, composed appropriately with
660  * the results of matching any of its substitutions.  The object
661  * will be an instance of Long if it's an integral value; otherwise,
662  * it will be an instance of Double.  This function always returns
663  * a valid object: If nothing matched the input string at all,
664  * this function returns new Long(0), and the parse position is
665  * left unchanged.
666  */
667 #ifdef RBNF_DEBUG
668 #include <stdio.h>
669 
dumpUS(FILE * f,const UnicodeString & us)670 static void dumpUS(FILE* f, const UnicodeString& us) {
671   int len = us.length();
672   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
673   if (buf != NULL) {
674 	  us.extract(0, len, buf);
675 	  buf[len] = 0;
676 	  fprintf(f, "%s", buf);
677 	  uprv_free(buf); //delete[] buf;
678   }
679 }
680 #endif
681 
682 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const683 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
684 {
685     // try matching each rule in the rule set against the text being
686     // parsed.  Whichever one matches the most characters is the one
687     // that determines the value we return.
688 
689     result.setLong(0);
690 
691     // dump out if there's no text to parse
692     if (text.length() == 0) {
693         return 0;
694     }
695 
696     ParsePosition highWaterMark;
697     ParsePosition workingPos = pos;
698 
699 #ifdef RBNF_DEBUG
700     fprintf(stderr, "<nfrs> %x '", this);
701     dumpUS(stderr, name);
702     fprintf(stderr, "' text '");
703     dumpUS(stderr, text);
704     fprintf(stderr, "'\n");
705     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
706 #endif
707     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
708     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
709         if (nonNumericalRules[i]) {
710             Formattable tempResult;
711             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
712             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
713                 result = tempResult;
714                 highWaterMark = workingPos;
715             }
716             workingPos = pos;
717         }
718     }
719 #ifdef RBNF_DEBUG
720     fprintf(stderr, "<nfrs> continue other with text '");
721     dumpUS(stderr, text);
722     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
723 #endif
724 
725     // finally, go through the regular rules one at a time.  We start
726     // at the end of the list because we want to try matching the most
727     // sigificant rule first (this helps ensure that we parse
728     // "five thousand three hundred six" as
729     // "(five thousand) (three hundred) (six)" rather than
730     // "((five thousand three) hundred) (six)").  Skip rules whose
731     // base values are higher than the upper bound (again, this helps
732     // limit ambiguity by making sure the rules that match a rule's
733     // are less significant than the rule containing the substitutions)/
734     {
735         int64_t ub = util64_fromDouble(upperBound);
736 #ifdef RBNF_DEBUG
737         {
738             char ubstr[64];
739             util64_toa(ub, ubstr, 64);
740             char ubstrhex[64];
741             util64_toa(ub, ubstrhex, 64, 16);
742             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
743         }
744 #endif
745         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
746             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
747                 continue;
748             }
749             Formattable tempResult;
750             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
751             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
752                 result = tempResult;
753                 highWaterMark = workingPos;
754             }
755             workingPos = pos;
756         }
757     }
758 #ifdef RBNF_DEBUG
759     fprintf(stderr, "<nfrs> exit\n");
760 #endif
761     // finally, update the parse postion we were passed to point to the
762     // first character we didn't use, and return the result that
763     // corresponds to that string of characters
764     pos = highWaterMark;
765 
766     return 1;
767 }
768 
769 void
appendRules(UnicodeString & result) const770 NFRuleSet::appendRules(UnicodeString& result) const
771 {
772     uint32_t i;
773 
774     // the rule set name goes first...
775     result.append(name);
776     result.append(gColon);
777     result.append(gLineFeed);
778 
779     // followed by the regular rules...
780     for (i = 0; i < rules.size(); i++) {
781         rules[i]->_appendRuleText(result);
782         result.append(gLineFeed);
783     }
784 
785     // followed by the special rules (if they exist)
786     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
787         NFRule *rule = nonNumericalRules[i];
788         if (nonNumericalRules[i]) {
789             if (rule->getBaseValue() == NFRule::kImproperFractionRule
790                 || rule->getBaseValue() == NFRule::kProperFractionRule
791                 || rule->getBaseValue() == NFRule::kMasterRule)
792             {
793                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
794                     NFRule *fractionRule = fractionRules[fIdx];
795                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
796                         fractionRule->_appendRuleText(result);
797                         result.append(gLineFeed);
798                     }
799                 }
800             }
801             else {
802                 rule->_appendRuleText(result);
803                 result.append(gLineFeed);
804             }
805         }
806     }
807 }
808 
809 // utility functions
810 
util64_fromDouble(double d)811 int64_t util64_fromDouble(double d) {
812     int64_t result = 0;
813     if (!uprv_isNaN(d)) {
814         double mant = uprv_maxMantissa();
815         if (d < -mant) {
816             d = -mant;
817         } else if (d > mant) {
818             d = mant;
819         }
820         UBool neg = d < 0;
821         if (neg) {
822             d = -d;
823         }
824         result = (int64_t)uprv_floor(d);
825         if (neg) {
826             result = -result;
827         }
828     }
829     return result;
830 }
831 
util64_pow(int32_t r,uint32_t e)832 int64_t util64_pow(int32_t r, uint32_t e)  {
833     if (r == 0) {
834         return 0;
835     } else if (e == 0) {
836         return 1;
837     } else {
838         int64_t n = r;
839         while (--e > 0) {
840             n *= r;
841         }
842         return n;
843     }
844 }
845 
846 static const uint8_t asciiDigits[] = {
847     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
848     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
849     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
850     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
851     0x77u, 0x78u, 0x79u, 0x7au,
852 };
853 
854 static const UChar kUMinus = (UChar)0x002d;
855 
856 #ifdef RBNF_DEBUG
857 static const char kMinus = '-';
858 
859 static const uint8_t digitInfo[] = {
860         0,     0,     0,     0,     0,     0,     0,     0,
861         0,     0,     0,     0,     0,     0,     0,     0,
862         0,     0,     0,     0,     0,     0,     0,     0,
863         0,     0,     0,     0,     0,     0,     0,     0,
864         0,     0,     0,     0,     0,     0,     0,     0,
865         0,     0,     0,     0,     0,     0,     0,     0,
866     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
867     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
868         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
869     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
870     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
871     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
872         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
873     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
874     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
875     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
876 };
877 
util64_atoi(const char * str,uint32_t radix)878 int64_t util64_atoi(const char* str, uint32_t radix)
879 {
880     if (radix > 36) {
881         radix = 36;
882     } else if (radix < 2) {
883         radix = 2;
884     }
885     int64_t lradix = radix;
886 
887     int neg = 0;
888     if (*str == kMinus) {
889         ++str;
890         neg = 1;
891     }
892     int64_t result = 0;
893     uint8_t b;
894     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
895         result *= lradix;
896         result += (int32_t)b;
897     }
898     if (neg) {
899         result = -result;
900     }
901     return result;
902 }
903 
util64_utoi(const UChar * str,uint32_t radix)904 int64_t util64_utoi(const UChar* str, uint32_t radix)
905 {
906     if (radix > 36) {
907         radix = 36;
908     } else if (radix < 2) {
909         radix = 2;
910     }
911     int64_t lradix = radix;
912 
913     int neg = 0;
914     if (*str == kUMinus) {
915         ++str;
916         neg = 1;
917     }
918     int64_t result = 0;
919     UChar c;
920     uint8_t b;
921     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
922         result *= lradix;
923         result += (int32_t)b;
924     }
925     if (neg) {
926         result = -result;
927     }
928     return result;
929 }
930 
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)931 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
932 {
933     if (radix > 36) {
934         radix = 36;
935     } else if (radix < 2) {
936         radix = 2;
937     }
938     int64_t base = radix;
939 
940     char* p = buf;
941     if (len && (w < 0) && (radix == 10) && !raw) {
942         w = -w;
943         *p++ = kMinus;
944         --len;
945     } else if (len && (w == 0)) {
946         *p++ = (char)raw ? 0 : asciiDigits[0];
947         --len;
948     }
949 
950     while (len && w != 0) {
951         int64_t n = w / base;
952         int64_t m = n * base;
953         int32_t d = (int32_t)(w-m);
954         *p++ = raw ? (char)d : asciiDigits[d];
955         w = n;
956         --len;
957     }
958     if (len) {
959         *p = 0; // null terminate if room for caller convenience
960     }
961 
962     len = p - buf;
963     if (*buf == kMinus) {
964         ++buf;
965     }
966     while (--p > buf) {
967         char c = *p;
968         *p = *buf;
969         *buf = c;
970         ++buf;
971     }
972 
973     return len;
974 }
975 #endif
976 
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)977 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
978 {
979     if (radix > 36) {
980         radix = 36;
981     } else if (radix < 2) {
982         radix = 2;
983     }
984     int64_t base = radix;
985 
986     UChar* p = buf;
987     if (len && (w < 0) && (radix == 10) && !raw) {
988         w = -w;
989         *p++ = kUMinus;
990         --len;
991     } else if (len && (w == 0)) {
992         *p++ = (UChar)raw ? 0 : asciiDigits[0];
993         --len;
994     }
995 
996     while (len && (w != 0)) {
997         int64_t n = w / base;
998         int64_t m = n * base;
999         int32_t d = (int32_t)(w-m);
1000         *p++ = (UChar)(raw ? d : asciiDigits[d]);
1001         w = n;
1002         --len;
1003     }
1004     if (len) {
1005         *p = 0; // null terminate if room for caller convenience
1006     }
1007 
1008     len = (uint32_t)(p - buf);
1009     if (*buf == kUMinus) {
1010         ++buf;
1011     }
1012     while (--p > buf) {
1013         UChar c = *p;
1014         *p = *buf;
1015         *buf = c;
1016         ++buf;
1017     }
1018 
1019     return len;
1020 }
1021 
1022 
1023 U_NAMESPACE_END
1024 
1025 /* U_HAVE_RBNF */
1026 #endif
1027 
1028