1 /*
2 ******************************************************************************
3 *   Copyright (C) 1997-2015, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 *   file name:  nfrs.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 * Modification history
12 * Date        Name      Comments
13 * 10/11/2001  Doug      Ported from ICU4J
14 */
15 
16 #include "nfrs.h"
17 
18 #if U_HAVE_RBNF
19 
20 #include "unicode/uchar.h"
21 #include "nfrule.h"
22 #include "nfrlist.h"
23 #include "patternprops.h"
24 
25 #ifdef RBNF_DEBUG
26 #include "cmemory.h"
27 #endif
28 
29 enum {
30     /** -x */
31     NEGATIVE_RULE_INDEX = 0,
32     /** x.x */
33     IMPROPER_FRACTION_RULE_INDEX = 1,
34     /** 0.x */
35     PROPER_FRACTION_RULE_INDEX = 2,
36     /** x.0 */
37     MASTER_RULE_INDEX = 3,
38     /** Inf */
39     INFINITY_RULE_INDEX = 4,
40     /** NaN */
41     NAN_RULE_INDEX = 5,
42     NON_NUMERICAL_RULE_LENGTH = 6
43 };
44 
45 U_NAMESPACE_BEGIN
46 
47 #if 0
48 // euclid's algorithm works with doubles
49 // note, doubles only get us up to one quadrillion or so, which
50 // isn't as much range as we get with longs.  We probably still
51 // want either 64-bit math, or BigInteger.
52 
53 static int64_t
54 util_lcm(int64_t x, int64_t y)
55 {
56     x.abs();
57     y.abs();
58 
59     if (x == 0 || y == 0) {
60         return 0;
61     } else {
62         do {
63             if (x < y) {
64                 int64_t t = x; x = y; y = t;
65             }
66             x -= y * (x/y);
67         } while (x != 0);
68 
69         return y;
70     }
71 }
72 
73 #else
74 /**
75  * Calculates the least common multiple of x and y.
76  */
77 static int64_t
util_lcm(int64_t x,int64_t y)78 util_lcm(int64_t x, int64_t y)
79 {
80     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
81     // vol. 2, 1st ed., pp. 298-299
82     int64_t x1 = x;
83     int64_t y1 = y;
84 
85     int p2 = 0;
86     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
87         ++p2;
88         x1 >>= 1;
89         y1 >>= 1;
90     }
91 
92     int64_t t;
93     if ((x1 & 1) == 1) {
94         t = -y1;
95     } else {
96         t = x1;
97     }
98 
99     while (t != 0) {
100         while ((t & 1) == 0) {
101             t = t >> 1;
102         }
103         if (t > 0) {
104             x1 = t;
105         } else {
106             y1 = -t;
107         }
108         t = x1 - y1;
109     }
110 
111     int64_t gcd = x1 << p2;
112 
113     // x * y == gcd(x, y) * lcm(x, y)
114     return x / gcd * y;
115 }
116 #endif
117 
118 static const UChar gPercent = 0x0025;
119 static const UChar gColon = 0x003a;
120 static const UChar gSemicolon = 0x003b;
121 static const UChar gLineFeed = 0x000a;
122 
123 static const UChar gPercentPercent[] =
124 {
125     0x25, 0x25, 0
126 }; /* "%%" */
127 
128 static const UChar gNoparse[] =
129 {
130     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
131 }; /* "@noparse" */
132 
NFRuleSet(RuleBasedNumberFormat * _owner,UnicodeString * descriptions,int32_t index,UErrorCode & status)133 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
134   : name()
135   , rules(0)
136   , owner(_owner)
137   , fractionRules()
138   , fIsFractionRuleSet(FALSE)
139   , fIsPublic(FALSE)
140   , fIsParseable(TRUE)
141 {
142     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
143         nonNumericalRules[i] = NULL;
144     }
145 
146     if (U_FAILURE(status)) {
147         return;
148     }
149 
150     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
151 
152     if (description.length() == 0) {
153         // throw new IllegalArgumentException("Empty rule set description");
154         status = U_PARSE_ERROR;
155         return;
156     }
157 
158     // if the description begins with a rule set name (the rule set
159     // name can be omitted in formatter descriptions that consist
160     // of only one rule set), copy it out into our "name" member
161     // and delete it from the description
162     if (description.charAt(0) == gPercent) {
163         int32_t pos = description.indexOf(gColon);
164         if (pos == -1) {
165             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
166             status = U_PARSE_ERROR;
167         } else {
168             name.setTo(description, 0, pos);
169             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
170             }
171             description.remove(0, pos);
172         }
173     } else {
174         name.setTo(UNICODE_STRING_SIMPLE("%default"));
175     }
176 
177     if (description.length() == 0) {
178         // throw new IllegalArgumentException("Empty rule set description");
179         status = U_PARSE_ERROR;
180     }
181 
182     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
183 
184     if ( name.endsWith(gNoparse,8) ) {
185         fIsParseable = FALSE;
186         name.truncate(name.length()-8); // remove the @noparse from the name
187     }
188 
189     // all of the other members of NFRuleSet are initialized
190     // by parseRules()
191 }
192 
193 void
parseRules(UnicodeString & description,UErrorCode & status)194 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
195 {
196     // start by creating a Vector whose elements are Strings containing
197     // the descriptions of the rules (one rule per element).  The rules
198     // are separated by semicolons (there's no escape facility: ALL
199     // semicolons are rule delimiters)
200 
201     if (U_FAILURE(status)) {
202         return;
203     }
204 
205     // ensure we are starting with an empty rule list
206     rules.deleteAll();
207 
208     // dlf - the original code kept a separate description array for no reason,
209     // so I got rid of it.  The loop was too complex so I simplified it.
210 
211     UnicodeString currentDescription;
212     int32_t oldP = 0;
213     while (oldP < description.length()) {
214         int32_t p = description.indexOf(gSemicolon, oldP);
215         if (p == -1) {
216             p = description.length();
217         }
218         currentDescription.setTo(description, oldP, p - oldP);
219         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
220         oldP = p + 1;
221     }
222 
223     // for rules that didn't specify a base value, their base values
224     // were initialized to 0.  Make another pass through the list and
225     // set all those rules' base values.  We also remove any special
226     // rules from the list and put them into their own member variables
227     int64_t defaultBaseValue = 0;
228 
229     // (this isn't a for loop because we might be deleting items from
230     // the vector-- we want to make sure we only increment i when
231     // we _didn't_ delete aything from the vector)
232     int32_t rulesSize = rules.size();
233     for (int32_t i = 0; i < rulesSize; i++) {
234         NFRule* rule = rules[i];
235         int64_t baseValue = rule->getBaseValue();
236 
237         if (baseValue == 0) {
238             // if the rule's base value is 0, fill in a default
239             // base value (this will be 1 plus the preceding
240             // rule's base value for regular rule sets, and the
241             // same as the preceding rule's base value in fraction
242             // rule sets)
243             rule->setBaseValue(defaultBaseValue, status);
244         }
245         else {
246             // if it's a regular rule that already knows its base value,
247             // check to make sure the rules are in order, and update
248             // the default base value for the next rule
249             if (baseValue < defaultBaseValue) {
250                 // throw new IllegalArgumentException("Rules are not in order");
251                 status = U_PARSE_ERROR;
252                 return;
253             }
254             defaultBaseValue = baseValue;
255         }
256         if (!fIsFractionRuleSet) {
257             ++defaultBaseValue;
258         }
259     }
260 }
261 
262 /**
263  * Set one of the non-numerical rules.
264  * @param rule The rule to set.
265  */
setNonNumericalRule(NFRule * rule)266 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
267     int64_t baseValue = rule->getBaseValue();
268     if (baseValue == NFRule::kNegativeNumberRule) {
269         delete nonNumericalRules[NEGATIVE_RULE_INDEX];
270         nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
271     }
272     else if (baseValue == NFRule::kImproperFractionRule) {
273         setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
274     }
275     else if (baseValue == NFRule::kProperFractionRule) {
276         setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
277     }
278     else if (baseValue == NFRule::kMasterRule) {
279         setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
280     }
281     else if (baseValue == NFRule::kInfinityRule) {
282         delete nonNumericalRules[INFINITY_RULE_INDEX];
283         nonNumericalRules[INFINITY_RULE_INDEX] = rule;
284     }
285     else if (baseValue == NFRule::kNaNRule) {
286         delete nonNumericalRules[NAN_RULE_INDEX];
287         nonNumericalRules[NAN_RULE_INDEX] = rule;
288     }
289 }
290 
291 /**
292  * Determine the best fraction rule to use. Rules matching the decimal point from
293  * DecimalFormatSymbols become the main set of rules to use.
294  * @param originalIndex The index into nonNumericalRules
295  * @param newRule The new rule to consider
296  * @param rememberRule Should the new rule be added to fractionRules.
297  */
setBestFractionRule(int32_t originalIndex,NFRule * newRule,UBool rememberRule)298 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
299     if (rememberRule) {
300         fractionRules.add(newRule);
301     }
302     NFRule *bestResult = nonNumericalRules[originalIndex];
303     if (bestResult == NULL) {
304         nonNumericalRules[originalIndex] = newRule;
305     }
306     else {
307         // We have more than one. Which one is better?
308         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
309         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
310             == newRule->getDecimalPoint())
311         {
312             nonNumericalRules[originalIndex] = newRule;
313         }
314         // else leave it alone
315     }
316 }
317 
~NFRuleSet()318 NFRuleSet::~NFRuleSet()
319 {
320     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
321         if (i != IMPROPER_FRACTION_RULE_INDEX
322             && i != PROPER_FRACTION_RULE_INDEX
323             && i != MASTER_RULE_INDEX)
324         {
325             delete nonNumericalRules[i];
326         }
327         // else it will be deleted via NFRuleList fractionRules
328     }
329 }
330 
331 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)332 util_equalRules(const NFRule* rule1, const NFRule* rule2)
333 {
334     if (rule1) {
335         if (rule2) {
336             return *rule1 == *rule2;
337         }
338     } else if (!rule2) {
339         return TRUE;
340     }
341     return FALSE;
342 }
343 
344 UBool
operator ==(const NFRuleSet & rhs) const345 NFRuleSet::operator==(const NFRuleSet& rhs) const
346 {
347     if (rules.size() == rhs.rules.size() &&
348         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
349         name == rhs.name) {
350 
351         // ...then compare the non-numerical rule lists...
352         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
353             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
354                 return FALSE;
355             }
356         }
357 
358         // ...then compare the rule lists...
359         for (uint32_t i = 0; i < rules.size(); ++i) {
360             if (*rules[i] != *rhs.rules[i]) {
361                 return FALSE;
362             }
363         }
364         return TRUE;
365     }
366     return FALSE;
367 }
368 
369 void
setDecimalFormatSymbols(const DecimalFormatSymbols & newSymbols,UErrorCode & status)370 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
371     for (uint32_t i = 0; i < rules.size(); ++i) {
372         rules[i]->setDecimalFormatSymbols(newSymbols, status);
373     }
374     // Switch the fraction rules to mirror the DecimalFormatSymbols.
375     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
376         if (nonNumericalRules[nonNumericalIdx]) {
377             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
378                 NFRule *fractionRule = fractionRules[fIdx];
379                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
380                     setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
381                 }
382             }
383         }
384     }
385 
386     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
387         NFRule *rule = nonNumericalRules[nnrIdx];
388         if (rule) {
389             rule->setDecimalFormatSymbols(newSymbols, status);
390         }
391     }
392 }
393 
394 #define RECURSION_LIMIT 64
395 
396 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const397 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
398 {
399     if (recursionCount >= RECURSION_LIMIT) {
400         // stop recursion
401         status = U_INVALID_STATE_ERROR;
402         return;
403     }
404     const NFRule *rule = findNormalRule(number);
405     if (rule) { // else error, but can't report it
406         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
407     }
408 }
409 
410 void
format(double number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const411 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
412 {
413     if (recursionCount >= RECURSION_LIMIT) {
414         // stop recursion
415         status = U_INVALID_STATE_ERROR;
416         return;
417     }
418     const NFRule *rule = findDoubleRule(number);
419     if (rule) { // else error, but can't report it
420         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
421     }
422 }
423 
424 const NFRule*
findDoubleRule(double number) const425 NFRuleSet::findDoubleRule(double number) const
426 {
427     // if this is a fraction rule set, use findFractionRuleSetRule()
428     if (isFractionRuleSet()) {
429         return findFractionRuleSetRule(number);
430     }
431 
432     if (uprv_isNaN(number)) {
433         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
434         if (!rule) {
435             rule = owner->getDefaultNaNRule();
436         }
437         return rule;
438     }
439 
440     // if the number is negative, return the negative number rule
441     // (if there isn't a negative-number rule, we pretend it's a
442     // positive number)
443     if (number < 0) {
444         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
445             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
446         } else {
447             number = -number;
448         }
449     }
450 
451     if (uprv_isInfinite(number)) {
452         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
453         if (!rule) {
454             rule = owner->getDefaultInfinityRule();
455         }
456         return rule;
457     }
458 
459     // if the number isn't an integer, we use one of the fraction rules...
460     if (number != uprv_floor(number)) {
461         // if the number is between 0 and 1, return the proper
462         // fraction rule
463         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
464             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
465         }
466         // otherwise, return the improper fraction rule
467         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
468             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
469         }
470     }
471 
472     // if there's a master rule, use it to format the number
473     if (nonNumericalRules[MASTER_RULE_INDEX]) {
474         return nonNumericalRules[MASTER_RULE_INDEX];
475     }
476 
477     // and if we haven't yet returned a rule, use findNormalRule()
478     // to find the applicable rule
479     int64_t r = util64_fromDouble(number + 0.5);
480     return findNormalRule(r);
481 }
482 
483 const NFRule *
findNormalRule(int64_t number) const484 NFRuleSet::findNormalRule(int64_t number) const
485 {
486     // if this is a fraction rule set, use findFractionRuleSetRule()
487     // to find the rule (we should only go into this clause if the
488     // value is 0)
489     if (fIsFractionRuleSet) {
490         return findFractionRuleSetRule((double)number);
491     }
492 
493     // if the number is negative, return the negative-number rule
494     // (if there isn't one, pretend the number is positive)
495     if (number < 0) {
496         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
497             return nonNumericalRules[NEGATIVE_RULE_INDEX];
498         } else {
499             number = -number;
500         }
501     }
502 
503     // we have to repeat the preceding two checks, even though we
504     // do them in findRule(), because the version of format() that
505     // takes a long bypasses findRule() and goes straight to this
506     // function.  This function does skip the fraction rules since
507     // we know the value is an integer (it also skips the master
508     // rule, since it's considered a fraction rule.  Skipping the
509     // master rule in this function is also how we avoid infinite
510     // recursion)
511 
512     // {dlf} unfortunately this fails if there are no rules except
513     // special rules.  If there are no rules, use the master rule.
514 
515     // binary-search the rule list for the applicable rule
516     // (a rule is used for all values from its base value to
517     // the next rule's base value)
518     int32_t hi = rules.size();
519     if (hi > 0) {
520         int32_t lo = 0;
521 
522         while (lo < hi) {
523             int32_t mid = (lo + hi) / 2;
524             if (rules[mid]->getBaseValue() == number) {
525                 return rules[mid];
526             }
527             else if (rules[mid]->getBaseValue() > number) {
528                 hi = mid;
529             }
530             else {
531                 lo = mid + 1;
532             }
533         }
534         if (hi == 0) { // bad rule set, minimum base > 0
535             return NULL; // want to throw exception here
536         }
537 
538         NFRule *result = rules[hi - 1];
539 
540         // use shouldRollBack() to see whether we need to invoke the
541         // rollback rule (see shouldRollBack()'s documentation for
542         // an explanation of the rollback rule).  If we do, roll back
543         // one rule and return that one instead of the one we'd normally
544         // return
545         if (result->shouldRollBack((double)number)) {
546             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
547                 return NULL;
548             }
549             result = rules[hi - 2];
550         }
551         return result;
552     }
553     // else use the master rule
554     return nonNumericalRules[MASTER_RULE_INDEX];
555 }
556 
557 /**
558  * If this rule is a fraction rule set, this function is used by
559  * findRule() to select the most appropriate rule for formatting
560  * the number.  Basically, the base value of each rule in the rule
561  * set is treated as the denominator of a fraction.  Whichever
562  * denominator can produce the fraction closest in value to the
563  * number passed in is the result.  If there's a tie, the earlier
564  * one in the list wins.  (If there are two rules in a row with the
565  * same base value, the first one is used when the numerator of the
566  * fraction would be 1, and the second rule is used the rest of the
567  * time.
568  * @param number The number being formatted (which will always be
569  * a number between 0 and 1)
570  * @return The rule to use to format this number
571  */
572 const NFRule*
findFractionRuleSetRule(double number) const573 NFRuleSet::findFractionRuleSetRule(double number) const
574 {
575     // the obvious way to do this (multiply the value being formatted
576     // by each rule's base value until you get an integral result)
577     // doesn't work because of rounding error.  This method is more
578     // accurate
579 
580     // find the least common multiple of the rules' base values
581     // and multiply this by the number being formatted.  This is
582     // all the precision we need, and we can do all of the rest
583     // of the math using integer arithmetic
584     int64_t leastCommonMultiple = rules[0]->getBaseValue();
585     int64_t numerator;
586     {
587         for (uint32_t i = 1; i < rules.size(); ++i) {
588             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
589         }
590         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
591     }
592     // for each rule, do the following...
593     int64_t tempDifference;
594     int64_t difference = util64_fromDouble(uprv_maxMantissa());
595     int32_t winner = 0;
596     for (uint32_t i = 0; i < rules.size(); ++i) {
597         // "numerator" is the numerator of the fraction if the
598         // denominator is the LCD.  The numerator if the rule's
599         // base value is the denominator is "numerator" times the
600         // base value divided bythe LCD.  Here we check to see if
601         // that's an integer, and if not, how close it is to being
602         // an integer.
603         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
604 
605 
606         // normalize the result of the above calculation: we want
607         // the numerator's distance from the CLOSEST multiple
608         // of the LCD
609         if (leastCommonMultiple - tempDifference < tempDifference) {
610             tempDifference = leastCommonMultiple - tempDifference;
611         }
612 
613         // if this is as close as we've come, keep track of how close
614         // that is, and the line number of the rule that did it.  If
615         // we've scored a direct hit, we don't have to look at any more
616         // rules
617         if (tempDifference < difference) {
618             difference = tempDifference;
619             winner = i;
620             if (difference == 0) {
621                 break;
622             }
623         }
624     }
625 
626     // if we have two successive rules that both have the winning base
627     // value, then the first one (the one we found above) is used if
628     // the numerator of the fraction is 1 and the second one is used if
629     // the numerator of the fraction is anything else (this lets us
630     // do things like "one third"/"two thirds" without haveing to define
631     // a whole bunch of extra rule sets)
632     if ((unsigned)(winner + 1) < rules.size() &&
633         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
634         double n = ((double)rules[winner]->getBaseValue()) * number;
635         if (n < 0.5 || n >= 2) {
636             ++winner;
637         }
638     }
639 
640     // finally, return the winning rule
641     return rules[winner];
642 }
643 
644 /**
645  * Parses a string.  Matches the string to be parsed against each
646  * of its rules (with a base value less than upperBound) and returns
647  * the value produced by the rule that matched the most charcters
648  * in the source string.
649  * @param text The string to parse
650  * @param parsePosition The initial position is ignored and assumed
651  * to be 0.  On exit, this object has been updated to point to the
652  * first character position this rule set didn't consume.
653  * @param upperBound Limits the rules that can be allowed to match.
654  * Only rules whose base values are strictly less than upperBound
655  * are considered.
656  * @return The numerical result of parsing this string.  This will
657  * be the matching rule's base value, composed appropriately with
658  * the results of matching any of its substitutions.  The object
659  * will be an instance of Long if it's an integral value; otherwise,
660  * it will be an instance of Double.  This function always returns
661  * a valid object: If nothing matched the input string at all,
662  * this function returns new Long(0), and the parse position is
663  * left unchanged.
664  */
665 #ifdef RBNF_DEBUG
666 #include <stdio.h>
667 
dumpUS(FILE * f,const UnicodeString & us)668 static void dumpUS(FILE* f, const UnicodeString& us) {
669   int len = us.length();
670   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
671   if (buf != NULL) {
672 	  us.extract(0, len, buf);
673 	  buf[len] = 0;
674 	  fprintf(f, "%s", buf);
675 	  uprv_free(buf); //delete[] buf;
676   }
677 }
678 #endif
679 
680 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const681 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
682 {
683     // try matching each rule in the rule set against the text being
684     // parsed.  Whichever one matches the most characters is the one
685     // that determines the value we return.
686 
687     result.setLong(0);
688 
689     // dump out if there's no text to parse
690     if (text.length() == 0) {
691         return 0;
692     }
693 
694     ParsePosition highWaterMark;
695     ParsePosition workingPos = pos;
696 
697 #ifdef RBNF_DEBUG
698     fprintf(stderr, "<nfrs> %x '", this);
699     dumpUS(stderr, name);
700     fprintf(stderr, "' text '");
701     dumpUS(stderr, text);
702     fprintf(stderr, "'\n");
703     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
704 #endif
705     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
706     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
707         if (nonNumericalRules[i]) {
708             Formattable tempResult;
709             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
710             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
711                 result = tempResult;
712                 highWaterMark = workingPos;
713             }
714             workingPos = pos;
715         }
716     }
717 #ifdef RBNF_DEBUG
718     fprintf(stderr, "<nfrs> continue other with text '");
719     dumpUS(stderr, text);
720     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
721 #endif
722 
723     // finally, go through the regular rules one at a time.  We start
724     // at the end of the list because we want to try matching the most
725     // sigificant rule first (this helps ensure that we parse
726     // "five thousand three hundred six" as
727     // "(five thousand) (three hundred) (six)" rather than
728     // "((five thousand three) hundred) (six)").  Skip rules whose
729     // base values are higher than the upper bound (again, this helps
730     // limit ambiguity by making sure the rules that match a rule's
731     // are less significant than the rule containing the substitutions)/
732     {
733         int64_t ub = util64_fromDouble(upperBound);
734 #ifdef RBNF_DEBUG
735         {
736             char ubstr[64];
737             util64_toa(ub, ubstr, 64);
738             char ubstrhex[64];
739             util64_toa(ub, ubstrhex, 64, 16);
740             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
741         }
742 #endif
743         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
744             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
745                 continue;
746             }
747             Formattable tempResult;
748             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
749             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
750                 result = tempResult;
751                 highWaterMark = workingPos;
752             }
753             workingPos = pos;
754         }
755     }
756 #ifdef RBNF_DEBUG
757     fprintf(stderr, "<nfrs> exit\n");
758 #endif
759     // finally, update the parse postion we were passed to point to the
760     // first character we didn't use, and return the result that
761     // corresponds to that string of characters
762     pos = highWaterMark;
763 
764     return 1;
765 }
766 
767 void
appendRules(UnicodeString & result) const768 NFRuleSet::appendRules(UnicodeString& result) const
769 {
770     uint32_t i;
771 
772     // the rule set name goes first...
773     result.append(name);
774     result.append(gColon);
775     result.append(gLineFeed);
776 
777     // followed by the regular rules...
778     for (i = 0; i < rules.size(); i++) {
779         rules[i]->_appendRuleText(result);
780         result.append(gLineFeed);
781     }
782 
783     // followed by the special rules (if they exist)
784     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
785         NFRule *rule = nonNumericalRules[i];
786         if (nonNumericalRules[i]) {
787             if (rule->getBaseValue() == NFRule::kImproperFractionRule
788                 || rule->getBaseValue() == NFRule::kProperFractionRule
789                 || rule->getBaseValue() == NFRule::kMasterRule)
790             {
791                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
792                     NFRule *fractionRule = fractionRules[fIdx];
793                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
794                         fractionRule->_appendRuleText(result);
795                         result.append(gLineFeed);
796                     }
797                 }
798             }
799             else {
800                 rule->_appendRuleText(result);
801                 result.append(gLineFeed);
802             }
803         }
804     }
805 }
806 
807 // utility functions
808 
util64_fromDouble(double d)809 int64_t util64_fromDouble(double d) {
810     int64_t result = 0;
811     if (!uprv_isNaN(d)) {
812         double mant = uprv_maxMantissa();
813         if (d < -mant) {
814             d = -mant;
815         } else if (d > mant) {
816             d = mant;
817         }
818         UBool neg = d < 0;
819         if (neg) {
820             d = -d;
821         }
822         result = (int64_t)uprv_floor(d);
823         if (neg) {
824             result = -result;
825         }
826     }
827     return result;
828 }
829 
util64_pow(int32_t r,uint32_t e)830 int64_t util64_pow(int32_t r, uint32_t e)  {
831     if (r == 0) {
832         return 0;
833     } else if (e == 0) {
834         return 1;
835     } else {
836         int64_t n = r;
837         while (--e > 0) {
838             n *= r;
839         }
840         return n;
841     }
842 }
843 
844 static const uint8_t asciiDigits[] = {
845     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
846     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
847     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
848     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
849     0x77u, 0x78u, 0x79u, 0x7au,
850 };
851 
852 static const UChar kUMinus = (UChar)0x002d;
853 
854 #ifdef RBNF_DEBUG
855 static const char kMinus = '-';
856 
857 static const uint8_t digitInfo[] = {
858         0,     0,     0,     0,     0,     0,     0,     0,
859         0,     0,     0,     0,     0,     0,     0,     0,
860         0,     0,     0,     0,     0,     0,     0,     0,
861         0,     0,     0,     0,     0,     0,     0,     0,
862         0,     0,     0,     0,     0,     0,     0,     0,
863         0,     0,     0,     0,     0,     0,     0,     0,
864     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
865     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
866         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
867     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
868     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
869     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
870         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
871     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
872     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
873     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
874 };
875 
util64_atoi(const char * str,uint32_t radix)876 int64_t util64_atoi(const char* str, uint32_t radix)
877 {
878     if (radix > 36) {
879         radix = 36;
880     } else if (radix < 2) {
881         radix = 2;
882     }
883     int64_t lradix = radix;
884 
885     int neg = 0;
886     if (*str == kMinus) {
887         ++str;
888         neg = 1;
889     }
890     int64_t result = 0;
891     uint8_t b;
892     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
893         result *= lradix;
894         result += (int32_t)b;
895     }
896     if (neg) {
897         result = -result;
898     }
899     return result;
900 }
901 
util64_utoi(const UChar * str,uint32_t radix)902 int64_t util64_utoi(const UChar* str, uint32_t radix)
903 {
904     if (radix > 36) {
905         radix = 36;
906     } else if (radix < 2) {
907         radix = 2;
908     }
909     int64_t lradix = radix;
910 
911     int neg = 0;
912     if (*str == kUMinus) {
913         ++str;
914         neg = 1;
915     }
916     int64_t result = 0;
917     UChar c;
918     uint8_t b;
919     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
920         result *= lradix;
921         result += (int32_t)b;
922     }
923     if (neg) {
924         result = -result;
925     }
926     return result;
927 }
928 
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)929 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
930 {
931     if (radix > 36) {
932         radix = 36;
933     } else if (radix < 2) {
934         radix = 2;
935     }
936     int64_t base = radix;
937 
938     char* p = buf;
939     if (len && (w < 0) && (radix == 10) && !raw) {
940         w = -w;
941         *p++ = kMinus;
942         --len;
943     } else if (len && (w == 0)) {
944         *p++ = (char)raw ? 0 : asciiDigits[0];
945         --len;
946     }
947 
948     while (len && w != 0) {
949         int64_t n = w / base;
950         int64_t m = n * base;
951         int32_t d = (int32_t)(w-m);
952         *p++ = raw ? (char)d : asciiDigits[d];
953         w = n;
954         --len;
955     }
956     if (len) {
957         *p = 0; // null terminate if room for caller convenience
958     }
959 
960     len = p - buf;
961     if (*buf == kMinus) {
962         ++buf;
963     }
964     while (--p > buf) {
965         char c = *p;
966         *p = *buf;
967         *buf = c;
968         ++buf;
969     }
970 
971     return len;
972 }
973 #endif
974 
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)975 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
976 {
977     if (radix > 36) {
978         radix = 36;
979     } else if (radix < 2) {
980         radix = 2;
981     }
982     int64_t base = radix;
983 
984     UChar* p = buf;
985     if (len && (w < 0) && (radix == 10) && !raw) {
986         w = -w;
987         *p++ = kUMinus;
988         --len;
989     } else if (len && (w == 0)) {
990         *p++ = (UChar)raw ? 0 : asciiDigits[0];
991         --len;
992     }
993 
994     while (len && (w != 0)) {
995         int64_t n = w / base;
996         int64_t m = n * base;
997         int32_t d = (int32_t)(w-m);
998         *p++ = (UChar)(raw ? d : asciiDigits[d]);
999         w = n;
1000         --len;
1001     }
1002     if (len) {
1003         *p = 0; // null terminate if room for caller convenience
1004     }
1005 
1006     len = (uint32_t)(p - buf);
1007     if (*buf == kUMinus) {
1008         ++buf;
1009     }
1010     while (--p > buf) {
1011         UChar c = *p;
1012         *p = *buf;
1013         *buf = c;
1014         ++buf;
1015     }
1016 
1017     return len;
1018 }
1019 
1020 
1021 U_NAMESPACE_END
1022 
1023 /* U_HAVE_RBNF */
1024 #endif
1025 
1026