1 /*
2 ******************************************************************************
3 * Copyright (C) 1997-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * file name: nfrs.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Modification history
12 * Date Name Comments
13 * 10/11/2001 Doug Ported from ICU4J
14 */
15
16 #include "nfrs.h"
17
18 #if U_HAVE_RBNF
19
20 #include "unicode/uchar.h"
21 #include "nfrule.h"
22 #include "nfrlist.h"
23 #include "patternprops.h"
24
25 #ifdef RBNF_DEBUG
26 #include "cmemory.h"
27 #endif
28
29 enum {
30 /** -x */
31 NEGATIVE_RULE_INDEX = 0,
32 /** x.x */
33 IMPROPER_FRACTION_RULE_INDEX = 1,
34 /** 0.x */
35 PROPER_FRACTION_RULE_INDEX = 2,
36 /** x.0 */
37 MASTER_RULE_INDEX = 3,
38 /** Inf */
39 INFINITY_RULE_INDEX = 4,
40 /** NaN */
41 NAN_RULE_INDEX = 5,
42 NON_NUMERICAL_RULE_LENGTH = 6
43 };
44
45 U_NAMESPACE_BEGIN
46
47 #if 0
48 // euclid's algorithm works with doubles
49 // note, doubles only get us up to one quadrillion or so, which
50 // isn't as much range as we get with longs. We probably still
51 // want either 64-bit math, or BigInteger.
52
53 static int64_t
54 util_lcm(int64_t x, int64_t y)
55 {
56 x.abs();
57 y.abs();
58
59 if (x == 0 || y == 0) {
60 return 0;
61 } else {
62 do {
63 if (x < y) {
64 int64_t t = x; x = y; y = t;
65 }
66 x -= y * (x/y);
67 } while (x != 0);
68
69 return y;
70 }
71 }
72
73 #else
74 /**
75 * Calculates the least common multiple of x and y.
76 */
77 static int64_t
util_lcm(int64_t x,int64_t y)78 util_lcm(int64_t x, int64_t y)
79 {
80 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
81 // vol. 2, 1st ed., pp. 298-299
82 int64_t x1 = x;
83 int64_t y1 = y;
84
85 int p2 = 0;
86 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
87 ++p2;
88 x1 >>= 1;
89 y1 >>= 1;
90 }
91
92 int64_t t;
93 if ((x1 & 1) == 1) {
94 t = -y1;
95 } else {
96 t = x1;
97 }
98
99 while (t != 0) {
100 while ((t & 1) == 0) {
101 t = t >> 1;
102 }
103 if (t > 0) {
104 x1 = t;
105 } else {
106 y1 = -t;
107 }
108 t = x1 - y1;
109 }
110
111 int64_t gcd = x1 << p2;
112
113 // x * y == gcd(x, y) * lcm(x, y)
114 return x / gcd * y;
115 }
116 #endif
117
118 static const UChar gPercent = 0x0025;
119 static const UChar gColon = 0x003a;
120 static const UChar gSemicolon = 0x003b;
121 static const UChar gLineFeed = 0x000a;
122
123 static const UChar gPercentPercent[] =
124 {
125 0x25, 0x25, 0
126 }; /* "%%" */
127
128 static const UChar gNoparse[] =
129 {
130 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
131 }; /* "@noparse" */
132
NFRuleSet(RuleBasedNumberFormat * _owner,UnicodeString * descriptions,int32_t index,UErrorCode & status)133 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
134 : name()
135 , rules(0)
136 , owner(_owner)
137 , fractionRules()
138 , fIsFractionRuleSet(FALSE)
139 , fIsPublic(FALSE)
140 , fIsParseable(TRUE)
141 {
142 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
143 nonNumericalRules[i] = NULL;
144 }
145
146 if (U_FAILURE(status)) {
147 return;
148 }
149
150 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
151
152 if (description.length() == 0) {
153 // throw new IllegalArgumentException("Empty rule set description");
154 status = U_PARSE_ERROR;
155 return;
156 }
157
158 // if the description begins with a rule set name (the rule set
159 // name can be omitted in formatter descriptions that consist
160 // of only one rule set), copy it out into our "name" member
161 // and delete it from the description
162 if (description.charAt(0) == gPercent) {
163 int32_t pos = description.indexOf(gColon);
164 if (pos == -1) {
165 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
166 status = U_PARSE_ERROR;
167 } else {
168 name.setTo(description, 0, pos);
169 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
170 }
171 description.remove(0, pos);
172 }
173 } else {
174 name.setTo(UNICODE_STRING_SIMPLE("%default"));
175 }
176
177 if (description.length() == 0) {
178 // throw new IllegalArgumentException("Empty rule set description");
179 status = U_PARSE_ERROR;
180 }
181
182 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
183
184 if ( name.endsWith(gNoparse,8) ) {
185 fIsParseable = FALSE;
186 name.truncate(name.length()-8); // remove the @noparse from the name
187 }
188
189 // all of the other members of NFRuleSet are initialized
190 // by parseRules()
191 }
192
193 void
parseRules(UnicodeString & description,UErrorCode & status)194 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
195 {
196 // start by creating a Vector whose elements are Strings containing
197 // the descriptions of the rules (one rule per element). The rules
198 // are separated by semicolons (there's no escape facility: ALL
199 // semicolons are rule delimiters)
200
201 if (U_FAILURE(status)) {
202 return;
203 }
204
205 // ensure we are starting with an empty rule list
206 rules.deleteAll();
207
208 // dlf - the original code kept a separate description array for no reason,
209 // so I got rid of it. The loop was too complex so I simplified it.
210
211 UnicodeString currentDescription;
212 int32_t oldP = 0;
213 while (oldP < description.length()) {
214 int32_t p = description.indexOf(gSemicolon, oldP);
215 if (p == -1) {
216 p = description.length();
217 }
218 currentDescription.setTo(description, oldP, p - oldP);
219 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
220 oldP = p + 1;
221 }
222
223 // for rules that didn't specify a base value, their base values
224 // were initialized to 0. Make another pass through the list and
225 // set all those rules' base values. We also remove any special
226 // rules from the list and put them into their own member variables
227 int64_t defaultBaseValue = 0;
228
229 // (this isn't a for loop because we might be deleting items from
230 // the vector-- we want to make sure we only increment i when
231 // we _didn't_ delete aything from the vector)
232 int32_t rulesSize = rules.size();
233 for (int32_t i = 0; i < rulesSize; i++) {
234 NFRule* rule = rules[i];
235 int64_t baseValue = rule->getBaseValue();
236
237 if (baseValue == 0) {
238 // if the rule's base value is 0, fill in a default
239 // base value (this will be 1 plus the preceding
240 // rule's base value for regular rule sets, and the
241 // same as the preceding rule's base value in fraction
242 // rule sets)
243 rule->setBaseValue(defaultBaseValue, status);
244 }
245 else {
246 // if it's a regular rule that already knows its base value,
247 // check to make sure the rules are in order, and update
248 // the default base value for the next rule
249 if (baseValue < defaultBaseValue) {
250 // throw new IllegalArgumentException("Rules are not in order");
251 status = U_PARSE_ERROR;
252 return;
253 }
254 defaultBaseValue = baseValue;
255 }
256 if (!fIsFractionRuleSet) {
257 ++defaultBaseValue;
258 }
259 }
260 }
261
262 /**
263 * Set one of the non-numerical rules.
264 * @param rule The rule to set.
265 */
setNonNumericalRule(NFRule * rule)266 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
267 int64_t baseValue = rule->getBaseValue();
268 if (baseValue == NFRule::kNegativeNumberRule) {
269 delete nonNumericalRules[NEGATIVE_RULE_INDEX];
270 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
271 }
272 else if (baseValue == NFRule::kImproperFractionRule) {
273 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
274 }
275 else if (baseValue == NFRule::kProperFractionRule) {
276 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
277 }
278 else if (baseValue == NFRule::kMasterRule) {
279 setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
280 }
281 else if (baseValue == NFRule::kInfinityRule) {
282 delete nonNumericalRules[INFINITY_RULE_INDEX];
283 nonNumericalRules[INFINITY_RULE_INDEX] = rule;
284 }
285 else if (baseValue == NFRule::kNaNRule) {
286 delete nonNumericalRules[NAN_RULE_INDEX];
287 nonNumericalRules[NAN_RULE_INDEX] = rule;
288 }
289 }
290
291 /**
292 * Determine the best fraction rule to use. Rules matching the decimal point from
293 * DecimalFormatSymbols become the main set of rules to use.
294 * @param originalIndex The index into nonNumericalRules
295 * @param newRule The new rule to consider
296 * @param rememberRule Should the new rule be added to fractionRules.
297 */
setBestFractionRule(int32_t originalIndex,NFRule * newRule,UBool rememberRule)298 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
299 if (rememberRule) {
300 fractionRules.add(newRule);
301 }
302 NFRule *bestResult = nonNumericalRules[originalIndex];
303 if (bestResult == NULL) {
304 nonNumericalRules[originalIndex] = newRule;
305 }
306 else {
307 // We have more than one. Which one is better?
308 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
309 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
310 == newRule->getDecimalPoint())
311 {
312 nonNumericalRules[originalIndex] = newRule;
313 }
314 // else leave it alone
315 }
316 }
317
~NFRuleSet()318 NFRuleSet::~NFRuleSet()
319 {
320 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
321 if (i != IMPROPER_FRACTION_RULE_INDEX
322 && i != PROPER_FRACTION_RULE_INDEX
323 && i != MASTER_RULE_INDEX)
324 {
325 delete nonNumericalRules[i];
326 }
327 // else it will be deleted via NFRuleList fractionRules
328 }
329 }
330
331 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)332 util_equalRules(const NFRule* rule1, const NFRule* rule2)
333 {
334 if (rule1) {
335 if (rule2) {
336 return *rule1 == *rule2;
337 }
338 } else if (!rule2) {
339 return TRUE;
340 }
341 return FALSE;
342 }
343
344 UBool
operator ==(const NFRuleSet & rhs) const345 NFRuleSet::operator==(const NFRuleSet& rhs) const
346 {
347 if (rules.size() == rhs.rules.size() &&
348 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
349 name == rhs.name) {
350
351 // ...then compare the non-numerical rule lists...
352 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
353 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
354 return FALSE;
355 }
356 }
357
358 // ...then compare the rule lists...
359 for (uint32_t i = 0; i < rules.size(); ++i) {
360 if (*rules[i] != *rhs.rules[i]) {
361 return FALSE;
362 }
363 }
364 return TRUE;
365 }
366 return FALSE;
367 }
368
369 void
setDecimalFormatSymbols(const DecimalFormatSymbols & newSymbols,UErrorCode & status)370 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
371 for (uint32_t i = 0; i < rules.size(); ++i) {
372 rules[i]->setDecimalFormatSymbols(newSymbols, status);
373 }
374 // Switch the fraction rules to mirror the DecimalFormatSymbols.
375 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
376 if (nonNumericalRules[nonNumericalIdx]) {
377 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
378 NFRule *fractionRule = fractionRules[fIdx];
379 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
380 setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
381 }
382 }
383 }
384 }
385
386 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
387 NFRule *rule = nonNumericalRules[nnrIdx];
388 if (rule) {
389 rule->setDecimalFormatSymbols(newSymbols, status);
390 }
391 }
392 }
393
394 #define RECURSION_LIMIT 64
395
396 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const397 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
398 {
399 if (recursionCount >= RECURSION_LIMIT) {
400 // stop recursion
401 status = U_INVALID_STATE_ERROR;
402 return;
403 }
404 const NFRule *rule = findNormalRule(number);
405 if (rule) { // else error, but can't report it
406 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
407 }
408 }
409
410 void
format(double number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const411 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
412 {
413 if (recursionCount >= RECURSION_LIMIT) {
414 // stop recursion
415 status = U_INVALID_STATE_ERROR;
416 return;
417 }
418 const NFRule *rule = findDoubleRule(number);
419 if (rule) { // else error, but can't report it
420 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
421 }
422 }
423
424 const NFRule*
findDoubleRule(double number) const425 NFRuleSet::findDoubleRule(double number) const
426 {
427 // if this is a fraction rule set, use findFractionRuleSetRule()
428 if (isFractionRuleSet()) {
429 return findFractionRuleSetRule(number);
430 }
431
432 if (uprv_isNaN(number)) {
433 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
434 if (!rule) {
435 rule = owner->getDefaultNaNRule();
436 }
437 return rule;
438 }
439
440 // if the number is negative, return the negative number rule
441 // (if there isn't a negative-number rule, we pretend it's a
442 // positive number)
443 if (number < 0) {
444 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
445 return nonNumericalRules[NEGATIVE_RULE_INDEX];
446 } else {
447 number = -number;
448 }
449 }
450
451 if (uprv_isInfinite(number)) {
452 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
453 if (!rule) {
454 rule = owner->getDefaultInfinityRule();
455 }
456 return rule;
457 }
458
459 // if the number isn't an integer, we use one of the fraction rules...
460 if (number != uprv_floor(number)) {
461 // if the number is between 0 and 1, return the proper
462 // fraction rule
463 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
464 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
465 }
466 // otherwise, return the improper fraction rule
467 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
468 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
469 }
470 }
471
472 // if there's a master rule, use it to format the number
473 if (nonNumericalRules[MASTER_RULE_INDEX]) {
474 return nonNumericalRules[MASTER_RULE_INDEX];
475 }
476
477 // and if we haven't yet returned a rule, use findNormalRule()
478 // to find the applicable rule
479 int64_t r = util64_fromDouble(number + 0.5);
480 return findNormalRule(r);
481 }
482
483 const NFRule *
findNormalRule(int64_t number) const484 NFRuleSet::findNormalRule(int64_t number) const
485 {
486 // if this is a fraction rule set, use findFractionRuleSetRule()
487 // to find the rule (we should only go into this clause if the
488 // value is 0)
489 if (fIsFractionRuleSet) {
490 return findFractionRuleSetRule((double)number);
491 }
492
493 // if the number is negative, return the negative-number rule
494 // (if there isn't one, pretend the number is positive)
495 if (number < 0) {
496 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
497 return nonNumericalRules[NEGATIVE_RULE_INDEX];
498 } else {
499 number = -number;
500 }
501 }
502
503 // we have to repeat the preceding two checks, even though we
504 // do them in findRule(), because the version of format() that
505 // takes a long bypasses findRule() and goes straight to this
506 // function. This function does skip the fraction rules since
507 // we know the value is an integer (it also skips the master
508 // rule, since it's considered a fraction rule. Skipping the
509 // master rule in this function is also how we avoid infinite
510 // recursion)
511
512 // {dlf} unfortunately this fails if there are no rules except
513 // special rules. If there are no rules, use the master rule.
514
515 // binary-search the rule list for the applicable rule
516 // (a rule is used for all values from its base value to
517 // the next rule's base value)
518 int32_t hi = rules.size();
519 if (hi > 0) {
520 int32_t lo = 0;
521
522 while (lo < hi) {
523 int32_t mid = (lo + hi) / 2;
524 if (rules[mid]->getBaseValue() == number) {
525 return rules[mid];
526 }
527 else if (rules[mid]->getBaseValue() > number) {
528 hi = mid;
529 }
530 else {
531 lo = mid + 1;
532 }
533 }
534 if (hi == 0) { // bad rule set, minimum base > 0
535 return NULL; // want to throw exception here
536 }
537
538 NFRule *result = rules[hi - 1];
539
540 // use shouldRollBack() to see whether we need to invoke the
541 // rollback rule (see shouldRollBack()'s documentation for
542 // an explanation of the rollback rule). If we do, roll back
543 // one rule and return that one instead of the one we'd normally
544 // return
545 if (result->shouldRollBack((double)number)) {
546 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
547 return NULL;
548 }
549 result = rules[hi - 2];
550 }
551 return result;
552 }
553 // else use the master rule
554 return nonNumericalRules[MASTER_RULE_INDEX];
555 }
556
557 /**
558 * If this rule is a fraction rule set, this function is used by
559 * findRule() to select the most appropriate rule for formatting
560 * the number. Basically, the base value of each rule in the rule
561 * set is treated as the denominator of a fraction. Whichever
562 * denominator can produce the fraction closest in value to the
563 * number passed in is the result. If there's a tie, the earlier
564 * one in the list wins. (If there are two rules in a row with the
565 * same base value, the first one is used when the numerator of the
566 * fraction would be 1, and the second rule is used the rest of the
567 * time.
568 * @param number The number being formatted (which will always be
569 * a number between 0 and 1)
570 * @return The rule to use to format this number
571 */
572 const NFRule*
findFractionRuleSetRule(double number) const573 NFRuleSet::findFractionRuleSetRule(double number) const
574 {
575 // the obvious way to do this (multiply the value being formatted
576 // by each rule's base value until you get an integral result)
577 // doesn't work because of rounding error. This method is more
578 // accurate
579
580 // find the least common multiple of the rules' base values
581 // and multiply this by the number being formatted. This is
582 // all the precision we need, and we can do all of the rest
583 // of the math using integer arithmetic
584 int64_t leastCommonMultiple = rules[0]->getBaseValue();
585 int64_t numerator;
586 {
587 for (uint32_t i = 1; i < rules.size(); ++i) {
588 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
589 }
590 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
591 }
592 // for each rule, do the following...
593 int64_t tempDifference;
594 int64_t difference = util64_fromDouble(uprv_maxMantissa());
595 int32_t winner = 0;
596 for (uint32_t i = 0; i < rules.size(); ++i) {
597 // "numerator" is the numerator of the fraction if the
598 // denominator is the LCD. The numerator if the rule's
599 // base value is the denominator is "numerator" times the
600 // base value divided bythe LCD. Here we check to see if
601 // that's an integer, and if not, how close it is to being
602 // an integer.
603 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
604
605
606 // normalize the result of the above calculation: we want
607 // the numerator's distance from the CLOSEST multiple
608 // of the LCD
609 if (leastCommonMultiple - tempDifference < tempDifference) {
610 tempDifference = leastCommonMultiple - tempDifference;
611 }
612
613 // if this is as close as we've come, keep track of how close
614 // that is, and the line number of the rule that did it. If
615 // we've scored a direct hit, we don't have to look at any more
616 // rules
617 if (tempDifference < difference) {
618 difference = tempDifference;
619 winner = i;
620 if (difference == 0) {
621 break;
622 }
623 }
624 }
625
626 // if we have two successive rules that both have the winning base
627 // value, then the first one (the one we found above) is used if
628 // the numerator of the fraction is 1 and the second one is used if
629 // the numerator of the fraction is anything else (this lets us
630 // do things like "one third"/"two thirds" without haveing to define
631 // a whole bunch of extra rule sets)
632 if ((unsigned)(winner + 1) < rules.size() &&
633 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
634 double n = ((double)rules[winner]->getBaseValue()) * number;
635 if (n < 0.5 || n >= 2) {
636 ++winner;
637 }
638 }
639
640 // finally, return the winning rule
641 return rules[winner];
642 }
643
644 /**
645 * Parses a string. Matches the string to be parsed against each
646 * of its rules (with a base value less than upperBound) and returns
647 * the value produced by the rule that matched the most charcters
648 * in the source string.
649 * @param text The string to parse
650 * @param parsePosition The initial position is ignored and assumed
651 * to be 0. On exit, this object has been updated to point to the
652 * first character position this rule set didn't consume.
653 * @param upperBound Limits the rules that can be allowed to match.
654 * Only rules whose base values are strictly less than upperBound
655 * are considered.
656 * @return The numerical result of parsing this string. This will
657 * be the matching rule's base value, composed appropriately with
658 * the results of matching any of its substitutions. The object
659 * will be an instance of Long if it's an integral value; otherwise,
660 * it will be an instance of Double. This function always returns
661 * a valid object: If nothing matched the input string at all,
662 * this function returns new Long(0), and the parse position is
663 * left unchanged.
664 */
665 #ifdef RBNF_DEBUG
666 #include <stdio.h>
667
dumpUS(FILE * f,const UnicodeString & us)668 static void dumpUS(FILE* f, const UnicodeString& us) {
669 int len = us.length();
670 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
671 if (buf != NULL) {
672 us.extract(0, len, buf);
673 buf[len] = 0;
674 fprintf(f, "%s", buf);
675 uprv_free(buf); //delete[] buf;
676 }
677 }
678 #endif
679
680 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const681 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
682 {
683 // try matching each rule in the rule set against the text being
684 // parsed. Whichever one matches the most characters is the one
685 // that determines the value we return.
686
687 result.setLong(0);
688
689 // dump out if there's no text to parse
690 if (text.length() == 0) {
691 return 0;
692 }
693
694 ParsePosition highWaterMark;
695 ParsePosition workingPos = pos;
696
697 #ifdef RBNF_DEBUG
698 fprintf(stderr, "<nfrs> %x '", this);
699 dumpUS(stderr, name);
700 fprintf(stderr, "' text '");
701 dumpUS(stderr, text);
702 fprintf(stderr, "'\n");
703 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
704 #endif
705 // Try each of the negative rules, fraction rules, infinity rules and NaN rules
706 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
707 if (nonNumericalRules[i]) {
708 Formattable tempResult;
709 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
710 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
711 result = tempResult;
712 highWaterMark = workingPos;
713 }
714 workingPos = pos;
715 }
716 }
717 #ifdef RBNF_DEBUG
718 fprintf(stderr, "<nfrs> continue other with text '");
719 dumpUS(stderr, text);
720 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
721 #endif
722
723 // finally, go through the regular rules one at a time. We start
724 // at the end of the list because we want to try matching the most
725 // sigificant rule first (this helps ensure that we parse
726 // "five thousand three hundred six" as
727 // "(five thousand) (three hundred) (six)" rather than
728 // "((five thousand three) hundred) (six)"). Skip rules whose
729 // base values are higher than the upper bound (again, this helps
730 // limit ambiguity by making sure the rules that match a rule's
731 // are less significant than the rule containing the substitutions)/
732 {
733 int64_t ub = util64_fromDouble(upperBound);
734 #ifdef RBNF_DEBUG
735 {
736 char ubstr[64];
737 util64_toa(ub, ubstr, 64);
738 char ubstrhex[64];
739 util64_toa(ub, ubstrhex, 64, 16);
740 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
741 }
742 #endif
743 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
744 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
745 continue;
746 }
747 Formattable tempResult;
748 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
749 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
750 result = tempResult;
751 highWaterMark = workingPos;
752 }
753 workingPos = pos;
754 }
755 }
756 #ifdef RBNF_DEBUG
757 fprintf(stderr, "<nfrs> exit\n");
758 #endif
759 // finally, update the parse postion we were passed to point to the
760 // first character we didn't use, and return the result that
761 // corresponds to that string of characters
762 pos = highWaterMark;
763
764 return 1;
765 }
766
767 void
appendRules(UnicodeString & result) const768 NFRuleSet::appendRules(UnicodeString& result) const
769 {
770 uint32_t i;
771
772 // the rule set name goes first...
773 result.append(name);
774 result.append(gColon);
775 result.append(gLineFeed);
776
777 // followed by the regular rules...
778 for (i = 0; i < rules.size(); i++) {
779 rules[i]->_appendRuleText(result);
780 result.append(gLineFeed);
781 }
782
783 // followed by the special rules (if they exist)
784 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
785 NFRule *rule = nonNumericalRules[i];
786 if (nonNumericalRules[i]) {
787 if (rule->getBaseValue() == NFRule::kImproperFractionRule
788 || rule->getBaseValue() == NFRule::kProperFractionRule
789 || rule->getBaseValue() == NFRule::kMasterRule)
790 {
791 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
792 NFRule *fractionRule = fractionRules[fIdx];
793 if (fractionRule->getBaseValue() == rule->getBaseValue()) {
794 fractionRule->_appendRuleText(result);
795 result.append(gLineFeed);
796 }
797 }
798 }
799 else {
800 rule->_appendRuleText(result);
801 result.append(gLineFeed);
802 }
803 }
804 }
805 }
806
807 // utility functions
808
util64_fromDouble(double d)809 int64_t util64_fromDouble(double d) {
810 int64_t result = 0;
811 if (!uprv_isNaN(d)) {
812 double mant = uprv_maxMantissa();
813 if (d < -mant) {
814 d = -mant;
815 } else if (d > mant) {
816 d = mant;
817 }
818 UBool neg = d < 0;
819 if (neg) {
820 d = -d;
821 }
822 result = (int64_t)uprv_floor(d);
823 if (neg) {
824 result = -result;
825 }
826 }
827 return result;
828 }
829
util64_pow(int32_t r,uint32_t e)830 int64_t util64_pow(int32_t r, uint32_t e) {
831 if (r == 0) {
832 return 0;
833 } else if (e == 0) {
834 return 1;
835 } else {
836 int64_t n = r;
837 while (--e > 0) {
838 n *= r;
839 }
840 return n;
841 }
842 }
843
844 static const uint8_t asciiDigits[] = {
845 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
846 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
847 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
848 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
849 0x77u, 0x78u, 0x79u, 0x7au,
850 };
851
852 static const UChar kUMinus = (UChar)0x002d;
853
854 #ifdef RBNF_DEBUG
855 static const char kMinus = '-';
856
857 static const uint8_t digitInfo[] = {
858 0, 0, 0, 0, 0, 0, 0, 0,
859 0, 0, 0, 0, 0, 0, 0, 0,
860 0, 0, 0, 0, 0, 0, 0, 0,
861 0, 0, 0, 0, 0, 0, 0, 0,
862 0, 0, 0, 0, 0, 0, 0, 0,
863 0, 0, 0, 0, 0, 0, 0, 0,
864 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
865 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
866 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
867 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
868 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
869 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
870 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
871 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
872 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
873 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
874 };
875
util64_atoi(const char * str,uint32_t radix)876 int64_t util64_atoi(const char* str, uint32_t radix)
877 {
878 if (radix > 36) {
879 radix = 36;
880 } else if (radix < 2) {
881 radix = 2;
882 }
883 int64_t lradix = radix;
884
885 int neg = 0;
886 if (*str == kMinus) {
887 ++str;
888 neg = 1;
889 }
890 int64_t result = 0;
891 uint8_t b;
892 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
893 result *= lradix;
894 result += (int32_t)b;
895 }
896 if (neg) {
897 result = -result;
898 }
899 return result;
900 }
901
util64_utoi(const UChar * str,uint32_t radix)902 int64_t util64_utoi(const UChar* str, uint32_t radix)
903 {
904 if (radix > 36) {
905 radix = 36;
906 } else if (radix < 2) {
907 radix = 2;
908 }
909 int64_t lradix = radix;
910
911 int neg = 0;
912 if (*str == kUMinus) {
913 ++str;
914 neg = 1;
915 }
916 int64_t result = 0;
917 UChar c;
918 uint8_t b;
919 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
920 result *= lradix;
921 result += (int32_t)b;
922 }
923 if (neg) {
924 result = -result;
925 }
926 return result;
927 }
928
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)929 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
930 {
931 if (radix > 36) {
932 radix = 36;
933 } else if (radix < 2) {
934 radix = 2;
935 }
936 int64_t base = radix;
937
938 char* p = buf;
939 if (len && (w < 0) && (radix == 10) && !raw) {
940 w = -w;
941 *p++ = kMinus;
942 --len;
943 } else if (len && (w == 0)) {
944 *p++ = (char)raw ? 0 : asciiDigits[0];
945 --len;
946 }
947
948 while (len && w != 0) {
949 int64_t n = w / base;
950 int64_t m = n * base;
951 int32_t d = (int32_t)(w-m);
952 *p++ = raw ? (char)d : asciiDigits[d];
953 w = n;
954 --len;
955 }
956 if (len) {
957 *p = 0; // null terminate if room for caller convenience
958 }
959
960 len = p - buf;
961 if (*buf == kMinus) {
962 ++buf;
963 }
964 while (--p > buf) {
965 char c = *p;
966 *p = *buf;
967 *buf = c;
968 ++buf;
969 }
970
971 return len;
972 }
973 #endif
974
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)975 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
976 {
977 if (radix > 36) {
978 radix = 36;
979 } else if (radix < 2) {
980 radix = 2;
981 }
982 int64_t base = radix;
983
984 UChar* p = buf;
985 if (len && (w < 0) && (radix == 10) && !raw) {
986 w = -w;
987 *p++ = kUMinus;
988 --len;
989 } else if (len && (w == 0)) {
990 *p++ = (UChar)raw ? 0 : asciiDigits[0];
991 --len;
992 }
993
994 while (len && (w != 0)) {
995 int64_t n = w / base;
996 int64_t m = n * base;
997 int32_t d = (int32_t)(w-m);
998 *p++ = (UChar)(raw ? d : asciiDigits[d]);
999 w = n;
1000 --len;
1001 }
1002 if (len) {
1003 *p = 0; // null terminate if room for caller convenience
1004 }
1005
1006 len = (uint32_t)(p - buf);
1007 if (*buf == kUMinus) {
1008 ++buf;
1009 }
1010 while (--p > buf) {
1011 UChar c = *p;
1012 *p = *buf;
1013 *buf = c;
1014 ++buf;
1015 }
1016
1017 return len;
1018 }
1019
1020
1021 U_NAMESPACE_END
1022
1023 /* U_HAVE_RBNF */
1024 #endif
1025
1026