1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 #include "utypeinfo.h"  // for 'typeid' to work
12 
13 #include "unicode/rbnf.h"
14 
15 #if U_HAVE_RBNF
16 
17 #include "unicode/normlzr.h"
18 #include "unicode/plurfmt.h"
19 #include "unicode/tblcoll.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ucol.h"
22 #include "unicode/uloc.h"
23 #include "unicode/unum.h"
24 #include "unicode/ures.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf16.h"
27 #include "unicode/udata.h"
28 #include "unicode/udisplaycontext.h"
29 #include "unicode/brkiter.h"
30 #include "nfrs.h"
31 
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "patternprops.h"
35 #include "uresimp.h"
36 
37 // debugging
38 // #define RBNF_DEBUG
39 
40 #ifdef RBNF_DEBUG
41 #include <stdio.h>
42 #endif
43 
44 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
45 
/* The literal string "%%" as NUL-terminated UTF-16 code units. */
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
/* The literal string "%%lenient-parse:" as NUL-terminated UTF-16 code units. */
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
/* U+003B SEMICOLON as a single UTF-16 code unit. */
static const UChar gSemiColon = 0x003B;
/* The literal string ";%" as NUL-terminated UTF-16 code units. */
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */
62 
// kHalfMaxDouble is (1 << 22) converted to double, i.e. 2^22.
// kMaxDouble is the product of two of those, i.e. 2^44.
// NOTE(review): kHalfMaxDouble expands to an unparenthesized cast expression;
// kMaxDouble wraps its own expansion in parentheses, but direct uses of
// kHalfMaxDouble next to higher-precedence operators should be checked.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
66 
67 U_NAMESPACE_BEGIN
68 
69 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
70 
71 /*
72 This is a utility class. It does not use ICU's RTTI.
73 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
74 Please make sure that intltest passes on Windows in Release mode,
75 since the string pooling per compilation unit will mess up how RTTI works.
76 The RTTI code was also removed due to lack of code coverage.
77 */
78 class LocalizationInfo : public UMemory {
79 protected:
80     virtual ~LocalizationInfo();
81     uint32_t refcount;
82 
83 public:
LocalizationInfo()84     LocalizationInfo() : refcount(0) {}
85 
ref(void)86     LocalizationInfo* ref(void) {
87         ++refcount;
88         return this;
89     }
90 
unref(void)91     LocalizationInfo* unref(void) {
92         if (refcount && --refcount == 0) {
93             delete this;
94         }
95         return NULL;
96     }
97 
98     virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const99     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
100 
101     virtual int32_t getNumberOfRuleSets(void) const = 0;
102     virtual const UChar* getRuleSetName(int32_t index) const = 0;
103     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
104     virtual const UChar* getLocaleName(int32_t index) const = 0;
105     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
106 
107     virtual int32_t indexForLocale(const UChar* locale) const;
108     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
109 
110 //    virtual UClassID getDynamicClassID() const = 0;
111 //    static UClassID getStaticClassID(void);
112 };
113 
~LocalizationInfo()114 LocalizationInfo::~LocalizationInfo() {}
115 
116 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
117 
118 // if both strings are NULL, this returns TRUE
119 static UBool
streq(const UChar * lhs,const UChar * rhs)120 streq(const UChar* lhs, const UChar* rhs) {
121     if (rhs == lhs) {
122         return TRUE;
123     }
124     if (lhs && rhs) {
125         return u_strcmp(lhs, rhs) == 0;
126     }
127     return FALSE;
128 }
129 
130 UBool
operator ==(const LocalizationInfo * rhs) const131 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
132     if (rhs) {
133         if (this == rhs) {
134             return TRUE;
135         }
136 
137         int32_t rsc = getNumberOfRuleSets();
138         if (rsc == rhs->getNumberOfRuleSets()) {
139             for (int i = 0; i < rsc; ++i) {
140                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
141                     return FALSE;
142                 }
143             }
144             int32_t dlc = getNumberOfDisplayLocales();
145             if (dlc == rhs->getNumberOfDisplayLocales()) {
146                 for (int i = 0; i < dlc; ++i) {
147                     const UChar* locale = getLocaleName(i);
148                     int32_t ix = rhs->indexForLocale(locale);
149                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
150                     if (!streq(locale, rhs->getLocaleName(ix))) {
151                         return FALSE;
152                     }
153                     for (int j = 0; j < rsc; ++j) {
154                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
155                             return FALSE;
156                         }
157                     }
158                 }
159                 return TRUE;
160             }
161         }
162     }
163     return FALSE;
164 }
165 
166 int32_t
indexForLocale(const UChar * locale) const167 LocalizationInfo::indexForLocale(const UChar* locale) const {
168     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
169         if (streq(locale, getLocaleName(i))) {
170             return i;
171         }
172     }
173     return -1;
174 }
175 
176 int32_t
indexForRuleSet(const UChar * ruleset) const177 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
178     if (ruleset) {
179         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
180             if (streq(ruleset, getRuleSetName(i))) {
181                 return i;
182             }
183         }
184     }
185     return -1;
186 }
187 
188 
189 typedef void (*Fn_Deleter)(void*);
190 
191 class VArray {
192     void** buf;
193     int32_t cap;
194     int32_t size;
195     Fn_Deleter deleter;
196 public:
VArray()197     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
198 
VArray(Fn_Deleter del)199     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
200 
~VArray()201     ~VArray() {
202         if (deleter) {
203             for (int i = 0; i < size; ++i) {
204                 (*deleter)(buf[i]);
205             }
206         }
207         uprv_free(buf);
208     }
209 
length()210     int32_t length() {
211         return size;
212     }
213 
add(void * elem,UErrorCode & status)214     void add(void* elem, UErrorCode& status) {
215         if (U_SUCCESS(status)) {
216             if (size == cap) {
217                 if (cap == 0) {
218                     cap = 1;
219                 } else if (cap < 256) {
220                     cap *= 2;
221                 } else {
222                     cap += 256;
223                 }
224                 if (buf == NULL) {
225                     buf = (void**)uprv_malloc(cap * sizeof(void*));
226                 } else {
227                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
228                 }
229                 if (buf == NULL) {
230                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
231                     status = U_MEMORY_ALLOCATION_ERROR;
232                     return;
233                 }
234                 void* start = &buf[size];
235                 size_t count = (cap - size) * sizeof(void*);
236                 uprv_memset(start, 0, count); // fill with nulls, just because
237             }
238             buf[size++] = elem;
239         }
240     }
241 
release(void)242     void** release(void) {
243         void** result = buf;
244         buf = NULL;
245         cap = 0;
246         size = 0;
247         return result;
248     }
249 };
250 
class LocDataParser;

/**
 * LocalizationInfo backed by a single parsed string buffer.
 *
 * Layout of `data` as read by the accessors defined later in this file:
 *   data[0][i]         name of rule set i
 *   data[n+1][0]       name of display locale n
 *   data[n+1][i+1]     display name of rule set i in locale n
 * The outer array is NULL-terminated (the destructor walks it until NULL).
 * Instances are only built by LocDataParser (a friend); use create().
 */
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;          // string buffer; freed with uprv_free in the destructor
    UChar*** data;        // table of row arrays; rows and table freed in the destructor
    int32_t numRuleSets;  // value returned by getNumberOfRuleSets()
    int32_t numLocales;   // value returned by getNumberOfDisplayLocales()

friend class LocDataParser;

    // Stores the four values as given; performs no validation or copying.
    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info` into a new object. Returns NULL without setting an error
    // when `info` is empty, and NULL with `status` set on failure.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): declared here but no definition or caller is visible in
    // this part of the file - confirm whether it is still needed.
    void init(UErrorCode& status) const;
};
282 
283 
// Code points of the punctuation characters recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
292 
293 /**
294  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
295  */
296 class LocDataParser {
297     UChar* data;
298     const UChar* e;
299     UChar* p;
300     UChar ch;
301     UParseError& pe;
302     UErrorCode& ec;
303 
304 public:
LocDataParser(UParseError & parseError,UErrorCode & status)305     LocDataParser(UParseError& parseError, UErrorCode& status)
306         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()307     ~LocDataParser() {}
308 
309     /*
310     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
311     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
312     */
313     StringLocalizationInfo* parse(UChar* data, int32_t len);
314 
315 private:
316 
inc(void)317     void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)318     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)319     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)320     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const321     UBool inList(UChar c, const UChar* list) const {
322         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
323         while (*list && *list != c) ++list; return *list == c;
324     }
325     void parseError(const char* msg);
326 
327     StringLocalizationInfo* doParse(void);
328 
329     UChar** nextArray(int32_t& requiredLength);
330     UChar*  nextString(void);
331 };
332 
// ERROR(msg) reports a parse failure through parseError() and then returns
// NULL from the enclosing function. It expands to two statements, so it must
// only be used inside a braced block, never as the unbraced body of an if/else.
// With RBNF_DEBUG the message is passed through and parseError's parameter is
// named explanationArg; otherwise the message is replaced by NULL and the
// parameter is left unnamed (EXPLANATION_ARG expands to nothing).
#ifdef RBNF_DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#endif
340 
341 
// Terminator lists passed to LocDataParser::inList() by nextString().

// A string opened with '"' ends only at the next '"'.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// A string opened with '\'' ends only at the next '\''.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// An unquoted string ends at white space (the leading SPACE makes inList
// accept any pattern white space), a comma, an angle bracket or a quote.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
353 
// Fn_Deleter for VArray elements that were allocated with the uprv_ allocator.
static void
DeleteFn(void* p) {
  uprv_free(p);
}
358 
359 StringLocalizationInfo*
parse(UChar * _data,int32_t len)360 LocDataParser::parse(UChar* _data, int32_t len) {
361     if (U_FAILURE(ec)) {
362         if (_data) uprv_free(_data);
363         return NULL;
364     }
365 
366     pe.line = 0;
367     pe.offset = -1;
368     pe.postContext[0] = 0;
369     pe.preContext[0] = 0;
370 
371     if (_data == NULL) {
372         ec = U_ILLEGAL_ARGUMENT_ERROR;
373         return NULL;
374     }
375 
376     if (len <= 0) {
377         ec = U_ILLEGAL_ARGUMENT_ERROR;
378         uprv_free(_data);
379         return NULL;
380     }
381 
382     data = _data;
383     e = data + len;
384     p = _data;
385     ch = 0xffff;
386 
387     return doParse();
388 }
389 
390 
391 StringLocalizationInfo*
doParse(void)392 LocDataParser::doParse(void) {
393     skipWhitespace();
394     if (!checkInc(OPEN_ANGLE)) {
395         ERROR("Missing open angle");
396     } else {
397         VArray array(DeleteFn);
398         UBool mightHaveNext = TRUE;
399         int32_t requiredLength = -1;
400         while (mightHaveNext) {
401             mightHaveNext = FALSE;
402             UChar** elem = nextArray(requiredLength);
403             skipWhitespace();
404             UBool haveComma = check(COMMA);
405             if (elem) {
406                 array.add(elem, ec);
407                 if (haveComma) {
408                     inc();
409                     mightHaveNext = TRUE;
410                 }
411             } else if (haveComma) {
412                 ERROR("Unexpected character");
413             }
414         }
415 
416         skipWhitespace();
417         if (!checkInc(CLOSE_ANGLE)) {
418             if (check(OPEN_ANGLE)) {
419                 ERROR("Missing comma in outer array");
420             } else {
421                 ERROR("Missing close angle bracket in outer array");
422             }
423         }
424 
425         skipWhitespace();
426         if (p != e) {
427             ERROR("Extra text after close of localization data");
428         }
429 
430         array.add(NULL, ec);
431         if (U_SUCCESS(ec)) {
432             int32_t numLocs = array.length() - 2; // subtract first, NULL
433             UChar*** result = (UChar***)array.release();
434 
435             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
436         }
437     }
438 
439     ERROR("Unknown error");
440 }
441 
442 UChar**
nextArray(int32_t & requiredLength)443 LocDataParser::nextArray(int32_t& requiredLength) {
444     if (U_FAILURE(ec)) {
445         return NULL;
446     }
447 
448     skipWhitespace();
449     if (!checkInc(OPEN_ANGLE)) {
450         ERROR("Missing open angle");
451     }
452 
453     VArray array;
454     UBool mightHaveNext = TRUE;
455     while (mightHaveNext) {
456         mightHaveNext = FALSE;
457         UChar* elem = nextString();
458         skipWhitespace();
459         UBool haveComma = check(COMMA);
460         if (elem) {
461             array.add(elem, ec);
462             if (haveComma) {
463                 inc();
464                 mightHaveNext = TRUE;
465             }
466         } else if (haveComma) {
467             ERROR("Unexpected comma");
468         }
469     }
470     skipWhitespace();
471     if (!checkInc(CLOSE_ANGLE)) {
472         if (check(OPEN_ANGLE)) {
473             ERROR("Missing close angle bracket in inner array");
474         } else {
475             ERROR("Missing comma in inner array");
476         }
477     }
478 
479     array.add(NULL, ec);
480     if (U_SUCCESS(ec)) {
481         if (requiredLength == -1) {
482             requiredLength = array.length() + 1;
483         } else if (array.length() != requiredLength) {
484             ec = U_ILLEGAL_ARGUMENT_ERROR;
485             ERROR("Array not of required length");
486         }
487 
488         return (UChar**)array.release();
489     }
490     ERROR("Unknown Error");
491 }
492 
// Scans one string at the current position and terminates it in place.
//
// A string is either quoted with '"' or '\'' (ends at the matching quote) or
// unquoted (ends at white space, ',', '<', '>', or a quote character).
// Returns a pointer into the buffer to the NUL-terminated string, or NULL if
// there is no string here or an error was reported. For a non-empty string
// the terminating character is overwritten with NUL and remembered in `ch`
// so that check()/skipWhitespace() can still see it.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            // skip the opening quote; only the same quote character closes it
            inc();
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // advance with ++p rather than inc(): `ch` is not touched while scanning
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminating character
        if (p > start) {
            ch = x; // remember it, since the next line destroys it in the buffer
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            // step over the closing quote (this also resets `ch`)
            inc();
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
535 
parseError(const char * EXPLANATION_ARG)536 void LocDataParser::parseError(const char* EXPLANATION_ARG)
537 {
538     if (!data) {
539         return;
540     }
541 
542     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
543     if (start < data) {
544         start = data;
545     }
546     for (UChar* x = p; --x >= start;) {
547         if (!*x) {
548             start = x+1;
549             break;
550         }
551     }
552     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
553     if (limit > e) {
554         limit = e;
555     }
556     u_strncpy(pe.preContext, start, (int32_t)(p-start));
557     pe.preContext[p-start] = 0;
558     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
559     pe.postContext[limit-p] = 0;
560     pe.offset = (int32_t)(p - data);
561 
562 #ifdef RBNF_DEBUG
563     fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
564 
565     UnicodeString msg;
566     msg.append(start, p - start);
567     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
568     msg.append(p, limit-p);
569     msg.append(UNICODE_STRING_SIMPLE("'"));
570 
571     char buf[128];
572     int32_t len = msg.extract(0, msg.length(), buf, 128);
573     if (len >= 128) {
574         buf[127] = 0;
575     } else {
576         buf[len] = 0;
577     }
578     fprintf(stderr, "%s\n", buf);
579     fflush(stderr);
580 #endif
581 
582     uprv_free(data);
583     data = NULL;
584     p = NULL;
585     e = NULL;
586 
587     if (U_SUCCESS(ec)) {
588         ec = U_PARSE_ERROR;
589     }
590 }
591 
592 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
593 
594 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)595 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
596     if (U_FAILURE(status)) {
597         return NULL;
598     }
599 
600     int32_t len = info.length();
601     if (len == 0) {
602         return NULL; // no error;
603     }
604 
605     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
606     if (!p) {
607         status = U_MEMORY_ALLOCATION_ERROR;
608         return NULL;
609     }
610     info.extract(p, len, status);
611     if (!U_FAILURE(status)) {
612         status = U_ZERO_ERROR; // clear warning about non-termination
613     }
614 
615     LocDataParser parser(perror, status);
616     return parser.parse(p, len);
617 }
618 
~StringLocalizationInfo()619 StringLocalizationInfo::~StringLocalizationInfo() {
620     for (UChar*** p = (UChar***)data; *p; ++p) {
621         // remaining data is simply pointer into our unicode string data.
622         if (*p) uprv_free(*p);
623     }
624     if (data) uprv_free(data);
625     if (info) uprv_free(info);
626 }
627 
628 
629 const UChar*
getRuleSetName(int32_t index) const630 StringLocalizationInfo::getRuleSetName(int32_t index) const {
631     if (index >= 0 && index < getNumberOfRuleSets()) {
632         return data[0][index];
633     }
634     return NULL;
635 }
636 
637 const UChar*
getLocaleName(int32_t index) const638 StringLocalizationInfo::getLocaleName(int32_t index) const {
639     if (index >= 0 && index < getNumberOfDisplayLocales()) {
640         return data[index+1][0];
641     }
642     return NULL;
643 }
644 
645 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const646 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
647     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
648         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
649         return data[localeIndex+1][ruleIndex+1];
650     }
651     return NULL;
652 }
653 
654 // ----------
655 
// Constructs a formatter from a rule description and a localization string,
// for an explicit locale. `locs` is parsed with StringLocalizationInfo::create
// (which yields NULL for an empty string) and the result is handed to init().
// NOTE(review): init() is not visible here; confirm it releases locinfo when
// status already indicates failure.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
679 
// Same as the (description, locs, locale) constructor, but uses
// Locale::getDefault() as the formatter's locale.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
703 
// Constructs a formatter from a rule description and an already-built
// LocalizationInfo (may be NULL), for an explicit locale. `info` is passed
// straight to init().
// NOTE(review): ownership of `info` is decided inside init(), which is not
// visible here - confirm against its implementation.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             LocalizationInfo* info,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  init(description, info, perror, status);
}
726 
// Constructs a formatter from a rule description only: no localization info
// (NULL is passed to init()) and Locale::getDefault() as the locale.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}
749 
// Constructs a formatter from a rule description for an explicit locale,
// with no localization info (NULL is passed to init()).
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         const Locale& aLocale,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(aLocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}
773 
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)774 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
775   : ruleSets(NULL)
776   , ruleSetDescriptions(NULL)
777   , numRuleSets(0)
778   , defaultRuleSet(NULL)
779   , locale(alocale)
780   , collator(NULL)
781   , decimalFormatSymbols(NULL)
782   , defaultInfinityRule(NULL)
783   , defaultNaNRule(NULL)
784   , lenient(FALSE)
785   , lenientParseRules(NULL)
786   , localizations(NULL)
787   , capitalizationInfoSet(FALSE)
788   , capitalizationForUIListMenu(FALSE)
789   , capitalizationForStandAlone(FALSE)
790   , capitalizationBrkIter(NULL)
791 {
792     if (U_FAILURE(status)) {
793         return;
794     }
795 
796     const char* rules_tag = "RBNFRules";
797     const char* fmt_tag = "";
798     switch (tag) {
799     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
800     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
801     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
802     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
803     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
804     }
805 
806     // TODO: read localization info from resource
807     LocalizationInfo* locinfo = NULL;
808 
809     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
810     if (U_SUCCESS(status)) {
811         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
812                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
813 
814         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
815         if (U_FAILURE(status)) {
816             ures_close(nfrb);
817         }
818         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
819         if (U_FAILURE(status)) {
820             ures_close(rbnfRules);
821             ures_close(nfrb);
822             return;
823         }
824 
825         UnicodeString desc;
826         while (ures_hasNext(ruleSets)) {
827            desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
828         }
829         UParseError perror;
830 
831         init(desc, locinfo, perror, status);
832 
833         ures_close(ruleSets);
834         ures_close(rbnfRules);
835     }
836     ures_close(nfrb);
837 }
838 
// Copy constructor. Initializes every member to its empty state and then
// delegates the actual copying to operator=.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs)
  , ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(rhs.locale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    this->operator=(rhs);
}
860 
861 // --------
862 
// Assignment. Rather than copying the parsed rule structures, this rebuilds
// them by re-running init() on rhs's original description, sharing rhs's
// LocalizationInfo through an extra reference. Errors raised while rebuilding
// go into a local status and are not reported to the caller.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): dispose() is not visible here; presumably it releases the
    // current rule sets and related state before they are rebuilt - confirm.
    dispose();
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    // NOTE(review): dereferences rhs.getDecimalFormatSymbols() unconditionally;
    // confirm it cannot be NULL for an rhs whose construction failed.
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
889 
// Destructor; all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
894 
// Returns a heap-allocated copy of this formatter, made with the copy
// constructor. The caller owns the returned object.
Format*
RuleBasedNumberFormat::clone(void) const
{
    return new RuleBasedNumberFormat(*this);
}
900 
901 UBool
operator ==(const Format & other) const902 RuleBasedNumberFormat::operator==(const Format& other) const
903 {
904     if (this == &other) {
905         return TRUE;
906     }
907 
908     if (typeid(*this) == typeid(other)) {
909         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
910         // test for capitalization info equality is adequately handled
911         // by the NumberFormat test for fCapitalizationContext equality;
912         // the info here is just derived from that.
913         if (locale == rhs.locale &&
914             lenient == rhs.lenient &&
915             (localizations == NULL
916                 ? rhs.localizations == NULL
917                 : (rhs.localizations == NULL
918                     ? FALSE
919                     : *localizations == rhs.localizations))) {
920 
921             NFRuleSet** p = ruleSets;
922             NFRuleSet** q = rhs.ruleSets;
923             if (p == NULL) {
924                 return q == NULL;
925             } else if (q == NULL) {
926                 return FALSE;
927             }
928             while (*p && *q && (**p == **q)) {
929                 ++p;
930                 ++q;
931             }
932             return *q == NULL && *p == NULL;
933         }
934     }
935 
936     return FALSE;
937 }
938 
939 UnicodeString
getRules() const940 RuleBasedNumberFormat::getRules() const
941 {
942     UnicodeString result;
943     if (ruleSets != NULL) {
944         for (NFRuleSet** p = ruleSets; *p; ++p) {
945             (*p)->appendRules(result);
946         }
947     }
948     return result;
949 }
950 
951 UnicodeString
getRuleSetName(int32_t index) const952 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
953 {
954     if (localizations) {
955         UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
956         return string;
957     }
958     else if (ruleSets) {
959         UnicodeString result;
960         for (NFRuleSet** p = ruleSets; *p; ++p) {
961             NFRuleSet* rs = *p;
962             if (rs->isPublic()) {
963                 if (--index == -1) {
964                     rs->getName(result);
965                     return result;
966                 }
967             }
968         }
969     }
970     UnicodeString empty;
971     return empty;
972 }
973 
974 int32_t
getNumberOfRuleSetNames() const975 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
976 {
977     int32_t result = 0;
978     if (localizations) {
979         result = localizations->getNumberOfRuleSets();
980     }
981     else if (ruleSets) {
982         for (NFRuleSet** p = ruleSets; *p; ++p) {
983             if ((**p).isPublic()) {
984                 ++result;
985             }
986         }
987     }
988     return result;
989 }
990 
991 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const992 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
993     if (localizations) {
994         return localizations->getNumberOfDisplayLocales();
995     }
996     return 0;
997 }
998 
999 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const1000 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1001     if (U_FAILURE(status)) {
1002         return Locale("");
1003     }
1004     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1005         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1006         char buffer[64];
1007         int32_t cap = name.length() + 1;
1008         char* bp = buffer;
1009         if (cap > 64) {
1010             bp = (char *)uprv_malloc(cap);
1011             if (bp == NULL) {
1012                 status = U_MEMORY_ALLOCATION_ERROR;
1013                 return Locale("");
1014             }
1015         }
1016         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1017         Locale retLocale(bp);
1018         if (bp != buffer) {
1019             uprv_free(bp);
1020         }
1021         return retLocale;
1022     }
1023     status = U_ILLEGAL_ARGUMENT_ERROR;
1024     Locale retLocale;
1025     return retLocale;
1026 }
1027 
1028 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)1029 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1030     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1031         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1032         int32_t len = localeName.length();
1033         UChar* localeStr = localeName.getBuffer(len + 1);
1034         while (len >= 0) {
1035             localeStr[len] = 0;
1036             int32_t ix = localizations->indexForLocale(localeStr);
1037             if (ix >= 0) {
1038                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1039                 return name;
1040             }
1041 
1042             // trim trailing portion, skipping over ommitted sections
1043             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1044             while (len > 0 && localeStr[len-1] == 0x005F) --len;
1045         }
1046         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1047         return name;
1048     }
1049     UnicodeString bogus;
1050     bogus.setToBogus();
1051     return bogus;
1052 }
1053 
1054 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1055 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1056     if (localizations) {
1057         UnicodeString rsn(ruleSetName);
1058         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1059         return getRuleSetDisplayName(ix, localeParam);
1060     }
1061     UnicodeString bogus;
1062     bogus.setToBogus();
1063     return bogus;
1064 }
1065 
1066 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1067 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1068 {
1069     if (U_SUCCESS(status) && ruleSets) {
1070         for (NFRuleSet** p = ruleSets; *p; ++p) {
1071             NFRuleSet* rs = *p;
1072             if (rs->isNamed(name)) {
1073                 return rs;
1074             }
1075         }
1076         status = U_ILLEGAL_ARGUMENT_ERROR;
1077     }
1078     return NULL;
1079 }
1080 
1081 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1082 RuleBasedNumberFormat::format(int32_t number,
1083                               UnicodeString& toAppendTo,
1084                               FieldPosition& /* pos */) const
1085 {
1086     if (defaultRuleSet) {
1087         UErrorCode status = U_ZERO_ERROR;
1088         int32_t startPos = toAppendTo.length();
1089         defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1090         adjustForCapitalizationContext(startPos, toAppendTo);
1091     }
1092     return toAppendTo;
1093 }
1094 
1095 
1096 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1097 RuleBasedNumberFormat::format(int64_t number,
1098                               UnicodeString& toAppendTo,
1099                               FieldPosition& /* pos */) const
1100 {
1101     if (defaultRuleSet) {
1102         UErrorCode status = U_ZERO_ERROR;
1103         int32_t startPos = toAppendTo.length();
1104         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1105         adjustForCapitalizationContext(startPos, toAppendTo);
1106     }
1107     return toAppendTo;
1108 }
1109 
1110 
1111 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1112 RuleBasedNumberFormat::format(double number,
1113                               UnicodeString& toAppendTo,
1114                               FieldPosition& /* pos */) const
1115 {
1116     int32_t startPos = toAppendTo.length();
1117     if (defaultRuleSet) {
1118         UErrorCode status = U_ZERO_ERROR;
1119         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1120     }
1121     return adjustForCapitalizationContext(startPos, toAppendTo);
1122 }
1123 
1124 
1125 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1126 RuleBasedNumberFormat::format(int32_t number,
1127                               const UnicodeString& ruleSetName,
1128                               UnicodeString& toAppendTo,
1129                               FieldPosition& /* pos */,
1130                               UErrorCode& status) const
1131 {
1132     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1133     if (U_SUCCESS(status)) {
1134         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1135             // throw new IllegalArgumentException("Can't use internal rule set");
1136             status = U_ILLEGAL_ARGUMENT_ERROR;
1137         } else {
1138             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1139             if (rs) {
1140                 int32_t startPos = toAppendTo.length();
1141                 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1142                 adjustForCapitalizationContext(startPos, toAppendTo);
1143             }
1144         }
1145     }
1146     return toAppendTo;
1147 }
1148 
1149 
1150 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1151 RuleBasedNumberFormat::format(int64_t number,
1152                               const UnicodeString& ruleSetName,
1153                               UnicodeString& toAppendTo,
1154                               FieldPosition& /* pos */,
1155                               UErrorCode& status) const
1156 {
1157     if (U_SUCCESS(status)) {
1158         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1159             // throw new IllegalArgumentException("Can't use internal rule set");
1160             status = U_ILLEGAL_ARGUMENT_ERROR;
1161         } else {
1162             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1163             if (rs) {
1164                 int32_t startPos = toAppendTo.length();
1165                 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1166                 adjustForCapitalizationContext(startPos, toAppendTo);
1167             }
1168         }
1169     }
1170     return toAppendTo;
1171 }
1172 
1173 
1174 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1175 RuleBasedNumberFormat::format(double number,
1176                               const UnicodeString& ruleSetName,
1177                               UnicodeString& toAppendTo,
1178                               FieldPosition& /* pos */,
1179                               UErrorCode& status) const
1180 {
1181     if (U_SUCCESS(status)) {
1182         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1183             // throw new IllegalArgumentException("Can't use internal rule set");
1184             status = U_ILLEGAL_ARGUMENT_ERROR;
1185         } else {
1186             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1187             if (rs) {
1188                 int32_t startPos = toAppendTo.length();
1189                 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1190                 adjustForCapitalizationContext(startPos, toAppendTo);
1191             }
1192         }
1193     }
1194     return toAppendTo;
1195 }
1196 
1197 UnicodeString&
adjustForCapitalizationContext(int32_t startPos,UnicodeString & currentResult) const1198 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1199                                                       UnicodeString& currentResult) const
1200 {
1201 #if !UCONFIG_NO_BREAK_ITERATION
1202     if (startPos==0 && currentResult.length() > 0) {
1203         // capitalize currentResult according to context
1204         UChar32 ch = currentResult.char32At(0);
1205         UErrorCode status = U_ZERO_ERROR;
1206         UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1207         if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1208               ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1209                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1210                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1211             // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1212             // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1213             currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1214         }
1215     }
1216 #endif
1217     return currentResult;
1218 }
1219 
1220 
1221 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1222 RuleBasedNumberFormat::parse(const UnicodeString& text,
1223                              Formattable& result,
1224                              ParsePosition& parsePosition) const
1225 {
1226     if (!ruleSets) {
1227         parsePosition.setErrorIndex(0);
1228         return;
1229     }
1230 
1231     UnicodeString workingText(text, parsePosition.getIndex());
1232     ParsePosition workingPos(0);
1233 
1234     ParsePosition high_pp(0);
1235     Formattable high_result;
1236 
1237     for (NFRuleSet** p = ruleSets; *p; ++p) {
1238         NFRuleSet *rp = *p;
1239         if (rp->isPublic() && rp->isParseable()) {
1240             ParsePosition working_pp(0);
1241             Formattable working_result;
1242 
1243             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1244             if (working_pp.getIndex() > high_pp.getIndex()) {
1245                 high_pp = working_pp;
1246                 high_result = working_result;
1247 
1248                 if (high_pp.getIndex() == workingText.length()) {
1249                     break;
1250                 }
1251             }
1252         }
1253     }
1254 
1255     int32_t startIndex = parsePosition.getIndex();
1256     parsePosition.setIndex(startIndex + high_pp.getIndex());
1257     if (high_pp.getIndex() > 0) {
1258         parsePosition.setErrorIndex(-1);
1259     } else {
1260         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1261         parsePosition.setErrorIndex(startIndex + errorIndex);
1262     }
1263     result = high_result;
1264     if (result.getType() == Formattable::kDouble) {
1265         double d = result.getDouble();
1266         if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1267             // Note: casting a double to an int when the double is too large or small
1268             //       to fit the destination is undefined behavior. The explicit range checks,
1269             //       above, are required. Just casting and checking the result value is undefined.
1270             result.setLong(static_cast<int32_t>(d));
1271         }
1272     }
1273 }
1274 
1275 #if !UCONFIG_NO_COLLATION
1276 
1277 void
setLenient(UBool enabled)1278 RuleBasedNumberFormat::setLenient(UBool enabled)
1279 {
1280     lenient = enabled;
1281     if (!enabled && collator) {
1282         delete collator;
1283         collator = NULL;
1284     }
1285 }
1286 
1287 #endif
1288 
1289 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1290 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1291     if (U_SUCCESS(status)) {
1292         if (ruleSetName.isEmpty()) {
1293           if (localizations) {
1294               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1295               defaultRuleSet = findRuleSet(name, status);
1296           } else {
1297             initDefaultRuleSet();
1298           }
1299         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1300             status = U_ILLEGAL_ARGUMENT_ERROR;
1301         } else {
1302             NFRuleSet* result = findRuleSet(ruleSetName, status);
1303             if (result != NULL) {
1304                 defaultRuleSet = result;
1305             }
1306         }
1307     }
1308 }
1309 
1310 UnicodeString
getDefaultRuleSetName() const1311 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1312     UnicodeString result;
1313     if (defaultRuleSet && defaultRuleSet->isPublic()) {
1314         defaultRuleSet->getName(result);
1315     } else {
1316         result.setToBogus();
1317     }
1318     return result;
1319 }
1320 
1321 void
initDefaultRuleSet()1322 RuleBasedNumberFormat::initDefaultRuleSet()
1323 {
1324     defaultRuleSet = NULL;
1325     if (!ruleSets) {
1326         return;
1327     }
1328 
1329     const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1330     const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1331     const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1332 
1333     NFRuleSet**p = &ruleSets[0];
1334     while (*p) {
1335         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1336             defaultRuleSet = *p;
1337             return;
1338         } else {
1339             ++p;
1340         }
1341     }
1342 
1343     defaultRuleSet = *--p;
1344     if (!defaultRuleSet->isPublic()) {
1345         while (p != ruleSets) {
1346             if ((*--p)->isPublic()) {
1347                 defaultRuleSet = *p;
1348                 break;
1349             }
1350         }
1351     }
1352 }
1353 
1354 
/**
 * Builds the formatter's rule sets from a textual rule description.
 *
 * Steps, in order: zero pErr; create the shared symbols and the default
 * infinity/NaN rules; take a reference on the localization info; strip
 * whitespace after semicolons; pull out an optional "%%lenient-parse:"
 * section; count and allocate the rule sets; create each rule set from
 * its slice of the description; pick a provisional default rule set;
 * parse the rules of every rule set; finally let the localization info,
 * if any, choose the real default rule set.
 *
 * The function returns early on any failure, which can leave ruleSets
 * NULL or only partly populated.
 *
 * @param rules             the rule description text
 * @param localizationInfos localization info, may be NULL; when non-NULL
 *                          its ref() result is stored in localizations
 * @param pErr              zeroed on entry and not otherwise filled in
 * @param status            in/out error code
 */
void
RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                            UParseError& pErr, UErrorCode& status)
{
    // TODO: implement UParseError
    uprv_memset(&pErr, 0, sizeof(UParseError));
    // Note: this can leave ruleSets == NULL, so remaining code should check
    if (U_FAILURE(status)) {
        return;
    }

    initializeDecimalFormatSymbols(status);
    initializeDefaultInfinityRule(status);
    initializeDefaultNaNRule(status);
    if (U_FAILURE(status)) {
        return;
    }

    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

    // work on a private copy of the rules; a zero-length copy is
    // reported as an allocation failure
    UnicodeString description(rules);
    if (!description.length()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description).  This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules.  If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    int32_t lp = description.indexOf(gLenientParse, -1, 0);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, 2, lp);

            // no following ";%": the lenient-parse section runs to the
            // last character of the description
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + u_strlen(gLenientParse);
            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            /* test for NULL */
            if (lenientParseRules == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

            // remove the section, including the character at lpEnd
            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    numRuleSets = 0;
    for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
        ++numRuleSets;
        // step past the ';' so the next search finds the following ";%"
        ++p;
    }
    // n separators delimit n + 1 rule sets
    ++numRuleSets;

    // our rule list is an array of the appropriate size
    // (one extra slot for the NULL terminator)
    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    /* test for NULL */
    if (ruleSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // NULL every slot, terminator included, so that a partially filled
    // array can still be walked and disposed of
    for (int i = 0; i <= numRuleSets; ++i) {
        ruleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array.  At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description.  We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up

    // (numRuleSets was incremented unconditionally above, so it is at
    // least 1 when this check runs)
    if(!numRuleSets) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        int32_t start = 0;
        // each slice runs from start through the ';' of the next ";%"
        for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
            if (ruleSets[curRuleSet] == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            ++curRuleSet;
            start = p + 1;
        }
        // the last rule set is whatever remains after the final ";%"
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
        if (ruleSets[curRuleSet] == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)

    // {dlf} Initialization of a fraction rule set requires the default rule
    // set to be known.  For purposes of initialization, this is always the
    // last public rule set, no matter what the localization data says.
    initDefaultRuleSet();

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure (and we throw
    // away the temporary descriptions as we go)
    {
        for (int i = 0; i < numRuleSets; i++) {
            ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
        }
    }

    // Now that the rules are initialized, the 'real' default rule
    // set can be adjusted by the localization data.

    // The C code keeps the localization array as is, rather than building
    // a separate array of the public rule set names, so we have less work
    // to do here-- but we still need to check the names.

    if (localizationInfos) {
        // confirm the names, if any aren't in the rules, that's an error
        // it is ok if the rules contain public rule sets that are not in this list
        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
            NFRuleSet* rs = findRuleSet(name, status);
            if (rs == NULL) {
                break; // error
            }
            // the first rule set named by the localization info becomes the default
            if (i == 0) {
                defaultRuleSet = rs;
            }
        }
    } else {
        defaultRuleSet = getDefaultRuleSet();
    }
    // keep the unmodified rule text
    originalDescription = rules;
}
1531 
1532 // override the NumberFormat implementation in order to
1533 // lazily initialize relevant items
1534 void
setContext(UDisplayContext value,UErrorCode & status)1535 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1536 {
1537     NumberFormat::setContext(value, status);
1538     if (U_SUCCESS(status)) {
1539     	if (!capitalizationInfoSet &&
1540     	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1541     	    initCapitalizationContextInfo(locale);
1542     	    capitalizationInfoSet = TRUE;
1543         }
1544 #if !UCONFIG_NO_BREAK_ITERATION
1545         if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1546                 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1547                 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1548             UErrorCode status = U_ZERO_ERROR;
1549             capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1550             if (U_FAILURE(status)) {
1551                 delete capitalizationBrkIter;
1552                 capitalizationBrkIter = NULL;
1553             }
1554         }
1555 #endif
1556     }
1557 }
1558 
1559 void
initCapitalizationContextInfo(const Locale & thelocale)1560 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1561 {
1562 #if !UCONFIG_NO_BREAK_ITERATION
1563     const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1564     UErrorCode status = U_ZERO_ERROR;
1565     UResourceBundle *rb = ures_open(NULL, localeID, &status);
1566     rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1567     rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1568     if (U_SUCCESS(status) && rb != NULL) {
1569         int32_t len = 0;
1570         const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1571         if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1572             capitalizationForUIListMenu = intVector[0];
1573             capitalizationForStandAlone = intVector[1];
1574         }
1575     }
1576     ures_close(rb);
1577 #endif
1578 }
1579 
1580 void
stripWhitespace(UnicodeString & description)1581 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1582 {
1583     // iterate through the characters...
1584     UnicodeString result;
1585 
1586     int start = 0;
1587     while (start != -1 && start < description.length()) {
1588         // seek to the first non-whitespace character...
1589         while (start < description.length()
1590             && PatternProps::isWhiteSpace(description.charAt(start))) {
1591             ++start;
1592         }
1593 
1594         // locate the next semicolon in the text and copy the text from
1595         // our current position up to that semicolon into the result
1596         int32_t p = description.indexOf(gSemiColon, start);
1597         if (p == -1) {
1598             // or if we don't find a semicolon, just copy the rest of
1599             // the string into the result
1600             result.append(description, start, description.length() - start);
1601             start = -1;
1602         }
1603         else if (p < description.length()) {
1604             result.append(description, start, p + 1 - start);
1605             start = p + 1;
1606         }
1607 
1608         // when we get here, we've seeked off the end of the sring, and
1609         // we terminate the loop (we continue until *start* is -1 rather
1610         // than until *p* is -1, because otherwise we'd miss the last
1611         // rule in the description)
1612         else {
1613             start = -1;
1614         }
1615     }
1616 
1617     description.setTo(result);
1618 }
1619 
1620 
1621 void
dispose()1622 RuleBasedNumberFormat::dispose()
1623 {
1624     if (ruleSets) {
1625         for (NFRuleSet** p = ruleSets; *p; ++p) {
1626             delete *p;
1627         }
1628         uprv_free(ruleSets);
1629         ruleSets = NULL;
1630     }
1631 
1632     if (ruleSetDescriptions) {
1633         delete [] ruleSetDescriptions;
1634         ruleSetDescriptions = NULL;
1635     }
1636 
1637 #if !UCONFIG_NO_COLLATION
1638     delete collator;
1639 #endif
1640     collator = NULL;
1641 
1642     delete decimalFormatSymbols;
1643     decimalFormatSymbols = NULL;
1644 
1645     delete defaultInfinityRule;
1646     defaultInfinityRule = NULL;
1647 
1648     delete defaultNaNRule;
1649     defaultNaNRule = NULL;
1650 
1651     delete lenientParseRules;
1652     lenientParseRules = NULL;
1653 
1654 #if !UCONFIG_NO_BREAK_ITERATION
1655     delete capitalizationBrkIter;
1656     capitalizationBrkIter = NULL;
1657 #endif
1658 
1659     if (localizations) {
1660         localizations = localizations->unref();
1661     }
1662 }
1663 
1664 
1665 //-----------------------------------------------------------------------
1666 // package-internal API
1667 //-----------------------------------------------------------------------
1668 
1669 /**
1670  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1671  * this function creates it the first time it's called.
1672  * @return The collator to use for lenient parsing, or null if lenient parsing
1673  * is turned off.
1674 */
1675 const RuleBasedCollator*
getCollator() const1676 RuleBasedNumberFormat::getCollator() const
1677 {
1678 #if !UCONFIG_NO_COLLATION
1679     if (!ruleSets) {
1680         return NULL;
1681     }
1682 
1683     // lazy-evaluate the collator
1684     if (collator == NULL && lenient) {
1685         // create a default collator based on the formatter's locale,
1686         // then pull out that collator's rules, append any additional
1687         // rules specified in the description, and create a _new_
1688         // collator based on the combinaiton of those rules
1689 
1690         UErrorCode status = U_ZERO_ERROR;
1691 
1692         Collator* temp = Collator::createInstance(locale, status);
1693         RuleBasedCollator* newCollator;
1694         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1695             if (lenientParseRules) {
1696                 UnicodeString rules(newCollator->getRules());
1697                 rules.append(*lenientParseRules);
1698 
1699                 newCollator = new RuleBasedCollator(rules, status);
1700                 // Exit if newCollator could not be created.
1701                 if (newCollator == NULL) {
1702                     return NULL;
1703                 }
1704             } else {
1705                 temp = NULL;
1706             }
1707             if (U_SUCCESS(status)) {
1708                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1709                 // cast away const
1710                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1711             } else {
1712                 delete newCollator;
1713             }
1714         }
1715         delete temp;
1716     }
1717 #endif
1718 
1719     // if lenient-parse mode is off, this will be null
1720     // (see setLenientParseMode())
1721     return collator;
1722 }
1723 
1724 
1725 DecimalFormatSymbols*
initializeDecimalFormatSymbols(UErrorCode & status)1726 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1727 {
1728     // lazy-evaluate the DecimalFormatSymbols object.  This object
1729     // is shared by all DecimalFormat instances belonging to this
1730     // formatter
1731     if (decimalFormatSymbols == NULL) {
1732         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1733         if (U_SUCCESS(status)) {
1734             decimalFormatSymbols = temp;
1735         }
1736         else {
1737             delete temp;
1738         }
1739     }
1740     return decimalFormatSymbols;
1741 }
1742 
1743 /**
1744  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1745  * instances owned by this formatter.
1746 */
1747 const DecimalFormatSymbols*
getDecimalFormatSymbols() const1748 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1749 {
1750     return decimalFormatSymbols;
1751 }
1752 
1753 NFRule*
initializeDefaultInfinityRule(UErrorCode & status)1754 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1755 {
1756     if (U_FAILURE(status)) {
1757         return NULL;
1758     }
1759     if (defaultInfinityRule == NULL) {
1760         UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1761         rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1762         NFRule* temp = new NFRule(this, rule, status);
1763         if (U_SUCCESS(status)) {
1764             defaultInfinityRule = temp;
1765         }
1766         else {
1767             delete temp;
1768         }
1769     }
1770     return defaultInfinityRule;
1771 }
1772 
1773 const NFRule*
getDefaultInfinityRule() const1774 RuleBasedNumberFormat::getDefaultInfinityRule() const
1775 {
1776     return defaultInfinityRule;
1777 }
1778 
1779 NFRule*
initializeDefaultNaNRule(UErrorCode & status)1780 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1781 {
1782     if (U_FAILURE(status)) {
1783         return NULL;
1784     }
1785     if (defaultNaNRule == NULL) {
1786         UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1787         rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1788         NFRule* temp = new NFRule(this, rule, status);
1789         if (U_SUCCESS(status)) {
1790             defaultNaNRule = temp;
1791         }
1792         else {
1793             delete temp;
1794         }
1795     }
1796     return defaultNaNRule;
1797 }
1798 
1799 const NFRule*
getDefaultNaNRule() const1800 RuleBasedNumberFormat::getDefaultNaNRule() const
1801 {
1802     return defaultNaNRule;
1803 }
1804 
1805 // De-owning the current localized symbols and adopt the new symbols.
1806 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1807 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1808 {
1809     if (symbolsToAdopt == NULL) {
1810         return; // do not allow caller to set decimalFormatSymbols to NULL
1811     }
1812 
1813     if (decimalFormatSymbols != NULL) {
1814         delete decimalFormatSymbols;
1815     }
1816 
1817     decimalFormatSymbols = symbolsToAdopt;
1818 
1819     {
1820         // Apply the new decimalFormatSymbols by reparsing the rulesets
1821         UErrorCode status = U_ZERO_ERROR;
1822 
1823         delete defaultInfinityRule;
1824         defaultInfinityRule = NULL;
1825         initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1826 
1827         delete defaultNaNRule;
1828         defaultNaNRule = NULL;
1829         initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1830 
1831         if (ruleSets) {
1832             for (int32_t i = 0; i < numRuleSets; i++) {
1833                 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1834             }
1835         }
1836     }
1837 }
1838 
1839 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1840 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1841 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1842 {
1843     adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1844 }
1845 
1846 PluralFormat *
createPluralFormat(UPluralType pluralType,const UnicodeString & pattern,UErrorCode & status) const1847 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1848                                           const UnicodeString &pattern,
1849                                           UErrorCode& status) const
1850 {
1851     return new PluralFormat(locale, pluralType, pattern, status);
1852 }
1853 
1854 U_NAMESPACE_END
1855 
1856 /* U_HAVE_RBNF */
1857 #endif
1858