1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2014, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 #include "unicode/utypes.h"
9 #include "utypeinfo.h"  // for 'typeid' to work
10 
11 #include "unicode/rbnf.h"
12 
13 #if U_HAVE_RBNF
14 
15 #include "unicode/normlzr.h"
16 #include "unicode/plurfmt.h"
17 #include "unicode/tblcoll.h"
18 #include "unicode/uchar.h"
19 #include "unicode/ucol.h"
20 #include "unicode/uloc.h"
21 #include "unicode/unum.h"
22 #include "unicode/ures.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utf16.h"
25 #include "unicode/udata.h"
26 #include "unicode/udisplaycontext.h"
27 #include "unicode/brkiter.h"
28 #include "nfrs.h"
29 
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "patternprops.h"
33 #include "uresimp.h"
34 
35 // debugging
36 // #define RBNF_DEBUG
37 
38 #ifdef RBNF_DEBUG
39 #include "stdio.h"
40 #endif
41 
42 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
43 
44 static const UChar gPercentPercent[] =
45 {
46     0x25, 0x25, 0
47 }; /* "%%" */
48 
49 // All urbnf objects are created through openRules, so we init all of the
50 // Unicode string constants required by rbnf, nfrs, or nfr here.
51 static const UChar gLenientParse[] =
52 {
53     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
54 }; /* "%%lenient-parse:" */
55 static const UChar gSemiColon = 0x003B;
56 static const UChar gSemiPercent[] =
57 {
58     0x3B, 0x25, 0
59 }; /* ";%" */
60 
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its square.
// Every expansion is fully parenthesized so that it always acts as a single
// operand, whatever expression it is pasted into.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
64 
65 U_NAMESPACE_BEGIN
66 
67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68 
69 /*
70 This is a utility class. It does not use ICU's RTTI.
71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72 Please make sure that intltest passes on Windows in Release mode,
73 since the string pooling per compilation unit will mess up how RTTI works.
74 The RTTI code was also removed due to lack of code coverage.
75 */
76 class LocalizationInfo : public UMemory {
77 protected:
78     virtual ~LocalizationInfo();
79     uint32_t refcount;
80 
81 public:
LocalizationInfo()82     LocalizationInfo() : refcount(0) {}
83 
ref(void)84     LocalizationInfo* ref(void) {
85         ++refcount;
86         return this;
87     }
88 
unref(void)89     LocalizationInfo* unref(void) {
90         if (refcount && --refcount == 0) {
91             delete this;
92         }
93         return NULL;
94     }
95 
96     virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98 
99     virtual int32_t getNumberOfRuleSets(void) const = 0;
100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102     virtual const UChar* getLocaleName(int32_t index) const = 0;
103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104 
105     virtual int32_t indexForLocale(const UChar* locale) const;
106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107 
108 //    virtual UClassID getDynamicClassID() const = 0;
109 //    static UClassID getStaticClassID(void);
110 };
111 
~LocalizationInfo()112 LocalizationInfo::~LocalizationInfo() {}
113 
114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
115 
116 // if both strings are NULL, this returns TRUE
117 static UBool
streq(const UChar * lhs,const UChar * rhs)118 streq(const UChar* lhs, const UChar* rhs) {
119     if (rhs == lhs) {
120         return TRUE;
121     }
122     if (lhs && rhs) {
123         return u_strcmp(lhs, rhs) == 0;
124     }
125     return FALSE;
126 }
127 
128 UBool
operator ==(const LocalizationInfo * rhs) const129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
130     if (rhs) {
131         if (this == rhs) {
132             return TRUE;
133         }
134 
135         int32_t rsc = getNumberOfRuleSets();
136         if (rsc == rhs->getNumberOfRuleSets()) {
137             for (int i = 0; i < rsc; ++i) {
138                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
139                     return FALSE;
140                 }
141             }
142             int32_t dlc = getNumberOfDisplayLocales();
143             if (dlc == rhs->getNumberOfDisplayLocales()) {
144                 for (int i = 0; i < dlc; ++i) {
145                     const UChar* locale = getLocaleName(i);
146                     int32_t ix = rhs->indexForLocale(locale);
147                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
148                     if (!streq(locale, rhs->getLocaleName(ix))) {
149                         return FALSE;
150                     }
151                     for (int j = 0; j < rsc; ++j) {
152                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
153                             return FALSE;
154                         }
155                     }
156                 }
157                 return TRUE;
158             }
159         }
160     }
161     return FALSE;
162 }
163 
164 int32_t
indexForLocale(const UChar * locale) const165 LocalizationInfo::indexForLocale(const UChar* locale) const {
166     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
167         if (streq(locale, getLocaleName(i))) {
168             return i;
169         }
170     }
171     return -1;
172 }
173 
174 int32_t
indexForRuleSet(const UChar * ruleset) const175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
176     if (ruleset) {
177         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
178             if (streq(ruleset, getRuleSetName(i))) {
179                 return i;
180             }
181         }
182     }
183     return -1;
184 }
185 
186 
187 typedef void (*Fn_Deleter)(void*);
188 
189 class VArray {
190     void** buf;
191     int32_t cap;
192     int32_t size;
193     Fn_Deleter deleter;
194 public:
VArray()195     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
196 
VArray(Fn_Deleter del)197     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
198 
~VArray()199     ~VArray() {
200         if (deleter) {
201             for (int i = 0; i < size; ++i) {
202                 (*deleter)(buf[i]);
203             }
204         }
205         uprv_free(buf);
206     }
207 
length()208     int32_t length() {
209         return size;
210     }
211 
add(void * elem,UErrorCode & status)212     void add(void* elem, UErrorCode& status) {
213         if (U_SUCCESS(status)) {
214             if (size == cap) {
215                 if (cap == 0) {
216                     cap = 1;
217                 } else if (cap < 256) {
218                     cap *= 2;
219                 } else {
220                     cap += 256;
221                 }
222                 if (buf == NULL) {
223                     buf = (void**)uprv_malloc(cap * sizeof(void*));
224                 } else {
225                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
226                 }
227                 if (buf == NULL) {
228                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
229                     status = U_MEMORY_ALLOCATION_ERROR;
230                     return;
231                 }
232                 void* start = &buf[size];
233                 size_t count = (cap - size) * sizeof(void*);
234                 uprv_memset(start, 0, count); // fill with nulls, just because
235             }
236             buf[size++] = elem;
237         }
238     }
239 
release(void)240     void** release(void) {
241         void** result = buf;
242         buf = NULL;
243         cap = 0;
244         size = 0;
245         return result;
246     }
247 };
248 
249 class LocDataParser;
250 
251 class StringLocalizationInfo : public LocalizationInfo {
252     UChar* info;
253     UChar*** data;
254     int32_t numRuleSets;
255     int32_t numLocales;
256 
257 friend class LocDataParser;
258 
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)259     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
260         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
261     {
262     }
263 
264 public:
265     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
266 
267     virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const268     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
269     virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const270     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
271     virtual const UChar* getLocaleName(int32_t index) const;
272     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
273 
274 //    virtual UClassID getDynamicClassID() const;
275 //    static UClassID getStaticClassID(void);
276 
277 private:
278     void init(UErrorCode& status) const;
279 };
280 
281 
// Characters that have syntactic meaning in a localization data string.
enum {
    SPACE       = 0x0020, /* ' '  */
    QUOTE       = 0x0022, /* '"'  */
    TICK        = 0x0027, /* '\'' */
    COMMA       = 0x002c, /* ','  */
    OPEN_ANGLE  = 0x003c, /* '<'  */
    CLOSE_ANGLE = 0x003e  /* '>'  */
};
290 
291 /**
292  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
293  */
294 class LocDataParser {
295     UChar* data;
296     const UChar* e;
297     UChar* p;
298     UChar ch;
299     UParseError& pe;
300     UErrorCode& ec;
301 
302 public:
LocDataParser(UParseError & parseError,UErrorCode & status)303     LocDataParser(UParseError& parseError, UErrorCode& status)
304         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()305     ~LocDataParser() {}
306 
307     /*
308     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
309     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
310     */
311     StringLocalizationInfo* parse(UChar* data, int32_t len);
312 
313 private:
314 
inc(void)315     void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)316     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)317     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)318     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const319     UBool inList(UChar c, const UChar* list) const {
320         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
321         while (*list && *list != c) ++list; return *list == c;
322     }
323     void parseError(const char* msg);
324 
325     StringLocalizationInfo* doParse(void);
326 
327     UChar** nextArray(int32_t& requiredLength);
328     UChar*  nextString(void);
329 };
330 
// ERROR(msg) reports a parse error through parseError() and then returns NULL
// from the enclosing function.  It expands to two statements, so it must only
// be used inside a braced block.  The message text is passed on only in
// RBNF_DEBUG builds; EXPLANATION_ARG names parseError's parameter in those
// builds and expands to nothing otherwise.
#if defined(RBNF_DEBUG)
#  define ERROR(msg) parseError(msg); return NULL;
#  define EXPLANATION_ARG explanationArg
#else
#  define ERROR(msg) parseError(NULL); return NULL;
#  define EXPLANATION_ARG
#endif
338 
339 
340 static const UChar DQUOTE_STOPLIST[] = {
341     QUOTE, 0
342 };
343 
344 static const UChar SQUOTE_STOPLIST[] = {
345     TICK, 0
346 };
347 
348 static const UChar NOQUOTE_STOPLIST[] = {
349     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
350 };
351 
352 static void
DeleteFn(void * p)353 DeleteFn(void* p) {
354   uprv_free(p);
355 }
356 
357 StringLocalizationInfo*
parse(UChar * _data,int32_t len)358 LocDataParser::parse(UChar* _data, int32_t len) {
359     if (U_FAILURE(ec)) {
360         if (_data) uprv_free(_data);
361         return NULL;
362     }
363 
364     pe.line = 0;
365     pe.offset = -1;
366     pe.postContext[0] = 0;
367     pe.preContext[0] = 0;
368 
369     if (_data == NULL) {
370         ec = U_ILLEGAL_ARGUMENT_ERROR;
371         return NULL;
372     }
373 
374     if (len <= 0) {
375         ec = U_ILLEGAL_ARGUMENT_ERROR;
376         uprv_free(_data);
377         return NULL;
378     }
379 
380     data = _data;
381     e = data + len;
382     p = _data;
383     ch = 0xffff;
384 
385     return doParse();
386 }
387 
388 
389 StringLocalizationInfo*
doParse(void)390 LocDataParser::doParse(void) {
391     skipWhitespace();
392     if (!checkInc(OPEN_ANGLE)) {
393         ERROR("Missing open angle");
394     } else {
395         VArray array(DeleteFn);
396         UBool mightHaveNext = TRUE;
397         int32_t requiredLength = -1;
398         while (mightHaveNext) {
399             mightHaveNext = FALSE;
400             UChar** elem = nextArray(requiredLength);
401             skipWhitespace();
402             UBool haveComma = check(COMMA);
403             if (elem) {
404                 array.add(elem, ec);
405                 if (haveComma) {
406                     inc();
407                     mightHaveNext = TRUE;
408                 }
409             } else if (haveComma) {
410                 ERROR("Unexpected character");
411             }
412         }
413 
414         skipWhitespace();
415         if (!checkInc(CLOSE_ANGLE)) {
416             if (check(OPEN_ANGLE)) {
417                 ERROR("Missing comma in outer array");
418             } else {
419                 ERROR("Missing close angle bracket in outer array");
420             }
421         }
422 
423         skipWhitespace();
424         if (p != e) {
425             ERROR("Extra text after close of localization data");
426         }
427 
428         array.add(NULL, ec);
429         if (U_SUCCESS(ec)) {
430             int32_t numLocs = array.length() - 2; // subtract first, NULL
431             UChar*** result = (UChar***)array.release();
432 
433             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
434         }
435     }
436 
437     ERROR("Unknown error");
438 }
439 
440 UChar**
nextArray(int32_t & requiredLength)441 LocDataParser::nextArray(int32_t& requiredLength) {
442     if (U_FAILURE(ec)) {
443         return NULL;
444     }
445 
446     skipWhitespace();
447     if (!checkInc(OPEN_ANGLE)) {
448         ERROR("Missing open angle");
449     }
450 
451     VArray array;
452     UBool mightHaveNext = TRUE;
453     while (mightHaveNext) {
454         mightHaveNext = FALSE;
455         UChar* elem = nextString();
456         skipWhitespace();
457         UBool haveComma = check(COMMA);
458         if (elem) {
459             array.add(elem, ec);
460             if (haveComma) {
461                 inc();
462                 mightHaveNext = TRUE;
463             }
464         } else if (haveComma) {
465             ERROR("Unexpected comma");
466         }
467     }
468     skipWhitespace();
469     if (!checkInc(CLOSE_ANGLE)) {
470         if (check(OPEN_ANGLE)) {
471             ERROR("Missing close angle bracket in inner array");
472         } else {
473             ERROR("Missing comma in inner array");
474         }
475     }
476 
477     array.add(NULL, ec);
478     if (U_SUCCESS(ec)) {
479         if (requiredLength == -1) {
480             requiredLength = array.length() + 1;
481         } else if (array.length() != requiredLength) {
482             ec = U_ILLEGAL_ARGUMENT_ERROR;
483             ERROR("Array not of required length");
484         }
485 
486         return (UChar**)array.release();
487     }
488     ERROR("Unknown Error");
489 }
490 
491 UChar*
nextString()492 LocDataParser::nextString() {
493     UChar* result = NULL;
494 
495     skipWhitespace();
496     if (p < e) {
497         const UChar* terminators;
498         UChar c = *p;
499         UBool haveQuote = c == QUOTE || c == TICK;
500         if (haveQuote) {
501             inc();
502             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
503         } else {
504             terminators = NOQUOTE_STOPLIST;
505         }
506         UChar* start = p;
507         while (p < e && !inList(*p, terminators)) ++p;
508         if (p == e) {
509             ERROR("Unexpected end of data");
510         }
511 
512         UChar x = *p;
513         if (p > start) {
514             ch = x;
515             *p = 0x0; // terminate by writing to data
516             result = start; // just point into data
517         }
518         if (haveQuote) {
519             if (x != c) {
520                 ERROR("Missing matching quote");
521             } else if (p == start) {
522                 ERROR("Empty string");
523             }
524             inc();
525         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
526             ERROR("Unexpected character in string");
527         }
528     }
529 
530     // ok for there to be no next string
531     return result;
532 }
533 
parseError(const char * EXPLANATION_ARG)534 void LocDataParser::parseError(const char* EXPLANATION_ARG)
535 {
536     if (!data) {
537         return;
538     }
539 
540     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
541     if (start < data) {
542         start = data;
543     }
544     for (UChar* x = p; --x >= start;) {
545         if (!*x) {
546             start = x+1;
547             break;
548         }
549     }
550     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
551     if (limit > e) {
552         limit = e;
553     }
554     u_strncpy(pe.preContext, start, (int32_t)(p-start));
555     pe.preContext[p-start] = 0;
556     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
557     pe.postContext[limit-p] = 0;
558     pe.offset = (int32_t)(p - data);
559 
560 #ifdef RBNF_DEBUG
561     fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
562 
563     UnicodeString msg;
564     msg.append(start, p - start);
565     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
566     msg.append(p, limit-p);
567     msg.append(UNICODE_STRING_SIMPLE("'"));
568 
569     char buf[128];
570     int32_t len = msg.extract(0, msg.length(), buf, 128);
571     if (len >= 128) {
572         buf[127] = 0;
573     } else {
574         buf[len] = 0;
575     }
576     fprintf(stderr, "%s\n", buf);
577     fflush(stderr);
578 #endif
579 
580     uprv_free(data);
581     data = NULL;
582     p = NULL;
583     e = NULL;
584 
585     if (U_SUCCESS(ec)) {
586         ec = U_PARSE_ERROR;
587     }
588 }
589 
590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
591 
592 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
594     if (U_FAILURE(status)) {
595         return NULL;
596     }
597 
598     int32_t len = info.length();
599     if (len == 0) {
600         return NULL; // no error;
601     }
602 
603     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
604     if (!p) {
605         status = U_MEMORY_ALLOCATION_ERROR;
606         return NULL;
607     }
608     info.extract(p, len, status);
609     if (!U_FAILURE(status)) {
610         status = U_ZERO_ERROR; // clear warning about non-termination
611     }
612 
613     LocDataParser parser(perror, status);
614     return parser.parse(p, len);
615 }
616 
~StringLocalizationInfo()617 StringLocalizationInfo::~StringLocalizationInfo() {
618     for (UChar*** p = (UChar***)data; *p; ++p) {
619         // remaining data is simply pointer into our unicode string data.
620         if (*p) uprv_free(*p);
621     }
622     if (data) uprv_free(data);
623     if (info) uprv_free(info);
624 }
625 
626 
627 const UChar*
getRuleSetName(int32_t index) const628 StringLocalizationInfo::getRuleSetName(int32_t index) const {
629     if (index >= 0 && index < getNumberOfRuleSets()) {
630         return data[0][index];
631     }
632     return NULL;
633 }
634 
635 const UChar*
getLocaleName(int32_t index) const636 StringLocalizationInfo::getLocaleName(int32_t index) const {
637     if (index >= 0 && index < getNumberOfDisplayLocales()) {
638         return data[index+1][0];
639     }
640     return NULL;
641 }
642 
643 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
645     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
646         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
647         return data[localeIndex+1][ruleIndex+1];
648     }
649     return NULL;
650 }
651 
652 // ----------
653 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)654 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
655                                              const UnicodeString& locs,
656                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
657   : ruleSets(NULL)
658   , ruleSetDescriptions(NULL)
659   , numRuleSets(0)
660   , defaultRuleSet(NULL)
661   , locale(alocale)
662   , collator(NULL)
663   , decimalFormatSymbols(NULL)
664   , lenient(FALSE)
665   , lenientParseRules(NULL)
666   , localizations(NULL)
667   , capitalizationInfoSet(FALSE)
668   , capitalizationForUIListMenu(FALSE)
669   , capitalizationForStandAlone(FALSE)
670   , capitalizationBrkIter(NULL)
671 {
672   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
673   init(description, locinfo, perror, status);
674 }
675 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)676 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
677                                              const UnicodeString& locs,
678                                              UParseError& perror, UErrorCode& status)
679   : ruleSets(NULL)
680   , ruleSetDescriptions(NULL)
681   , numRuleSets(0)
682   , defaultRuleSet(NULL)
683   , locale(Locale::getDefault())
684   , collator(NULL)
685   , decimalFormatSymbols(NULL)
686   , lenient(FALSE)
687   , lenientParseRules(NULL)
688   , localizations(NULL)
689   , capitalizationInfoSet(FALSE)
690   , capitalizationForUIListMenu(FALSE)
691   , capitalizationForStandAlone(FALSE)
692   , capitalizationBrkIter(NULL)
693 {
694   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
695   init(description, locinfo, perror, status);
696 }
697 
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)698 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
699                                              LocalizationInfo* info,
700                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
701   : ruleSets(NULL)
702   , ruleSetDescriptions(NULL)
703   , numRuleSets(0)
704   , defaultRuleSet(NULL)
705   , locale(alocale)
706   , collator(NULL)
707   , decimalFormatSymbols(NULL)
708   , lenient(FALSE)
709   , lenientParseRules(NULL)
710   , localizations(NULL)
711   , capitalizationInfoSet(FALSE)
712   , capitalizationForUIListMenu(FALSE)
713   , capitalizationForStandAlone(FALSE)
714   , capitalizationBrkIter(NULL)
715 {
716   init(description, info, perror, status);
717 }
718 
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)719 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
720                          UParseError& perror,
721                          UErrorCode& status)
722   : ruleSets(NULL)
723   , ruleSetDescriptions(NULL)
724   , numRuleSets(0)
725   , defaultRuleSet(NULL)
726   , locale(Locale::getDefault())
727   , collator(NULL)
728   , decimalFormatSymbols(NULL)
729   , lenient(FALSE)
730   , lenientParseRules(NULL)
731   , localizations(NULL)
732   , capitalizationInfoSet(FALSE)
733   , capitalizationForUIListMenu(FALSE)
734   , capitalizationForStandAlone(FALSE)
735   , capitalizationBrkIter(NULL)
736 {
737     init(description, NULL, perror, status);
738 }
739 
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)740 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
741                          const Locale& aLocale,
742                          UParseError& perror,
743                          UErrorCode& status)
744   : ruleSets(NULL)
745   , ruleSetDescriptions(NULL)
746   , numRuleSets(0)
747   , defaultRuleSet(NULL)
748   , locale(aLocale)
749   , collator(NULL)
750   , decimalFormatSymbols(NULL)
751   , lenient(FALSE)
752   , lenientParseRules(NULL)
753   , localizations(NULL)
754   , capitalizationInfoSet(FALSE)
755   , capitalizationForUIListMenu(FALSE)
756   , capitalizationForStandAlone(FALSE)
757   , capitalizationBrkIter(NULL)
758 {
759     init(description, NULL, perror, status);
760 }
761 
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)762 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
763   : ruleSets(NULL)
764   , ruleSetDescriptions(NULL)
765   , numRuleSets(0)
766   , defaultRuleSet(NULL)
767   , locale(alocale)
768   , collator(NULL)
769   , decimalFormatSymbols(NULL)
770   , lenient(FALSE)
771   , lenientParseRules(NULL)
772   , localizations(NULL)
773   , capitalizationInfoSet(FALSE)
774   , capitalizationForUIListMenu(FALSE)
775   , capitalizationForStandAlone(FALSE)
776   , capitalizationBrkIter(NULL)
777 {
778     if (U_FAILURE(status)) {
779         return;
780     }
781 
782     const char* rules_tag = "RBNFRules";
783     const char* fmt_tag = "";
784     switch (tag) {
785     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
786     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
787     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
788     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
789     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
790     }
791 
792     // TODO: read localization info from resource
793     LocalizationInfo* locinfo = NULL;
794 
795     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
796     if (U_SUCCESS(status)) {
797         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
798                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
799 
800         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
801         if (U_FAILURE(status)) {
802             ures_close(nfrb);
803         }
804         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
805         if (U_FAILURE(status)) {
806             ures_close(rbnfRules);
807             ures_close(nfrb);
808             return;
809         }
810 
811         UnicodeString desc;
812         while (ures_hasNext(ruleSets)) {
813            desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
814         }
815         UParseError perror;
816 
817         init (desc, locinfo, perror, status);
818 
819         ures_close(ruleSets);
820         ures_close(rbnfRules);
821     }
822     ures_close(nfrb);
823 }
824 
// Copy constructor: starts from an empty formatter carrying rhs's locale and
// then lets the assignment operator do the actual copying.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : NumberFormat(rhs),
      ruleSets(NULL),
      ruleSetDescriptions(NULL),
      numRuleSets(0),
      defaultRuleSet(NULL),
      locale(rhs.locale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL),
      localizations(NULL),
      capitalizationInfoSet(FALSE),
      capitalizationForUIListMenu(FALSE),
      capitalizationForStandAlone(FALSE),
      capitalizationBrkIter(NULL)
{
    *this = rhs;
}
844 
845 // --------
846 
847 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)848 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
849 {
850     if (this == &rhs) {
851         return *this;
852     }
853     NumberFormat::operator=(rhs);
854     UErrorCode status = U_ZERO_ERROR;
855     dispose();
856     locale = rhs.locale;
857     lenient = rhs.lenient;
858 
859     UParseError perror;
860     init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
861     setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
862     setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
863 
864     capitalizationInfoSet = rhs.capitalizationInfoSet;
865     capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
866     capitalizationForStandAlone = rhs.capitalizationForStandAlone;
867 #if !UCONFIG_NO_BREAK_ITERATION
868     capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
869 #endif
870 
871     return *this;
872 }
873 
~RuleBasedNumberFormat()874 RuleBasedNumberFormat::~RuleBasedNumberFormat()
875 {
876     dispose();
877 }
878 
879 Format*
clone(void) const880 RuleBasedNumberFormat::clone(void) const
881 {
882     return new RuleBasedNumberFormat(*this);
883 }
884 
885 UBool
operator ==(const Format & other) const886 RuleBasedNumberFormat::operator==(const Format& other) const
887 {
888     if (this == &other) {
889         return TRUE;
890     }
891 
892     if (typeid(*this) == typeid(other)) {
893         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
894         // test for capitalization info equality is adequately handled
895         // by the NumberFormat test for fCapitalizationContext equality;
896         // the info here is just derived from that.
897         if (locale == rhs.locale &&
898             lenient == rhs.lenient &&
899             (localizations == NULL
900                 ? rhs.localizations == NULL
901                 : (rhs.localizations == NULL
902                     ? FALSE
903                     : *localizations == rhs.localizations))) {
904 
905             NFRuleSet** p = ruleSets;
906             NFRuleSet** q = rhs.ruleSets;
907             if (p == NULL) {
908                 return q == NULL;
909             } else if (q == NULL) {
910                 return FALSE;
911             }
912             while (*p && *q && (**p == **q)) {
913                 ++p;
914                 ++q;
915             }
916             return *q == NULL && *p == NULL;
917         }
918     }
919 
920     return FALSE;
921 }
922 
923 UnicodeString
getRules() const924 RuleBasedNumberFormat::getRules() const
925 {
926     UnicodeString result;
927     if (ruleSets != NULL) {
928         for (NFRuleSet** p = ruleSets; *p; ++p) {
929             (*p)->appendRules(result);
930         }
931     }
932     return result;
933 }
934 
935 UnicodeString
getRuleSetName(int32_t index) const936 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
937 {
938     if (localizations) {
939       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
940       return string;
941     } else if (ruleSets) {
942         UnicodeString result;
943         for (NFRuleSet** p = ruleSets; *p; ++p) {
944             NFRuleSet* rs = *p;
945             if (rs->isPublic()) {
946                 if (--index == -1) {
947                     rs->getName(result);
948                     return result;
949                 }
950             }
951         }
952     }
953     UnicodeString empty;
954     return empty;
955 }
956 
957 int32_t
getNumberOfRuleSetNames() const958 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
959 {
960     int32_t result = 0;
961     if (localizations) {
962       result = localizations->getNumberOfRuleSets();
963     } else if (ruleSets) {
964         for (NFRuleSet** p = ruleSets; *p; ++p) {
965             if ((**p).isPublic()) {
966                 ++result;
967             }
968         }
969     }
970     return result;
971 }
972 
973 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const974 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
975     if (localizations) {
976         return localizations->getNumberOfDisplayLocales();
977     }
978     return 0;
979 }
980 
981 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const982 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
983     if (U_FAILURE(status)) {
984         return Locale("");
985     }
986     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
987         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
988         char buffer[64];
989         int32_t cap = name.length() + 1;
990         char* bp = buffer;
991         if (cap > 64) {
992             bp = (char *)uprv_malloc(cap);
993             if (bp == NULL) {
994                 status = U_MEMORY_ALLOCATION_ERROR;
995                 return Locale("");
996             }
997         }
998         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
999         Locale retLocale(bp);
1000         if (bp != buffer) {
1001             uprv_free(bp);
1002         }
1003         return retLocale;
1004     }
1005     status = U_ILLEGAL_ARGUMENT_ERROR;
1006     Locale retLocale;
1007     return retLocale;
1008 }
1009 
1010 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)1011 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1012     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1013         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1014         int32_t len = localeName.length();
1015         UChar* localeStr = localeName.getBuffer(len + 1);
1016         while (len >= 0) {
1017             localeStr[len] = 0;
1018             int32_t ix = localizations->indexForLocale(localeStr);
1019             if (ix >= 0) {
1020                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1021                 return name;
1022             }
1023 
1024             // trim trailing portion, skipping over ommitted sections
1025             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1026             while (len > 0 && localeStr[len-1] == 0x005F) --len;
1027         }
1028         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1029         return name;
1030     }
1031     UnicodeString bogus;
1032     bogus.setToBogus();
1033     return bogus;
1034 }
1035 
1036 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1037 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1038     if (localizations) {
1039         UnicodeString rsn(ruleSetName);
1040         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1041         return getRuleSetDisplayName(ix, localeParam);
1042     }
1043     UnicodeString bogus;
1044     bogus.setToBogus();
1045     return bogus;
1046 }
1047 
1048 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1049 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1050 {
1051     if (U_SUCCESS(status) && ruleSets) {
1052         for (NFRuleSet** p = ruleSets; *p; ++p) {
1053             NFRuleSet* rs = *p;
1054             if (rs->isNamed(name)) {
1055                 return rs;
1056             }
1057         }
1058         status = U_ILLEGAL_ARGUMENT_ERROR;
1059     }
1060     return NULL;
1061 }
1062 
1063 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1064 RuleBasedNumberFormat::format(int32_t number,
1065                               UnicodeString& toAppendTo,
1066                               FieldPosition& /* pos */) const
1067 {
1068     if (defaultRuleSet) {
1069         UErrorCode status = U_ZERO_ERROR;
1070         int32_t startPos = toAppendTo.length();
1071         defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
1072         adjustForCapitalizationContext(startPos, toAppendTo);
1073     }
1074     return toAppendTo;
1075 }
1076 
1077 
1078 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1079 RuleBasedNumberFormat::format(int64_t number,
1080                               UnicodeString& toAppendTo,
1081                               FieldPosition& /* pos */) const
1082 {
1083     if (defaultRuleSet) {
1084         UErrorCode status = U_ZERO_ERROR;
1085         int32_t startPos = toAppendTo.length();
1086         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
1087         adjustForCapitalizationContext(startPos, toAppendTo);
1088     }
1089     return toAppendTo;
1090 }
1091 
1092 
1093 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1094 RuleBasedNumberFormat::format(double number,
1095                               UnicodeString& toAppendTo,
1096                               FieldPosition& /* pos */) const
1097 {
1098     int32_t startPos = toAppendTo.length();
1099     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1100     if (uprv_isNaN(number)) {
1101         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1102         if (decFmtSyms) {
1103             toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1104         }
1105     } else if (defaultRuleSet) {
1106         UErrorCode status = U_ZERO_ERROR;
1107         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
1108     }
1109     return adjustForCapitalizationContext(startPos, toAppendTo);
1110 }
1111 
1112 
1113 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1114 RuleBasedNumberFormat::format(int32_t number,
1115                               const UnicodeString& ruleSetName,
1116                               UnicodeString& toAppendTo,
1117                               FieldPosition& /* pos */,
1118                               UErrorCode& status) const
1119 {
1120     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1121     if (U_SUCCESS(status)) {
1122         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1123             // throw new IllegalArgumentException("Can't use internal rule set");
1124             status = U_ILLEGAL_ARGUMENT_ERROR;
1125         } else {
1126             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1127             if (rs) {
1128                 int32_t startPos = toAppendTo.length();
1129                 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
1130                 adjustForCapitalizationContext(startPos, toAppendTo);
1131             }
1132         }
1133     }
1134     return toAppendTo;
1135 }
1136 
1137 
1138 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1139 RuleBasedNumberFormat::format(int64_t number,
1140                               const UnicodeString& ruleSetName,
1141                               UnicodeString& toAppendTo,
1142                               FieldPosition& /* pos */,
1143                               UErrorCode& status) const
1144 {
1145     if (U_SUCCESS(status)) {
1146         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1147             // throw new IllegalArgumentException("Can't use internal rule set");
1148             status = U_ILLEGAL_ARGUMENT_ERROR;
1149         } else {
1150             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151             if (rs) {
1152                 int32_t startPos = toAppendTo.length();
1153                 rs->format(number, toAppendTo, toAppendTo.length(), status);
1154                 adjustForCapitalizationContext(startPos, toAppendTo);
1155             }
1156         }
1157     }
1158     return toAppendTo;
1159 }
1160 
1161 
1162 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1163 RuleBasedNumberFormat::format(double number,
1164                               const UnicodeString& ruleSetName,
1165                               UnicodeString& toAppendTo,
1166                               FieldPosition& /* pos */,
1167                               UErrorCode& status) const
1168 {
1169     if (U_SUCCESS(status)) {
1170         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1171             // throw new IllegalArgumentException("Can't use internal rule set");
1172             status = U_ILLEGAL_ARGUMENT_ERROR;
1173         } else {
1174             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1175             if (rs) {
1176                 int32_t startPos = toAppendTo.length();
1177                 rs->format(number, toAppendTo, toAppendTo.length(), status);
1178                 adjustForCapitalizationContext(startPos, toAppendTo);
1179             }
1180         }
1181     }
1182     return toAppendTo;
1183 }
1184 
1185 UnicodeString&
adjustForCapitalizationContext(int32_t startPos,UnicodeString & currentResult) const1186 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1187                                                       UnicodeString& currentResult) const
1188 {
1189 #if !UCONFIG_NO_BREAK_ITERATION
1190     if (startPos==0 && currentResult.length() > 0) {
1191         // capitalize currentResult according to context
1192         UChar32 ch = currentResult.char32At(0);
1193         UErrorCode status = U_ZERO_ERROR;
1194         UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1195         if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1196               ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1197                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1198                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1199             // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1200             // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1201             currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1202         }
1203     }
1204 #endif
1205     return currentResult;
1206 }
1207 
1208 
1209 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1210 RuleBasedNumberFormat::parse(const UnicodeString& text,
1211                              Formattable& result,
1212                              ParsePosition& parsePosition) const
1213 {
1214     if (!ruleSets) {
1215         parsePosition.setErrorIndex(0);
1216         return;
1217     }
1218 
1219     UnicodeString workingText(text, parsePosition.getIndex());
1220     ParsePosition workingPos(0);
1221 
1222     ParsePosition high_pp(0);
1223     Formattable high_result;
1224 
1225     for (NFRuleSet** p = ruleSets; *p; ++p) {
1226         NFRuleSet *rp = *p;
1227         if (rp->isPublic() && rp->isParseable()) {
1228             ParsePosition working_pp(0);
1229             Formattable working_result;
1230 
1231             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1232             if (working_pp.getIndex() > high_pp.getIndex()) {
1233                 high_pp = working_pp;
1234                 high_result = working_result;
1235 
1236                 if (high_pp.getIndex() == workingText.length()) {
1237                     break;
1238                 }
1239             }
1240         }
1241     }
1242 
1243     int32_t startIndex = parsePosition.getIndex();
1244     parsePosition.setIndex(startIndex + high_pp.getIndex());
1245     if (high_pp.getIndex() > 0) {
1246         parsePosition.setErrorIndex(-1);
1247     } else {
1248         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1249         parsePosition.setErrorIndex(startIndex + errorIndex);
1250     }
1251     result = high_result;
1252     if (result.getType() == Formattable::kDouble) {
1253         int32_t r = (int32_t)result.getDouble();
1254         if ((double)r == result.getDouble()) {
1255             result.setLong(r);
1256         }
1257     }
1258 }
1259 
1260 #if !UCONFIG_NO_COLLATION
1261 
1262 void
setLenient(UBool enabled)1263 RuleBasedNumberFormat::setLenient(UBool enabled)
1264 {
1265     lenient = enabled;
1266     if (!enabled && collator) {
1267         delete collator;
1268         collator = NULL;
1269     }
1270 }
1271 
1272 #endif
1273 
1274 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1275 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1276     if (U_SUCCESS(status)) {
1277         if (ruleSetName.isEmpty()) {
1278           if (localizations) {
1279               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1280               defaultRuleSet = findRuleSet(name, status);
1281           } else {
1282             initDefaultRuleSet();
1283           }
1284         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1285             status = U_ILLEGAL_ARGUMENT_ERROR;
1286         } else {
1287             NFRuleSet* result = findRuleSet(ruleSetName, status);
1288             if (result != NULL) {
1289                 defaultRuleSet = result;
1290             }
1291         }
1292     }
1293 }
1294 
1295 UnicodeString
getDefaultRuleSetName() const1296 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1297   UnicodeString result;
1298   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1299     defaultRuleSet->getName(result);
1300   } else {
1301     result.setToBogus();
1302   }
1303   return result;
1304 }
1305 
1306 void
initDefaultRuleSet()1307 RuleBasedNumberFormat::initDefaultRuleSet()
1308 {
1309     defaultRuleSet = NULL;
1310     if (!ruleSets) {
1311       return;
1312     }
1313 
1314     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1315     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1316     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1317 
1318     NFRuleSet**p = &ruleSets[0];
1319     while (*p) {
1320         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1321             defaultRuleSet = *p;
1322             return;
1323         } else {
1324             ++p;
1325         }
1326     }
1327 
1328     defaultRuleSet = *--p;
1329     if (!defaultRuleSet->isPublic()) {
1330         while (p != ruleSets) {
1331             if ((*--p)->isPublic()) {
1332                 defaultRuleSet = *p;
1333                 break;
1334             }
1335         }
1336     }
1337 }
1338 
1339 
1340 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1341 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1342                             UParseError& pErr, UErrorCode& status)
1343 {
1344     // TODO: implement UParseError
1345     uprv_memset(&pErr, 0, sizeof(UParseError));
1346     // Note: this can leave ruleSets == NULL, so remaining code should check
1347     if (U_FAILURE(status)) {
1348         return;
1349     }
1350 
1351     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1352 
1353     UnicodeString description(rules);
1354     if (!description.length()) {
1355         status = U_MEMORY_ALLOCATION_ERROR;
1356         return;
1357     }
1358 
1359     // start by stripping the trailing whitespace from all the rules
1360     // (this is all the whitespace follwing each semicolon in the
1361     // description).  This allows us to look for rule-set boundaries
1362     // by searching for ";%" without having to worry about whitespace
1363     // between the ; and the %
1364     stripWhitespace(description);
1365 
1366     // check to see if there's a set of lenient-parse rules.  If there
1367     // is, pull them out into our temporary holding place for them,
1368     // and delete them from the description before the real desciption-
1369     // parsing code sees them
1370     int32_t lp = description.indexOf(gLenientParse, -1, 0);
1371     if (lp != -1) {
1372         // we've got to make sure we're not in the middle of a rule
1373         // (where "%%lenient-parse" would actually get treated as
1374         // rule text)
1375         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1376             // locate the beginning and end of the actual collation
1377             // rules (there may be whitespace between the name and
1378             // the first token in the description)
1379             int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1380 
1381             if (lpEnd == -1) {
1382                 lpEnd = description.length() - 1;
1383             }
1384             int lpStart = lp + u_strlen(gLenientParse);
1385             while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1386                 ++lpStart;
1387             }
1388 
1389             // copy out the lenient-parse rules and delete them
1390             // from the description
1391             lenientParseRules = new UnicodeString();
1392             /* test for NULL */
1393             if (lenientParseRules == 0) {
1394                 status = U_MEMORY_ALLOCATION_ERROR;
1395                 return;
1396             }
1397             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1398 
1399             description.remove(lp, lpEnd + 1 - lp);
1400         }
1401     }
1402 
1403     // pre-flight parsing the description and count the number of
1404     // rule sets (";%" marks the end of one rule set and the beginning
1405     // of the next)
1406     numRuleSets = 0;
1407     for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1408         ++numRuleSets;
1409         ++p;
1410     }
1411     ++numRuleSets;
1412 
1413     // our rule list is an array of the appropriate size
1414     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1415     /* test for NULL */
1416     if (ruleSets == 0) {
1417         status = U_MEMORY_ALLOCATION_ERROR;
1418         return;
1419     }
1420 
1421     for (int i = 0; i <= numRuleSets; ++i) {
1422         ruleSets[i] = NULL;
1423     }
1424 
1425     // divide up the descriptions into individual rule-set descriptions
1426     // and store them in a temporary array.  At each step, we also
1427     // new up a rule set, but all this does is initialize its name
1428     // and remove it from its description.  We can't actually parse
1429     // the rest of the descriptions and finish initializing everything
1430     // because we have to know the names and locations of all the rule
1431     // sets before we can actually set everything up
1432     if(!numRuleSets) {
1433         status = U_ILLEGAL_ARGUMENT_ERROR;
1434         return;
1435     }
1436 
1437     ruleSetDescriptions = new UnicodeString[numRuleSets];
1438     if (ruleSetDescriptions == 0) {
1439         status = U_MEMORY_ALLOCATION_ERROR;
1440         return;
1441     }
1442 
1443     {
1444         int curRuleSet = 0;
1445         int32_t start = 0;
1446         for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1447             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1448             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1449             if (ruleSets[curRuleSet] == 0) {
1450                 status = U_MEMORY_ALLOCATION_ERROR;
1451                 return;
1452             }
1453             ++curRuleSet;
1454             start = p + 1;
1455         }
1456         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1457         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1458         if (ruleSets[curRuleSet] == 0) {
1459             status = U_MEMORY_ALLOCATION_ERROR;
1460             return;
1461         }
1462     }
1463 
1464     // now we can take note of the formatter's default rule set, which
1465     // is the last public rule set in the description (it's the last
1466     // rather than the first so that a user can create a new formatter
1467     // from an existing formatter and change its default behavior just
1468     // by appending more rule sets to the end)
1469 
1470     // {dlf} Initialization of a fraction rule set requires the default rule
1471     // set to be known.  For purposes of initialization, this is always the
1472     // last public rule set, no matter what the localization data says.
1473     initDefaultRuleSet();
1474 
1475     // finally, we can go back through the temporary descriptions
1476     // list and finish seting up the substructure (and we throw
1477     // away the temporary descriptions as we go)
1478     {
1479         for (int i = 0; i < numRuleSets; i++) {
1480             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1481         }
1482     }
1483 
1484     // Now that the rules are initialized, the 'real' default rule
1485     // set can be adjusted by the localization data.
1486 
1487     // The C code keeps the localization array as is, rather than building
1488     // a separate array of the public rule set names, so we have less work
1489     // to do here-- but we still need to check the names.
1490 
1491     if (localizationInfos) {
1492         // confirm the names, if any aren't in the rules, that's an error
1493         // it is ok if the rules contain public rule sets that are not in this list
1494         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1495             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1496             NFRuleSet* rs = findRuleSet(name, status);
1497             if (rs == NULL) {
1498                 break; // error
1499             }
1500             if (i == 0) {
1501                 defaultRuleSet = rs;
1502             }
1503         }
1504     } else {
1505         defaultRuleSet = getDefaultRuleSet();
1506     }
1507     originalDescription = rules;
1508 }
1509 
1510 // override the NumberFormat implementation in order to
1511 // lazily initialize relevant items
1512 void
setContext(UDisplayContext value,UErrorCode & status)1513 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1514 {
1515     NumberFormat::setContext(value, status);
1516     if (U_SUCCESS(status)) {
1517     	if (!capitalizationInfoSet &&
1518     	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1519     	    initCapitalizationContextInfo(locale);
1520     	    capitalizationInfoSet = TRUE;
1521         }
1522 #if !UCONFIG_NO_BREAK_ITERATION
1523         if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1524                 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1525                 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1526             UErrorCode status = U_ZERO_ERROR;
1527             capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1528             if (U_FAILURE(status)) {
1529                 delete capitalizationBrkIter;
1530                 capitalizationBrkIter = NULL;
1531             }
1532         }
1533 #endif
1534     }
1535 }
1536 
1537 void
initCapitalizationContextInfo(const Locale & thelocale)1538 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1539 {
1540 #if !UCONFIG_NO_BREAK_ITERATION
1541     const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1542     UErrorCode status = U_ZERO_ERROR;
1543     UResourceBundle *rb = ures_open(NULL, localeID, &status);
1544     rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1545     rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1546     if (U_SUCCESS(status) && rb != NULL) {
1547         int32_t len = 0;
1548         const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1549         if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1550             capitalizationForUIListMenu = intVector[0];
1551             capitalizationForStandAlone = intVector[1];
1552         }
1553     }
1554     ures_close(rb);
1555 #endif
1556 }
1557 
1558 void
stripWhitespace(UnicodeString & description)1559 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1560 {
1561     // iterate through the characters...
1562     UnicodeString result;
1563 
1564     int start = 0;
1565     while (start != -1 && start < description.length()) {
1566         // seek to the first non-whitespace character...
1567         while (start < description.length()
1568             && PatternProps::isWhiteSpace(description.charAt(start))) {
1569             ++start;
1570         }
1571 
1572         // locate the next semicolon in the text and copy the text from
1573         // our current position up to that semicolon into the result
1574         int32_t p = description.indexOf(gSemiColon, start);
1575         if (p == -1) {
1576             // or if we don't find a semicolon, just copy the rest of
1577             // the string into the result
1578             result.append(description, start, description.length() - start);
1579             start = -1;
1580         }
1581         else if (p < description.length()) {
1582             result.append(description, start, p + 1 - start);
1583             start = p + 1;
1584         }
1585 
1586         // when we get here, we've seeked off the end of the sring, and
1587         // we terminate the loop (we continue until *start* is -1 rather
1588         // than until *p* is -1, because otherwise we'd miss the last
1589         // rule in the description)
1590         else {
1591             start = -1;
1592         }
1593     }
1594 
1595     description.setTo(result);
1596 }
1597 
1598 
1599 void
dispose()1600 RuleBasedNumberFormat::dispose()
1601 {
1602     if (ruleSets) {
1603         for (NFRuleSet** p = ruleSets; *p; ++p) {
1604             delete *p;
1605         }
1606         uprv_free(ruleSets);
1607         ruleSets = NULL;
1608     }
1609 
1610     if (ruleSetDescriptions) {
1611         delete [] ruleSetDescriptions;
1612     }
1613 
1614 #if !UCONFIG_NO_COLLATION
1615     delete collator;
1616 #endif
1617     collator = NULL;
1618 
1619     delete decimalFormatSymbols;
1620     decimalFormatSymbols = NULL;
1621 
1622     delete lenientParseRules;
1623     lenientParseRules = NULL;
1624 
1625 #if !UCONFIG_NO_BREAK_ITERATION
1626    delete capitalizationBrkIter;
1627    capitalizationBrkIter = NULL;
1628 #endif
1629 
1630     if (localizations) localizations = localizations->unref();
1631 }
1632 
1633 
1634 //-----------------------------------------------------------------------
1635 // package-internal API
1636 //-----------------------------------------------------------------------
1637 
1638 /**
1639  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1640  * this function creates it the first time it's called.
1641  * @return The collator to use for lenient parsing, or null if lenient parsing
1642  * is turned off.
1643 */
1644 const RuleBasedCollator*
getCollator() const1645 RuleBasedNumberFormat::getCollator() const
1646 {
1647 #if !UCONFIG_NO_COLLATION
1648     if (!ruleSets) {
1649         return NULL;
1650     }
1651 
1652     // lazy-evaluate the collator
1653     if (collator == NULL && lenient) {
1654         // create a default collator based on the formatter's locale,
1655         // then pull out that collator's rules, append any additional
1656         // rules specified in the description, and create a _new_
1657         // collator based on the combinaiton of those rules
1658 
1659         UErrorCode status = U_ZERO_ERROR;
1660 
1661         Collator* temp = Collator::createInstance(locale, status);
1662         RuleBasedCollator* newCollator;
1663         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1664             if (lenientParseRules) {
1665                 UnicodeString rules(newCollator->getRules());
1666                 rules.append(*lenientParseRules);
1667 
1668                 newCollator = new RuleBasedCollator(rules, status);
1669                 // Exit if newCollator could not be created.
1670                 if (newCollator == NULL) {
1671                     return NULL;
1672                 }
1673             } else {
1674                 temp = NULL;
1675             }
1676             if (U_SUCCESS(status)) {
1677                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1678                 // cast away const
1679                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1680             } else {
1681                 delete newCollator;
1682             }
1683         }
1684         delete temp;
1685     }
1686 #endif
1687 
1688     // if lenient-parse mode is off, this will be null
1689     // (see setLenientParseMode())
1690     return collator;
1691 }
1692 
1693 
1694 /**
1695  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1696  * instances owned by this formatter.  This object is lazily created: this function
1697  * creates it the first time it's called.
1698  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1699  * instances owned by this formatter.
1700 */
1701 DecimalFormatSymbols*
getDecimalFormatSymbols() const1702 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1703 {
1704     // lazy-evaluate the DecimalFormatSymbols object.  This object
1705     // is shared by all DecimalFormat instances belonging to this
1706     // formatter
1707     if (decimalFormatSymbols == NULL) {
1708         UErrorCode status = U_ZERO_ERROR;
1709         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1710         if (U_SUCCESS(status)) {
1711             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1712         } else {
1713             delete temp;
1714         }
1715     }
1716     return decimalFormatSymbols;
1717 }
1718 
1719 // De-owning the current localized symbols and adopt the new symbols.
1720 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1721 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1722 {
1723     if (symbolsToAdopt == NULL) {
1724         return; // do not allow caller to set decimalFormatSymbols to NULL
1725     }
1726 
1727     if (decimalFormatSymbols != NULL) {
1728         delete decimalFormatSymbols;
1729     }
1730 
1731     decimalFormatSymbols = symbolsToAdopt;
1732 
1733     {
1734         // Apply the new decimalFormatSymbols by reparsing the rulesets
1735         UErrorCode status = U_ZERO_ERROR;
1736 
1737         for (int32_t i = 0; i < numRuleSets; i++) {
1738             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1739         }
1740     }
1741 }
1742 
1743 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1744 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1745 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1746 {
1747     adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1748 }
1749 
1750 PluralFormat *
createPluralFormat(UPluralType pluralType,const UnicodeString & pattern,UErrorCode & status) const1751 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1752                                           const UnicodeString &pattern,
1753                                           UErrorCode& status) const
1754 {
1755     return new PluralFormat(locale, pluralType, pattern, status);
1756 }
1757 
1758 U_NAMESPACE_END
1759 
1760 /* U_HAVE_RBNF */
1761 #endif
1762