1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2015, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "utypeinfo.h" // for 'typeid' to work
10
11 #include "unicode/rbnf.h"
12
13 #if U_HAVE_RBNF
14
15 #include "unicode/normlzr.h"
16 #include "unicode/plurfmt.h"
17 #include "unicode/tblcoll.h"
18 #include "unicode/uchar.h"
19 #include "unicode/ucol.h"
20 #include "unicode/uloc.h"
21 #include "unicode/unum.h"
22 #include "unicode/ures.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utf16.h"
25 #include "unicode/udata.h"
26 #include "unicode/udisplaycontext.h"
27 #include "unicode/brkiter.h"
28 #include "nfrs.h"
29
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "patternprops.h"
33 #include "uresimp.h"
34
35 // debugging
36 // #define RBNF_DEBUG
37
38 #ifdef RBNF_DEBUG
39 #include <stdio.h>
40 #endif
41
// Name of the RBNF resource tree: the ICU data package name, the tree
// separator, and the literal "rbnf".
#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"

// Two-character prefix; the ruleSetName-taking format() overloads in this
// file reject names that begin with it.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
static const UChar gSemiColon = 0x003B; /* ";" */
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble is 2^22 expressed as a double; kMaxDouble is its square (2^44).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
64
65 U_NAMESPACE_BEGIN
66
67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68
69 /*
70 This is a utility class. It does not use ICU's RTTI.
71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72 Please make sure that intltest passes on Windows in Release mode,
73 since the string pooling per compilation unit will mess up how RTTI works.
74 The RTTI code was also removed due to lack of code coverage.
75 */
76 class LocalizationInfo : public UMemory {
77 protected:
78 virtual ~LocalizationInfo();
79 uint32_t refcount;
80
81 public:
LocalizationInfo()82 LocalizationInfo() : refcount(0) {}
83
ref(void)84 LocalizationInfo* ref(void) {
85 ++refcount;
86 return this;
87 }
88
unref(void)89 LocalizationInfo* unref(void) {
90 if (refcount && --refcount == 0) {
91 delete this;
92 }
93 return NULL;
94 }
95
96 virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98
99 virtual int32_t getNumberOfRuleSets(void) const = 0;
100 virtual const UChar* getRuleSetName(int32_t index) const = 0;
101 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102 virtual const UChar* getLocaleName(int32_t index) const = 0;
103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104
105 virtual int32_t indexForLocale(const UChar* locale) const;
106 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107
108 // virtual UClassID getDynamicClassID() const = 0;
109 // static UClassID getStaticClassID(void);
110 };
111
// Out-of-line definition of the virtual destructor; the base class has nothing to release.
LocalizationInfo::~LocalizationInfo() {}
113
114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
115
116 // if both strings are NULL, this returns TRUE
117 static UBool
streq(const UChar * lhs,const UChar * rhs)118 streq(const UChar* lhs, const UChar* rhs) {
119 if (rhs == lhs) {
120 return TRUE;
121 }
122 if (lhs && rhs) {
123 return u_strcmp(lhs, rhs) == 0;
124 }
125 return FALSE;
126 }
127
128 UBool
operator ==(const LocalizationInfo * rhs) const129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
130 if (rhs) {
131 if (this == rhs) {
132 return TRUE;
133 }
134
135 int32_t rsc = getNumberOfRuleSets();
136 if (rsc == rhs->getNumberOfRuleSets()) {
137 for (int i = 0; i < rsc; ++i) {
138 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
139 return FALSE;
140 }
141 }
142 int32_t dlc = getNumberOfDisplayLocales();
143 if (dlc == rhs->getNumberOfDisplayLocales()) {
144 for (int i = 0; i < dlc; ++i) {
145 const UChar* locale = getLocaleName(i);
146 int32_t ix = rhs->indexForLocale(locale);
147 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
148 if (!streq(locale, rhs->getLocaleName(ix))) {
149 return FALSE;
150 }
151 for (int j = 0; j < rsc; ++j) {
152 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
153 return FALSE;
154 }
155 }
156 }
157 return TRUE;
158 }
159 }
160 }
161 return FALSE;
162 }
163
164 int32_t
indexForLocale(const UChar * locale) const165 LocalizationInfo::indexForLocale(const UChar* locale) const {
166 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
167 if (streq(locale, getLocaleName(i))) {
168 return i;
169 }
170 }
171 return -1;
172 }
173
174 int32_t
indexForRuleSet(const UChar * ruleset) const175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
176 if (ruleset) {
177 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
178 if (streq(ruleset, getRuleSetName(i))) {
179 return i;
180 }
181 }
182 }
183 return -1;
184 }
185
186
187 typedef void (*Fn_Deleter)(void*);
188
189 class VArray {
190 void** buf;
191 int32_t cap;
192 int32_t size;
193 Fn_Deleter deleter;
194 public:
VArray()195 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
196
VArray(Fn_Deleter del)197 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
198
~VArray()199 ~VArray() {
200 if (deleter) {
201 for (int i = 0; i < size; ++i) {
202 (*deleter)(buf[i]);
203 }
204 }
205 uprv_free(buf);
206 }
207
length()208 int32_t length() {
209 return size;
210 }
211
add(void * elem,UErrorCode & status)212 void add(void* elem, UErrorCode& status) {
213 if (U_SUCCESS(status)) {
214 if (size == cap) {
215 if (cap == 0) {
216 cap = 1;
217 } else if (cap < 256) {
218 cap *= 2;
219 } else {
220 cap += 256;
221 }
222 if (buf == NULL) {
223 buf = (void**)uprv_malloc(cap * sizeof(void*));
224 } else {
225 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
226 }
227 if (buf == NULL) {
228 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
229 status = U_MEMORY_ALLOCATION_ERROR;
230 return;
231 }
232 void* start = &buf[size];
233 size_t count = (cap - size) * sizeof(void*);
234 uprv_memset(start, 0, count); // fill with nulls, just because
235 }
236 buf[size++] = elem;
237 }
238 }
239
release(void)240 void** release(void) {
241 void** result = buf;
242 buf = NULL;
243 cap = 0;
244 size = 0;
245 return result;
246 }
247 };
248
249 class LocDataParser;
250
/**
 * LocalizationInfo backed by a parsed localization string.
 *
 * `info` is the character buffer and `data` is a NULL-terminated table of
 * NULL-terminated string arrays pointing into it: data[0] holds the rule set
 * names, and each data[i+1] holds a locale name followed by one display name
 * per rule set (see the accessors' definitions). Instances are built only by
 * LocDataParser (a friend) via create().
 */
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;          // owned character buffer, freed in the destructor
    UChar*** data;        // owned table of arrays of pointers into info
    int32_t numRuleSets;  // number of rule set names in data[0]
    int32_t numLocales;   // number of locale rows following data[0]

friend class LocDataParser;

    // Private: adopts both i and d.
    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info`; returns NULL for an empty string (without error) or on failure.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

    //    virtual UClassID getDynamicClassID() const;
    //    static UClassID getStaticClassID(void);

private:
    // NOTE(review): no definition of init() is visible in this part of the file — confirm it is still needed.
    void init(UErrorCode& status) const;
};
280
281
// Code points of the punctuation recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' (single quote) */
    QUOTE = 0x0022, /* '"' (double quote) */
    SPACE = 0x0020 /* ' ' */
};
290
291 /**
292 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
293 */
294 class LocDataParser {
295 UChar* data;
296 const UChar* e;
297 UChar* p;
298 UChar ch;
299 UParseError& pe;
300 UErrorCode& ec;
301
302 public:
LocDataParser(UParseError & parseError,UErrorCode & status)303 LocDataParser(UParseError& parseError, UErrorCode& status)
304 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()305 ~LocDataParser() {}
306
307 /*
308 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
309 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
310 */
311 StringLocalizationInfo* parse(UChar* data, int32_t len);
312
313 private:
314
inc(void)315 void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)316 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)317 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)318 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const319 UBool inList(UChar c, const UChar* list) const {
320 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
321 while (*list && *list != c) ++list; return *list == c;
322 }
323 void parseError(const char* msg);
324
325 StringLocalizationInfo* doParse(void);
326
327 UChar** nextArray(int32_t& requiredLength);
328 UChar* nextString(void);
329 };
330
// ERROR(msg) reports a parse failure through LocDataParser::parseError() and
// then returns NULL from the enclosing function. It expands to TWO statements,
// so it must only be used inside a braced block. The message text is passed on
// only when RBNF_DEBUG is defined; otherwise NULL is passed.
// EXPLANATION_ARG is the name of parseError()'s parameter: it is named only in
// debug builds and left unnamed otherwise.
#ifdef RBNF_DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#endif
338
339
// Terminator lists used by LocDataParser::nextString(), each NUL-terminated.

// A double-quoted string ends at the next double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// A single-quoted string ends at the next single quote.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// An unquoted string ends at any of these characters. SPACE must stay first:
// LocDataParser::inList() treats a leading SPACE as "any pattern white space".
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
351
// Fn_Deleter-compatible callback that releases one element with uprv_free();
// doParse() installs it on the outer VArray so abandoned inner arrays are freed.
static void
DeleteFn(void* p) {
  uprv_free(p);
}
356
357 StringLocalizationInfo*
parse(UChar * _data,int32_t len)358 LocDataParser::parse(UChar* _data, int32_t len) {
359 if (U_FAILURE(ec)) {
360 if (_data) uprv_free(_data);
361 return NULL;
362 }
363
364 pe.line = 0;
365 pe.offset = -1;
366 pe.postContext[0] = 0;
367 pe.preContext[0] = 0;
368
369 if (_data == NULL) {
370 ec = U_ILLEGAL_ARGUMENT_ERROR;
371 return NULL;
372 }
373
374 if (len <= 0) {
375 ec = U_ILLEGAL_ARGUMENT_ERROR;
376 uprv_free(_data);
377 return NULL;
378 }
379
380 data = _data;
381 e = data + len;
382 p = _data;
383 ch = 0xffff;
384
385 return doParse();
386 }
387
388
389 StringLocalizationInfo*
doParse(void)390 LocDataParser::doParse(void) {
391 skipWhitespace();
392 if (!checkInc(OPEN_ANGLE)) {
393 ERROR("Missing open angle");
394 } else {
395 VArray array(DeleteFn);
396 UBool mightHaveNext = TRUE;
397 int32_t requiredLength = -1;
398 while (mightHaveNext) {
399 mightHaveNext = FALSE;
400 UChar** elem = nextArray(requiredLength);
401 skipWhitespace();
402 UBool haveComma = check(COMMA);
403 if (elem) {
404 array.add(elem, ec);
405 if (haveComma) {
406 inc();
407 mightHaveNext = TRUE;
408 }
409 } else if (haveComma) {
410 ERROR("Unexpected character");
411 }
412 }
413
414 skipWhitespace();
415 if (!checkInc(CLOSE_ANGLE)) {
416 if (check(OPEN_ANGLE)) {
417 ERROR("Missing comma in outer array");
418 } else {
419 ERROR("Missing close angle bracket in outer array");
420 }
421 }
422
423 skipWhitespace();
424 if (p != e) {
425 ERROR("Extra text after close of localization data");
426 }
427
428 array.add(NULL, ec);
429 if (U_SUCCESS(ec)) {
430 int32_t numLocs = array.length() - 2; // subtract first, NULL
431 UChar*** result = (UChar***)array.release();
432
433 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
434 }
435 }
436
437 ERROR("Unknown error");
438 }
439
440 UChar**
nextArray(int32_t & requiredLength)441 LocDataParser::nextArray(int32_t& requiredLength) {
442 if (U_FAILURE(ec)) {
443 return NULL;
444 }
445
446 skipWhitespace();
447 if (!checkInc(OPEN_ANGLE)) {
448 ERROR("Missing open angle");
449 }
450
451 VArray array;
452 UBool mightHaveNext = TRUE;
453 while (mightHaveNext) {
454 mightHaveNext = FALSE;
455 UChar* elem = nextString();
456 skipWhitespace();
457 UBool haveComma = check(COMMA);
458 if (elem) {
459 array.add(elem, ec);
460 if (haveComma) {
461 inc();
462 mightHaveNext = TRUE;
463 }
464 } else if (haveComma) {
465 ERROR("Unexpected comma");
466 }
467 }
468 skipWhitespace();
469 if (!checkInc(CLOSE_ANGLE)) {
470 if (check(OPEN_ANGLE)) {
471 ERROR("Missing close angle bracket in inner array");
472 } else {
473 ERROR("Missing comma in inner array");
474 }
475 }
476
477 array.add(NULL, ec);
478 if (U_SUCCESS(ec)) {
479 if (requiredLength == -1) {
480 requiredLength = array.length() + 1;
481 } else if (array.length() != requiredLength) {
482 ec = U_ILLEGAL_ARGUMENT_ERROR;
483 ERROR("Array not of required length");
484 }
485
486 return (UChar**)array.release();
487 }
488 ERROR("Unknown Error");
489 }
490
// Parses the next string element in place and returns a pointer to it inside
// the data buffer, or NULL if there is no (non-empty) string at this position
// or an error was reported via ERROR().
//
// A string is either quoted ('...' or "...", ending at the matching quote) or
// unquoted (ending at any character of NOQUOTE_STOPLIST). The string is
// terminated by overwriting its terminator character with NUL; the overwritten
// character is kept in `ch` so check()/skipWhitespace() can still see it.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            inc(); // step over the opening quote
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // Scan to the first terminator; inList() also stops on an embedded NUL.
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminator that stopped the scan
        if (p > start) {
            ch = x; // remember the terminator before it is overwritten
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            inc(); // step over the closing quote; this also resets ch
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
533
parseError(const char * EXPLANATION_ARG)534 void LocDataParser::parseError(const char* EXPLANATION_ARG)
535 {
536 if (!data) {
537 return;
538 }
539
540 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
541 if (start < data) {
542 start = data;
543 }
544 for (UChar* x = p; --x >= start;) {
545 if (!*x) {
546 start = x+1;
547 break;
548 }
549 }
550 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
551 if (limit > e) {
552 limit = e;
553 }
554 u_strncpy(pe.preContext, start, (int32_t)(p-start));
555 pe.preContext[p-start] = 0;
556 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
557 pe.postContext[limit-p] = 0;
558 pe.offset = (int32_t)(p - data);
559
560 #ifdef RBNF_DEBUG
561 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
562
563 UnicodeString msg;
564 msg.append(start, p - start);
565 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
566 msg.append(p, limit-p);
567 msg.append(UNICODE_STRING_SIMPLE("'"));
568
569 char buf[128];
570 int32_t len = msg.extract(0, msg.length(), buf, 128);
571 if (len >= 128) {
572 buf[127] = 0;
573 } else {
574 buf[len] = 0;
575 }
576 fprintf(stderr, "%s\n", buf);
577 fflush(stderr);
578 #endif
579
580 uprv_free(data);
581 data = NULL;
582 p = NULL;
583 e = NULL;
584
585 if (U_SUCCESS(ec)) {
586 ec = U_PARSE_ERROR;
587 }
588 }
589
590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
591
592 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
594 if (U_FAILURE(status)) {
595 return NULL;
596 }
597
598 int32_t len = info.length();
599 if (len == 0) {
600 return NULL; // no error;
601 }
602
603 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
604 if (!p) {
605 status = U_MEMORY_ALLOCATION_ERROR;
606 return NULL;
607 }
608 info.extract(p, len, status);
609 if (!U_FAILURE(status)) {
610 status = U_ZERO_ERROR; // clear warning about non-termination
611 }
612
613 LocDataParser parser(perror, status);
614 return parser.parse(p, len);
615 }
616
~StringLocalizationInfo()617 StringLocalizationInfo::~StringLocalizationInfo() {
618 for (UChar*** p = (UChar***)data; *p; ++p) {
619 // remaining data is simply pointer into our unicode string data.
620 if (*p) uprv_free(*p);
621 }
622 if (data) uprv_free(data);
623 if (info) uprv_free(info);
624 }
625
626
627 const UChar*
getRuleSetName(int32_t index) const628 StringLocalizationInfo::getRuleSetName(int32_t index) const {
629 if (index >= 0 && index < getNumberOfRuleSets()) {
630 return data[0][index];
631 }
632 return NULL;
633 }
634
635 const UChar*
getLocaleName(int32_t index) const636 StringLocalizationInfo::getLocaleName(int32_t index) const {
637 if (index >= 0 && index < getNumberOfDisplayLocales()) {
638 return data[index+1][0];
639 }
640 return NULL;
641 }
642
643 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
645 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
646 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
647 return data[localeIndex+1][ruleIndex+1];
648 }
649 return NULL;
650 }
651
652 // ----------
653
// Constructs a formatter from a rule description and a localization string,
// for an explicit locale. Every member starts out NULL/FALSE/0 (locale is
// copied from alocale); `locs` is parsed with StringLocalizationInfo::create()
// and the result, possibly NULL, is passed on to init() together with perror
// and status.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  // create() returns NULL for an empty string or when it fails.
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
677
// Constructs a formatter from a rule description and a localization string,
// using Locale::getDefault() as the locale. Every other member starts out
// NULL/FALSE/0; `locs` is parsed with StringLocalizationInfo::create() and the
// result, possibly NULL, is passed on to init() together with perror and status.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  // create() returns NULL for an empty string or when it fails.
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
701
// Constructs a formatter from a rule description and an already-built
// LocalizationInfo (may be NULL), for an explicit locale. Every member starts
// out NULL/FALSE/0 (locale is copied from alocale) and the actual setup is
// delegated to init().
// NOTE(review): how init() takes ownership of / references `info` is not
// visible here — confirm against init().
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             LocalizationInfo* info,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  init(description, info, perror, status);
}
724
// Constructs a formatter from a rule description only: no localization info
// (NULL is passed to init()) and Locale::getDefault() as the locale. Every
// other member starts out NULL/FALSE/0.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}
747
// Constructs a formatter from a rule description for an explicit locale, with
// no localization info (NULL is passed to init()). Every other member starts
// out NULL/FALSE/0.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         const Locale& aLocale,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(aLocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}
771
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)772 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
773 : ruleSets(NULL)
774 , ruleSetDescriptions(NULL)
775 , numRuleSets(0)
776 , defaultRuleSet(NULL)
777 , locale(alocale)
778 , collator(NULL)
779 , decimalFormatSymbols(NULL)
780 , defaultInfinityRule(NULL)
781 , defaultNaNRule(NULL)
782 , lenient(FALSE)
783 , lenientParseRules(NULL)
784 , localizations(NULL)
785 , capitalizationInfoSet(FALSE)
786 , capitalizationForUIListMenu(FALSE)
787 , capitalizationForStandAlone(FALSE)
788 , capitalizationBrkIter(NULL)
789 {
790 if (U_FAILURE(status)) {
791 return;
792 }
793
794 const char* rules_tag = "RBNFRules";
795 const char* fmt_tag = "";
796 switch (tag) {
797 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
798 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
799 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
800 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
801 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
802 }
803
804 // TODO: read localization info from resource
805 LocalizationInfo* locinfo = NULL;
806
807 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
808 if (U_SUCCESS(status)) {
809 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
810 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
811
812 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
813 if (U_FAILURE(status)) {
814 ures_close(nfrb);
815 }
816 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
817 if (U_FAILURE(status)) {
818 ures_close(rbnfRules);
819 ures_close(nfrb);
820 return;
821 }
822
823 UnicodeString desc;
824 while (ures_hasNext(ruleSets)) {
825 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
826 }
827 UParseError perror;
828
829 init(desc, locinfo, perror, status);
830
831 ures_close(ruleSets);
832 ures_close(rbnfRules);
833 }
834 ures_close(nfrb);
835 }
836
// Copy constructor. Copy-constructs the NumberFormat base and the locale,
// leaves every other member NULL/FALSE/0, and then lets operator= rebuild
// this object from rhs.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs)
  , ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(rhs.locale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    this->operator=(rhs);
}
858
859 // --------
860
// Assignment: discards this object's state with dispose() and rebuilds it
// from rhs by re-running init() on rhs's original rule description, then
// copies the default rule set selection and the capitalization settings.
// Self-assignment is a no-op.
// NOTE(review): `status` and `perror` are locals that are never reported, so a
// failure while rebuilding is silent.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    UErrorCode status = U_ZERO_ERROR;
    dispose(); // must precede init(): releases the state being replaced
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    // NOTE(review): dereferences the result unchecked — confirm getDecimalFormatSymbols() cannot return NULL.
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    // NOTE(review): a reference is taken here before init() is called — confirm init() does not take a second one.
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    // The break iterator is cloned, not shared.
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
887
// Destructor: all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
892
893 Format*
clone(void) const894 RuleBasedNumberFormat::clone(void) const
895 {
896 return new RuleBasedNumberFormat(*this);
897 }
898
899 UBool
operator ==(const Format & other) const900 RuleBasedNumberFormat::operator==(const Format& other) const
901 {
902 if (this == &other) {
903 return TRUE;
904 }
905
906 if (typeid(*this) == typeid(other)) {
907 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
908 // test for capitalization info equality is adequately handled
909 // by the NumberFormat test for fCapitalizationContext equality;
910 // the info here is just derived from that.
911 if (locale == rhs.locale &&
912 lenient == rhs.lenient &&
913 (localizations == NULL
914 ? rhs.localizations == NULL
915 : (rhs.localizations == NULL
916 ? FALSE
917 : *localizations == rhs.localizations))) {
918
919 NFRuleSet** p = ruleSets;
920 NFRuleSet** q = rhs.ruleSets;
921 if (p == NULL) {
922 return q == NULL;
923 } else if (q == NULL) {
924 return FALSE;
925 }
926 while (*p && *q && (**p == **q)) {
927 ++p;
928 ++q;
929 }
930 return *q == NULL && *p == NULL;
931 }
932 }
933
934 return FALSE;
935 }
936
937 UnicodeString
getRules() const938 RuleBasedNumberFormat::getRules() const
939 {
940 UnicodeString result;
941 if (ruleSets != NULL) {
942 for (NFRuleSet** p = ruleSets; *p; ++p) {
943 (*p)->appendRules(result);
944 }
945 }
946 return result;
947 }
948
949 UnicodeString
getRuleSetName(int32_t index) const950 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
951 {
952 if (localizations) {
953 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
954 return string;
955 }
956 else if (ruleSets) {
957 UnicodeString result;
958 for (NFRuleSet** p = ruleSets; *p; ++p) {
959 NFRuleSet* rs = *p;
960 if (rs->isPublic()) {
961 if (--index == -1) {
962 rs->getName(result);
963 return result;
964 }
965 }
966 }
967 }
968 UnicodeString empty;
969 return empty;
970 }
971
972 int32_t
getNumberOfRuleSetNames() const973 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
974 {
975 int32_t result = 0;
976 if (localizations) {
977 result = localizations->getNumberOfRuleSets();
978 }
979 else if (ruleSets) {
980 for (NFRuleSet** p = ruleSets; *p; ++p) {
981 if ((**p).isPublic()) {
982 ++result;
983 }
984 }
985 }
986 return result;
987 }
988
989 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const990 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
991 if (localizations) {
992 return localizations->getNumberOfDisplayLocales();
993 }
994 return 0;
995 }
996
997 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const998 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
999 if (U_FAILURE(status)) {
1000 return Locale("");
1001 }
1002 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1003 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1004 char buffer[64];
1005 int32_t cap = name.length() + 1;
1006 char* bp = buffer;
1007 if (cap > 64) {
1008 bp = (char *)uprv_malloc(cap);
1009 if (bp == NULL) {
1010 status = U_MEMORY_ALLOCATION_ERROR;
1011 return Locale("");
1012 }
1013 }
1014 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1015 Locale retLocale(bp);
1016 if (bp != buffer) {
1017 uprv_free(bp);
1018 }
1019 return retLocale;
1020 }
1021 status = U_ILLEGAL_ARGUMENT_ERROR;
1022 Locale retLocale;
1023 return retLocale;
1024 }
1025
1026 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)1027 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1028 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1029 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1030 int32_t len = localeName.length();
1031 UChar* localeStr = localeName.getBuffer(len + 1);
1032 while (len >= 0) {
1033 localeStr[len] = 0;
1034 int32_t ix = localizations->indexForLocale(localeStr);
1035 if (ix >= 0) {
1036 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1037 return name;
1038 }
1039
1040 // trim trailing portion, skipping over ommitted sections
1041 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1042 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1043 }
1044 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1045 return name;
1046 }
1047 UnicodeString bogus;
1048 bogus.setToBogus();
1049 return bogus;
1050 }
1051
1052 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1053 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1054 if (localizations) {
1055 UnicodeString rsn(ruleSetName);
1056 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1057 return getRuleSetDisplayName(ix, localeParam);
1058 }
1059 UnicodeString bogus;
1060 bogus.setToBogus();
1061 return bogus;
1062 }
1063
1064 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1065 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1066 {
1067 if (U_SUCCESS(status) && ruleSets) {
1068 for (NFRuleSet** p = ruleSets; *p; ++p) {
1069 NFRuleSet* rs = *p;
1070 if (rs->isNamed(name)) {
1071 return rs;
1072 }
1073 }
1074 status = U_ILLEGAL_ARGUMENT_ERROR;
1075 }
1076 return NULL;
1077 }
1078
1079 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1080 RuleBasedNumberFormat::format(int32_t number,
1081 UnicodeString& toAppendTo,
1082 FieldPosition& /* pos */) const
1083 {
1084 if (defaultRuleSet) {
1085 UErrorCode status = U_ZERO_ERROR;
1086 int32_t startPos = toAppendTo.length();
1087 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1088 adjustForCapitalizationContext(startPos, toAppendTo);
1089 }
1090 return toAppendTo;
1091 }
1092
1093
1094 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1095 RuleBasedNumberFormat::format(int64_t number,
1096 UnicodeString& toAppendTo,
1097 FieldPosition& /* pos */) const
1098 {
1099 if (defaultRuleSet) {
1100 UErrorCode status = U_ZERO_ERROR;
1101 int32_t startPos = toAppendTo.length();
1102 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1103 adjustForCapitalizationContext(startPos, toAppendTo);
1104 }
1105 return toAppendTo;
1106 }
1107
1108
1109 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1110 RuleBasedNumberFormat::format(double number,
1111 UnicodeString& toAppendTo,
1112 FieldPosition& /* pos */) const
1113 {
1114 int32_t startPos = toAppendTo.length();
1115 if (defaultRuleSet) {
1116 UErrorCode status = U_ZERO_ERROR;
1117 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1118 }
1119 return adjustForCapitalizationContext(startPos, toAppendTo);
1120 }
1121
1122
1123 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1124 RuleBasedNumberFormat::format(int32_t number,
1125 const UnicodeString& ruleSetName,
1126 UnicodeString& toAppendTo,
1127 FieldPosition& /* pos */,
1128 UErrorCode& status) const
1129 {
1130 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1131 if (U_SUCCESS(status)) {
1132 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1133 // throw new IllegalArgumentException("Can't use internal rule set");
1134 status = U_ILLEGAL_ARGUMENT_ERROR;
1135 } else {
1136 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1137 if (rs) {
1138 int32_t startPos = toAppendTo.length();
1139 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1140 adjustForCapitalizationContext(startPos, toAppendTo);
1141 }
1142 }
1143 }
1144 return toAppendTo;
1145 }
1146
1147
1148 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1149 RuleBasedNumberFormat::format(int64_t number,
1150 const UnicodeString& ruleSetName,
1151 UnicodeString& toAppendTo,
1152 FieldPosition& /* pos */,
1153 UErrorCode& status) const
1154 {
1155 if (U_SUCCESS(status)) {
1156 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1157 // throw new IllegalArgumentException("Can't use internal rule set");
1158 status = U_ILLEGAL_ARGUMENT_ERROR;
1159 } else {
1160 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1161 if (rs) {
1162 int32_t startPos = toAppendTo.length();
1163 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1164 adjustForCapitalizationContext(startPos, toAppendTo);
1165 }
1166 }
1167 }
1168 return toAppendTo;
1169 }
1170
1171
1172 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1173 RuleBasedNumberFormat::format(double number,
1174 const UnicodeString& ruleSetName,
1175 UnicodeString& toAppendTo,
1176 FieldPosition& /* pos */,
1177 UErrorCode& status) const
1178 {
1179 if (U_SUCCESS(status)) {
1180 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1181 // throw new IllegalArgumentException("Can't use internal rule set");
1182 status = U_ILLEGAL_ARGUMENT_ERROR;
1183 } else {
1184 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1185 if (rs) {
1186 int32_t startPos = toAppendTo.length();
1187 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1188 adjustForCapitalizationContext(startPos, toAppendTo);
1189 }
1190 }
1191 }
1192 return toAppendTo;
1193 }
1194
1195 UnicodeString&
adjustForCapitalizationContext(int32_t startPos,UnicodeString & currentResult) const1196 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1197 UnicodeString& currentResult) const
1198 {
1199 #if !UCONFIG_NO_BREAK_ITERATION
1200 if (startPos==0 && currentResult.length() > 0) {
1201 // capitalize currentResult according to context
1202 UChar32 ch = currentResult.char32At(0);
1203 UErrorCode status = U_ZERO_ERROR;
1204 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1205 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1206 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1207 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1208 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1209 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1210 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1211 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1212 }
1213 }
1214 #endif
1215 return currentResult;
1216 }
1217
1218
1219 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1220 RuleBasedNumberFormat::parse(const UnicodeString& text,
1221 Formattable& result,
1222 ParsePosition& parsePosition) const
1223 {
1224 if (!ruleSets) {
1225 parsePosition.setErrorIndex(0);
1226 return;
1227 }
1228
1229 UnicodeString workingText(text, parsePosition.getIndex());
1230 ParsePosition workingPos(0);
1231
1232 ParsePosition high_pp(0);
1233 Formattable high_result;
1234
1235 for (NFRuleSet** p = ruleSets; *p; ++p) {
1236 NFRuleSet *rp = *p;
1237 if (rp->isPublic() && rp->isParseable()) {
1238 ParsePosition working_pp(0);
1239 Formattable working_result;
1240
1241 rp->parse(workingText, working_pp, kMaxDouble, working_result);
1242 if (working_pp.getIndex() > high_pp.getIndex()) {
1243 high_pp = working_pp;
1244 high_result = working_result;
1245
1246 if (high_pp.getIndex() == workingText.length()) {
1247 break;
1248 }
1249 }
1250 }
1251 }
1252
1253 int32_t startIndex = parsePosition.getIndex();
1254 parsePosition.setIndex(startIndex + high_pp.getIndex());
1255 if (high_pp.getIndex() > 0) {
1256 parsePosition.setErrorIndex(-1);
1257 } else {
1258 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1259 parsePosition.setErrorIndex(startIndex + errorIndex);
1260 }
1261 result = high_result;
1262 if (result.getType() == Formattable::kDouble) {
1263 int32_t r = (int32_t)result.getDouble();
1264 if ((double)r == result.getDouble()) {
1265 result.setLong(r);
1266 }
1267 }
1268 }
1269
1270 #if !UCONFIG_NO_COLLATION
1271
1272 void
setLenient(UBool enabled)1273 RuleBasedNumberFormat::setLenient(UBool enabled)
1274 {
1275 lenient = enabled;
1276 if (!enabled && collator) {
1277 delete collator;
1278 collator = NULL;
1279 }
1280 }
1281
1282 #endif
1283
1284 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1285 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1286 if (U_SUCCESS(status)) {
1287 if (ruleSetName.isEmpty()) {
1288 if (localizations) {
1289 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1290 defaultRuleSet = findRuleSet(name, status);
1291 } else {
1292 initDefaultRuleSet();
1293 }
1294 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1295 status = U_ILLEGAL_ARGUMENT_ERROR;
1296 } else {
1297 NFRuleSet* result = findRuleSet(ruleSetName, status);
1298 if (result != NULL) {
1299 defaultRuleSet = result;
1300 }
1301 }
1302 }
1303 }
1304
1305 UnicodeString
getDefaultRuleSetName() const1306 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1307 UnicodeString result;
1308 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1309 defaultRuleSet->getName(result);
1310 } else {
1311 result.setToBogus();
1312 }
1313 return result;
1314 }
1315
1316 void
initDefaultRuleSet()1317 RuleBasedNumberFormat::initDefaultRuleSet()
1318 {
1319 defaultRuleSet = NULL;
1320 if (!ruleSets) {
1321 return;
1322 }
1323
1324 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1325 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1326 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1327
1328 NFRuleSet**p = &ruleSets[0];
1329 while (*p) {
1330 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1331 defaultRuleSet = *p;
1332 return;
1333 } else {
1334 ++p;
1335 }
1336 }
1337
1338 defaultRuleSet = *--p;
1339 if (!defaultRuleSet->isPublic()) {
1340 while (p != ruleSets) {
1341 if ((*--p)->isPublic()) {
1342 defaultRuleSet = *p;
1343 break;
1344 }
1345 }
1346 }
1347 }
1348
1349
1350 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1351 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1352 UParseError& pErr, UErrorCode& status)
1353 {
1354 // TODO: implement UParseError
1355 uprv_memset(&pErr, 0, sizeof(UParseError));
1356 // Note: this can leave ruleSets == NULL, so remaining code should check
1357 if (U_FAILURE(status)) {
1358 return;
1359 }
1360
1361 initializeDecimalFormatSymbols(status);
1362 initializeDefaultInfinityRule(status);
1363 initializeDefaultNaNRule(status);
1364 if (U_FAILURE(status)) {
1365 return;
1366 }
1367
1368 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1369
1370 UnicodeString description(rules);
1371 if (!description.length()) {
1372 status = U_MEMORY_ALLOCATION_ERROR;
1373 return;
1374 }
1375
1376 // start by stripping the trailing whitespace from all the rules
1377 // (this is all the whitespace follwing each semicolon in the
1378 // description). This allows us to look for rule-set boundaries
1379 // by searching for ";%" without having to worry about whitespace
1380 // between the ; and the %
1381 stripWhitespace(description);
1382
1383 // check to see if there's a set of lenient-parse rules. If there
1384 // is, pull them out into our temporary holding place for them,
1385 // and delete them from the description before the real desciption-
1386 // parsing code sees them
1387 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1388 if (lp != -1) {
1389 // we've got to make sure we're not in the middle of a rule
1390 // (where "%%lenient-parse" would actually get treated as
1391 // rule text)
1392 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1393 // locate the beginning and end of the actual collation
1394 // rules (there may be whitespace between the name and
1395 // the first token in the description)
1396 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1397
1398 if (lpEnd == -1) {
1399 lpEnd = description.length() - 1;
1400 }
1401 int lpStart = lp + u_strlen(gLenientParse);
1402 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1403 ++lpStart;
1404 }
1405
1406 // copy out the lenient-parse rules and delete them
1407 // from the description
1408 lenientParseRules = new UnicodeString();
1409 /* test for NULL */
1410 if (lenientParseRules == 0) {
1411 status = U_MEMORY_ALLOCATION_ERROR;
1412 return;
1413 }
1414 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1415
1416 description.remove(lp, lpEnd + 1 - lp);
1417 }
1418 }
1419
1420 // pre-flight parsing the description and count the number of
1421 // rule sets (";%" marks the end of one rule set and the beginning
1422 // of the next)
1423 numRuleSets = 0;
1424 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1425 ++numRuleSets;
1426 ++p;
1427 }
1428 ++numRuleSets;
1429
1430 // our rule list is an array of the appropriate size
1431 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1432 /* test for NULL */
1433 if (ruleSets == 0) {
1434 status = U_MEMORY_ALLOCATION_ERROR;
1435 return;
1436 }
1437
1438 for (int i = 0; i <= numRuleSets; ++i) {
1439 ruleSets[i] = NULL;
1440 }
1441
1442 // divide up the descriptions into individual rule-set descriptions
1443 // and store them in a temporary array. At each step, we also
1444 // new up a rule set, but all this does is initialize its name
1445 // and remove it from its description. We can't actually parse
1446 // the rest of the descriptions and finish initializing everything
1447 // because we have to know the names and locations of all the rule
1448 // sets before we can actually set everything up
1449 if(!numRuleSets) {
1450 status = U_ILLEGAL_ARGUMENT_ERROR;
1451 return;
1452 }
1453
1454 ruleSetDescriptions = new UnicodeString[numRuleSets];
1455 if (ruleSetDescriptions == 0) {
1456 status = U_MEMORY_ALLOCATION_ERROR;
1457 return;
1458 }
1459
1460 {
1461 int curRuleSet = 0;
1462 int32_t start = 0;
1463 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1464 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1465 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1466 if (ruleSets[curRuleSet] == 0) {
1467 status = U_MEMORY_ALLOCATION_ERROR;
1468 return;
1469 }
1470 ++curRuleSet;
1471 start = p + 1;
1472 }
1473 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1474 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1475 if (ruleSets[curRuleSet] == 0) {
1476 status = U_MEMORY_ALLOCATION_ERROR;
1477 return;
1478 }
1479 }
1480
1481 // now we can take note of the formatter's default rule set, which
1482 // is the last public rule set in the description (it's the last
1483 // rather than the first so that a user can create a new formatter
1484 // from an existing formatter and change its default behavior just
1485 // by appending more rule sets to the end)
1486
1487 // {dlf} Initialization of a fraction rule set requires the default rule
1488 // set to be known. For purposes of initialization, this is always the
1489 // last public rule set, no matter what the localization data says.
1490 initDefaultRuleSet();
1491
1492 // finally, we can go back through the temporary descriptions
1493 // list and finish seting up the substructure (and we throw
1494 // away the temporary descriptions as we go)
1495 {
1496 for (int i = 0; i < numRuleSets; i++) {
1497 ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
1498 }
1499 }
1500
1501 // Now that the rules are initialized, the 'real' default rule
1502 // set can be adjusted by the localization data.
1503
1504 // The C code keeps the localization array as is, rather than building
1505 // a separate array of the public rule set names, so we have less work
1506 // to do here-- but we still need to check the names.
1507
1508 if (localizationInfos) {
1509 // confirm the names, if any aren't in the rules, that's an error
1510 // it is ok if the rules contain public rule sets that are not in this list
1511 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1512 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1513 NFRuleSet* rs = findRuleSet(name, status);
1514 if (rs == NULL) {
1515 break; // error
1516 }
1517 if (i == 0) {
1518 defaultRuleSet = rs;
1519 }
1520 }
1521 } else {
1522 defaultRuleSet = getDefaultRuleSet();
1523 }
1524 originalDescription = rules;
1525 }
1526
1527 // override the NumberFormat implementation in order to
1528 // lazily initialize relevant items
1529 void
setContext(UDisplayContext value,UErrorCode & status)1530 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1531 {
1532 NumberFormat::setContext(value, status);
1533 if (U_SUCCESS(status)) {
1534 if (!capitalizationInfoSet &&
1535 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1536 initCapitalizationContextInfo(locale);
1537 capitalizationInfoSet = TRUE;
1538 }
1539 #if !UCONFIG_NO_BREAK_ITERATION
1540 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1541 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1542 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1543 UErrorCode status = U_ZERO_ERROR;
1544 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1545 if (U_FAILURE(status)) {
1546 delete capitalizationBrkIter;
1547 capitalizationBrkIter = NULL;
1548 }
1549 }
1550 #endif
1551 }
1552 }
1553
1554 void
initCapitalizationContextInfo(const Locale & thelocale)1555 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1556 {
1557 #if !UCONFIG_NO_BREAK_ITERATION
1558 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1559 UErrorCode status = U_ZERO_ERROR;
1560 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1561 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1562 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1563 if (U_SUCCESS(status) && rb != NULL) {
1564 int32_t len = 0;
1565 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1566 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1567 capitalizationForUIListMenu = intVector[0];
1568 capitalizationForStandAlone = intVector[1];
1569 }
1570 }
1571 ures_close(rb);
1572 #endif
1573 }
1574
1575 void
stripWhitespace(UnicodeString & description)1576 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1577 {
1578 // iterate through the characters...
1579 UnicodeString result;
1580
1581 int start = 0;
1582 while (start != -1 && start < description.length()) {
1583 // seek to the first non-whitespace character...
1584 while (start < description.length()
1585 && PatternProps::isWhiteSpace(description.charAt(start))) {
1586 ++start;
1587 }
1588
1589 // locate the next semicolon in the text and copy the text from
1590 // our current position up to that semicolon into the result
1591 int32_t p = description.indexOf(gSemiColon, start);
1592 if (p == -1) {
1593 // or if we don't find a semicolon, just copy the rest of
1594 // the string into the result
1595 result.append(description, start, description.length() - start);
1596 start = -1;
1597 }
1598 else if (p < description.length()) {
1599 result.append(description, start, p + 1 - start);
1600 start = p + 1;
1601 }
1602
1603 // when we get here, we've seeked off the end of the sring, and
1604 // we terminate the loop (we continue until *start* is -1 rather
1605 // than until *p* is -1, because otherwise we'd miss the last
1606 // rule in the description)
1607 else {
1608 start = -1;
1609 }
1610 }
1611
1612 description.setTo(result);
1613 }
1614
1615
1616 void
dispose()1617 RuleBasedNumberFormat::dispose()
1618 {
1619 if (ruleSets) {
1620 for (NFRuleSet** p = ruleSets; *p; ++p) {
1621 delete *p;
1622 }
1623 uprv_free(ruleSets);
1624 ruleSets = NULL;
1625 }
1626
1627 if (ruleSetDescriptions) {
1628 delete [] ruleSetDescriptions;
1629 ruleSetDescriptions = NULL;
1630 }
1631
1632 #if !UCONFIG_NO_COLLATION
1633 delete collator;
1634 #endif
1635 collator = NULL;
1636
1637 delete decimalFormatSymbols;
1638 decimalFormatSymbols = NULL;
1639
1640 delete defaultInfinityRule;
1641 defaultInfinityRule = NULL;
1642
1643 delete defaultNaNRule;
1644 defaultNaNRule = NULL;
1645
1646 delete lenientParseRules;
1647 lenientParseRules = NULL;
1648
1649 #if !UCONFIG_NO_BREAK_ITERATION
1650 delete capitalizationBrkIter;
1651 capitalizationBrkIter = NULL;
1652 #endif
1653
1654 if (localizations) {
1655 localizations = localizations->unref();
1656 }
1657 }
1658
1659
1660 //-----------------------------------------------------------------------
1661 // package-internal API
1662 //-----------------------------------------------------------------------
1663
1664 /**
1665 * Returns the collator to use for lenient parsing. The collator is lazily created:
1666 * this function creates it the first time it's called.
1667 * @return The collator to use for lenient parsing, or null if lenient parsing
1668 * is turned off.
1669 */
1670 const RuleBasedCollator*
getCollator() const1671 RuleBasedNumberFormat::getCollator() const
1672 {
1673 #if !UCONFIG_NO_COLLATION
1674 if (!ruleSets) {
1675 return NULL;
1676 }
1677
1678 // lazy-evaluate the collator
1679 if (collator == NULL && lenient) {
1680 // create a default collator based on the formatter's locale,
1681 // then pull out that collator's rules, append any additional
1682 // rules specified in the description, and create a _new_
1683 // collator based on the combinaiton of those rules
1684
1685 UErrorCode status = U_ZERO_ERROR;
1686
1687 Collator* temp = Collator::createInstance(locale, status);
1688 RuleBasedCollator* newCollator;
1689 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1690 if (lenientParseRules) {
1691 UnicodeString rules(newCollator->getRules());
1692 rules.append(*lenientParseRules);
1693
1694 newCollator = new RuleBasedCollator(rules, status);
1695 // Exit if newCollator could not be created.
1696 if (newCollator == NULL) {
1697 return NULL;
1698 }
1699 } else {
1700 temp = NULL;
1701 }
1702 if (U_SUCCESS(status)) {
1703 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1704 // cast away const
1705 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1706 } else {
1707 delete newCollator;
1708 }
1709 }
1710 delete temp;
1711 }
1712 #endif
1713
1714 // if lenient-parse mode is off, this will be null
1715 // (see setLenientParseMode())
1716 return collator;
1717 }
1718
1719
1720 DecimalFormatSymbols*
initializeDecimalFormatSymbols(UErrorCode & status)1721 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1722 {
1723 // lazy-evaluate the DecimalFormatSymbols object. This object
1724 // is shared by all DecimalFormat instances belonging to this
1725 // formatter
1726 if (decimalFormatSymbols == NULL) {
1727 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1728 if (U_SUCCESS(status)) {
1729 decimalFormatSymbols = temp;
1730 }
1731 else {
1732 delete temp;
1733 }
1734 }
1735 return decimalFormatSymbols;
1736 }
1737
1738 /**
1739 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1740 * instances owned by this formatter.
1741 */
1742 const DecimalFormatSymbols*
getDecimalFormatSymbols() const1743 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1744 {
1745 return decimalFormatSymbols;
1746 }
1747
1748 NFRule*
initializeDefaultInfinityRule(UErrorCode & status)1749 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1750 {
1751 if (U_FAILURE(status)) {
1752 return NULL;
1753 }
1754 if (defaultInfinityRule == NULL) {
1755 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1756 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1757 NFRule* temp = new NFRule(this, rule, status);
1758 if (U_SUCCESS(status)) {
1759 defaultInfinityRule = temp;
1760 }
1761 else {
1762 delete temp;
1763 }
1764 }
1765 return defaultInfinityRule;
1766 }
1767
1768 const NFRule*
getDefaultInfinityRule() const1769 RuleBasedNumberFormat::getDefaultInfinityRule() const
1770 {
1771 return defaultInfinityRule;
1772 }
1773
1774 NFRule*
initializeDefaultNaNRule(UErrorCode & status)1775 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1776 {
1777 if (U_FAILURE(status)) {
1778 return NULL;
1779 }
1780 if (defaultNaNRule == NULL) {
1781 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1782 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1783 NFRule* temp = new NFRule(this, rule, status);
1784 if (U_SUCCESS(status)) {
1785 defaultNaNRule = temp;
1786 }
1787 else {
1788 delete temp;
1789 }
1790 }
1791 return defaultNaNRule;
1792 }
1793
1794 const NFRule*
getDefaultNaNRule() const1795 RuleBasedNumberFormat::getDefaultNaNRule() const
1796 {
1797 return defaultNaNRule;
1798 }
1799
1800 // De-owning the current localized symbols and adopt the new symbols.
1801 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1802 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1803 {
1804 if (symbolsToAdopt == NULL) {
1805 return; // do not allow caller to set decimalFormatSymbols to NULL
1806 }
1807
1808 if (decimalFormatSymbols != NULL) {
1809 delete decimalFormatSymbols;
1810 }
1811
1812 decimalFormatSymbols = symbolsToAdopt;
1813
1814 {
1815 // Apply the new decimalFormatSymbols by reparsing the rulesets
1816 UErrorCode status = U_ZERO_ERROR;
1817
1818 delete defaultInfinityRule;
1819 defaultInfinityRule = NULL;
1820 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1821
1822 delete defaultNaNRule;
1823 defaultNaNRule = NULL;
1824 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1825
1826 if (ruleSets) {
1827 for (int32_t i = 0; i < numRuleSets; i++) {
1828 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1829 }
1830 }
1831 }
1832 }
1833
1834 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1835 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1836 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1837 {
1838 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1839 }
1840
1841 PluralFormat *
createPluralFormat(UPluralType pluralType,const UnicodeString & pattern,UErrorCode & status) const1842 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1843 const UnicodeString &pattern,
1844 UErrorCode& status) const
1845 {
1846 return new PluralFormat(locale, pluralType, pattern, status);
1847 }
1848
1849 U_NAMESPACE_END
1850
1851 /* U_HAVE_RBNF */
1852 #endif
1853