1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2014, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "utypeinfo.h" // for 'typeid' to work
10
11 #include "unicode/rbnf.h"
12
13 #if U_HAVE_RBNF
14
15 #include "unicode/normlzr.h"
16 #include "unicode/plurfmt.h"
17 #include "unicode/tblcoll.h"
18 #include "unicode/uchar.h"
19 #include "unicode/ucol.h"
20 #include "unicode/uloc.h"
21 #include "unicode/unum.h"
22 #include "unicode/ures.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utf16.h"
25 #include "unicode/udata.h"
26 #include "unicode/udisplaycontext.h"
27 #include "unicode/brkiter.h"
28 #include "nfrs.h"
29
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "patternprops.h"
33 #include "uresimp.h"
34
35 // debugging
36 // #define RBNF_DEBUG
37
38 #ifdef RBNF_DEBUG
39 #include "stdio.h"
40 #endif
41
42 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
43
44 static const UChar gPercentPercent[] =
45 {
46 0x25, 0x25, 0
47 }; /* "%%" */
48
49 // All urbnf objects are created through openRules, so we init all of the
50 // Unicode string constants required by rbnf, nfrs, or nfr here.
51 static const UChar gLenientParse[] =
52 {
53 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
54 }; /* "%%lenient-parse:" */
55 static const UChar gSemiColon = 0x003B;
56 static const UChar gSemiPercent[] =
57 {
58 0x3B, 0x25, 0
59 }; /* ";%" */
60
61 #define kSomeNumberOfBitsDiv2 22
62 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
63 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
64
65 U_NAMESPACE_BEGIN
66
67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68
69 /*
70 This is a utility class. It does not use ICU's RTTI.
71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72 Please make sure that intltest passes on Windows in Release mode,
73 since the string pooling per compilation unit will mess up how RTTI works.
74 The RTTI code was also removed due to lack of code coverage.
75 */
76 class LocalizationInfo : public UMemory {
77 protected:
78 virtual ~LocalizationInfo();
79 uint32_t refcount;
80
81 public:
LocalizationInfo()82 LocalizationInfo() : refcount(0) {}
83
ref(void)84 LocalizationInfo* ref(void) {
85 ++refcount;
86 return this;
87 }
88
unref(void)89 LocalizationInfo* unref(void) {
90 if (refcount && --refcount == 0) {
91 delete this;
92 }
93 return NULL;
94 }
95
96 virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98
99 virtual int32_t getNumberOfRuleSets(void) const = 0;
100 virtual const UChar* getRuleSetName(int32_t index) const = 0;
101 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102 virtual const UChar* getLocaleName(int32_t index) const = 0;
103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104
105 virtual int32_t indexForLocale(const UChar* locale) const;
106 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107
108 // virtual UClassID getDynamicClassID() const = 0;
109 // static UClassID getStaticClassID(void);
110 };
111
~LocalizationInfo()112 LocalizationInfo::~LocalizationInfo() {}
113
114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
115
116 // if both strings are NULL, this returns TRUE
117 static UBool
streq(const UChar * lhs,const UChar * rhs)118 streq(const UChar* lhs, const UChar* rhs) {
119 if (rhs == lhs) {
120 return TRUE;
121 }
122 if (lhs && rhs) {
123 return u_strcmp(lhs, rhs) == 0;
124 }
125 return FALSE;
126 }
127
128 UBool
operator ==(const LocalizationInfo * rhs) const129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
130 if (rhs) {
131 if (this == rhs) {
132 return TRUE;
133 }
134
135 int32_t rsc = getNumberOfRuleSets();
136 if (rsc == rhs->getNumberOfRuleSets()) {
137 for (int i = 0; i < rsc; ++i) {
138 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
139 return FALSE;
140 }
141 }
142 int32_t dlc = getNumberOfDisplayLocales();
143 if (dlc == rhs->getNumberOfDisplayLocales()) {
144 for (int i = 0; i < dlc; ++i) {
145 const UChar* locale = getLocaleName(i);
146 int32_t ix = rhs->indexForLocale(locale);
147 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
148 if (!streq(locale, rhs->getLocaleName(ix))) {
149 return FALSE;
150 }
151 for (int j = 0; j < rsc; ++j) {
152 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
153 return FALSE;
154 }
155 }
156 }
157 return TRUE;
158 }
159 }
160 }
161 return FALSE;
162 }
163
164 int32_t
indexForLocale(const UChar * locale) const165 LocalizationInfo::indexForLocale(const UChar* locale) const {
166 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
167 if (streq(locale, getLocaleName(i))) {
168 return i;
169 }
170 }
171 return -1;
172 }
173
174 int32_t
indexForRuleSet(const UChar * ruleset) const175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
176 if (ruleset) {
177 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
178 if (streq(ruleset, getRuleSetName(i))) {
179 return i;
180 }
181 }
182 }
183 return -1;
184 }
185
186
187 typedef void (*Fn_Deleter)(void*);
188
189 class VArray {
190 void** buf;
191 int32_t cap;
192 int32_t size;
193 Fn_Deleter deleter;
194 public:
VArray()195 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
196
VArray(Fn_Deleter del)197 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
198
~VArray()199 ~VArray() {
200 if (deleter) {
201 for (int i = 0; i < size; ++i) {
202 (*deleter)(buf[i]);
203 }
204 }
205 uprv_free(buf);
206 }
207
length()208 int32_t length() {
209 return size;
210 }
211
add(void * elem,UErrorCode & status)212 void add(void* elem, UErrorCode& status) {
213 if (U_SUCCESS(status)) {
214 if (size == cap) {
215 if (cap == 0) {
216 cap = 1;
217 } else if (cap < 256) {
218 cap *= 2;
219 } else {
220 cap += 256;
221 }
222 if (buf == NULL) {
223 buf = (void**)uprv_malloc(cap * sizeof(void*));
224 } else {
225 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
226 }
227 if (buf == NULL) {
228 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
229 status = U_MEMORY_ALLOCATION_ERROR;
230 return;
231 }
232 void* start = &buf[size];
233 size_t count = (cap - size) * sizeof(void*);
234 uprv_memset(start, 0, count); // fill with nulls, just because
235 }
236 buf[size++] = elem;
237 }
238 }
239
release(void)240 void** release(void) {
241 void** result = buf;
242 buf = NULL;
243 cap = 0;
244 size = 0;
245 return result;
246 }
247 };
248
249 class LocDataParser;
250
251 class StringLocalizationInfo : public LocalizationInfo {
252 UChar* info;
253 UChar*** data;
254 int32_t numRuleSets;
255 int32_t numLocales;
256
257 friend class LocDataParser;
258
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)259 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
260 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
261 {
262 }
263
264 public:
265 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
266
267 virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const268 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
269 virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const270 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
271 virtual const UChar* getLocaleName(int32_t index) const;
272 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
273
274 // virtual UClassID getDynamicClassID() const;
275 // static UClassID getStaticClassID(void);
276
277 private:
278 void init(UErrorCode& status) const;
279 };
280
281
// Punctuation code points recognized by the localization-data parser.
enum {
    OPEN_ANGLE  = 0x003c, /* '<'  */
    CLOSE_ANGLE = 0x003e, /* '>'  */
    COMMA       = 0x002c, /* ','  */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"'  */
    SPACE       = 0x0020  /* ' '  */
};
290
291 /**
292 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
293 */
294 class LocDataParser {
295 UChar* data;
296 const UChar* e;
297 UChar* p;
298 UChar ch;
299 UParseError& pe;
300 UErrorCode& ec;
301
302 public:
LocDataParser(UParseError & parseError,UErrorCode & status)303 LocDataParser(UParseError& parseError, UErrorCode& status)
304 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()305 ~LocDataParser() {}
306
307 /*
308 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
309 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
310 */
311 StringLocalizationInfo* parse(UChar* data, int32_t len);
312
313 private:
314
inc(void)315 void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)316 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)317 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)318 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const319 UBool inList(UChar c, const UChar* list) const {
320 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
321 while (*list && *list != c) ++list; return *list == c;
322 }
323 void parseError(const char* msg);
324
325 StringLocalizationInfo* doParse(void);
326
327 UChar** nextArray(int32_t& requiredLength);
328 UChar* nextString(void);
329 };
330
// ERROR(msg): record a parse error and return NULL from the enclosing function.
// The expansion is wrapped in do { } while (0) so that it is a single
// statement and stays correct inside an unbraced if/else. Use it as
// `ERROR("...");` (with the trailing semicolon).
// The explanation text is only passed on when RBNF_DEBUG is defined.
#ifdef RBNF_DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#define EXPLANATION_ARG
#endif
338
339
340 static const UChar DQUOTE_STOPLIST[] = {
341 QUOTE, 0
342 };
343
344 static const UChar SQUOTE_STOPLIST[] = {
345 TICK, 0
346 };
347
348 static const UChar NOQUOTE_STOPLIST[] = {
349 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
350 };
351
352 static void
DeleteFn(void * p)353 DeleteFn(void* p) {
354 uprv_free(p);
355 }
356
357 StringLocalizationInfo*
parse(UChar * _data,int32_t len)358 LocDataParser::parse(UChar* _data, int32_t len) {
359 if (U_FAILURE(ec)) {
360 if (_data) uprv_free(_data);
361 return NULL;
362 }
363
364 pe.line = 0;
365 pe.offset = -1;
366 pe.postContext[0] = 0;
367 pe.preContext[0] = 0;
368
369 if (_data == NULL) {
370 ec = U_ILLEGAL_ARGUMENT_ERROR;
371 return NULL;
372 }
373
374 if (len <= 0) {
375 ec = U_ILLEGAL_ARGUMENT_ERROR;
376 uprv_free(_data);
377 return NULL;
378 }
379
380 data = _data;
381 e = data + len;
382 p = _data;
383 ch = 0xffff;
384
385 return doParse();
386 }
387
388
389 StringLocalizationInfo*
doParse(void)390 LocDataParser::doParse(void) {
391 skipWhitespace();
392 if (!checkInc(OPEN_ANGLE)) {
393 ERROR("Missing open angle");
394 } else {
395 VArray array(DeleteFn);
396 UBool mightHaveNext = TRUE;
397 int32_t requiredLength = -1;
398 while (mightHaveNext) {
399 mightHaveNext = FALSE;
400 UChar** elem = nextArray(requiredLength);
401 skipWhitespace();
402 UBool haveComma = check(COMMA);
403 if (elem) {
404 array.add(elem, ec);
405 if (haveComma) {
406 inc();
407 mightHaveNext = TRUE;
408 }
409 } else if (haveComma) {
410 ERROR("Unexpected character");
411 }
412 }
413
414 skipWhitespace();
415 if (!checkInc(CLOSE_ANGLE)) {
416 if (check(OPEN_ANGLE)) {
417 ERROR("Missing comma in outer array");
418 } else {
419 ERROR("Missing close angle bracket in outer array");
420 }
421 }
422
423 skipWhitespace();
424 if (p != e) {
425 ERROR("Extra text after close of localization data");
426 }
427
428 array.add(NULL, ec);
429 if (U_SUCCESS(ec)) {
430 int32_t numLocs = array.length() - 2; // subtract first, NULL
431 UChar*** result = (UChar***)array.release();
432
433 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
434 }
435 }
436
437 ERROR("Unknown error");
438 }
439
440 UChar**
nextArray(int32_t & requiredLength)441 LocDataParser::nextArray(int32_t& requiredLength) {
442 if (U_FAILURE(ec)) {
443 return NULL;
444 }
445
446 skipWhitespace();
447 if (!checkInc(OPEN_ANGLE)) {
448 ERROR("Missing open angle");
449 }
450
451 VArray array;
452 UBool mightHaveNext = TRUE;
453 while (mightHaveNext) {
454 mightHaveNext = FALSE;
455 UChar* elem = nextString();
456 skipWhitespace();
457 UBool haveComma = check(COMMA);
458 if (elem) {
459 array.add(elem, ec);
460 if (haveComma) {
461 inc();
462 mightHaveNext = TRUE;
463 }
464 } else if (haveComma) {
465 ERROR("Unexpected comma");
466 }
467 }
468 skipWhitespace();
469 if (!checkInc(CLOSE_ANGLE)) {
470 if (check(OPEN_ANGLE)) {
471 ERROR("Missing close angle bracket in inner array");
472 } else {
473 ERROR("Missing comma in inner array");
474 }
475 }
476
477 array.add(NULL, ec);
478 if (U_SUCCESS(ec)) {
479 if (requiredLength == -1) {
480 requiredLength = array.length() + 1;
481 } else if (array.length() != requiredLength) {
482 ec = U_ILLEGAL_ARGUMENT_ERROR;
483 ERROR("Array not of required length");
484 }
485
486 return (UChar**)array.release();
487 }
488 ERROR("Unknown Error");
489 }
490
491 UChar*
nextString()492 LocDataParser::nextString() {
493 UChar* result = NULL;
494
495 skipWhitespace();
496 if (p < e) {
497 const UChar* terminators;
498 UChar c = *p;
499 UBool haveQuote = c == QUOTE || c == TICK;
500 if (haveQuote) {
501 inc();
502 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
503 } else {
504 terminators = NOQUOTE_STOPLIST;
505 }
506 UChar* start = p;
507 while (p < e && !inList(*p, terminators)) ++p;
508 if (p == e) {
509 ERROR("Unexpected end of data");
510 }
511
512 UChar x = *p;
513 if (p > start) {
514 ch = x;
515 *p = 0x0; // terminate by writing to data
516 result = start; // just point into data
517 }
518 if (haveQuote) {
519 if (x != c) {
520 ERROR("Missing matching quote");
521 } else if (p == start) {
522 ERROR("Empty string");
523 }
524 inc();
525 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
526 ERROR("Unexpected character in string");
527 }
528 }
529
530 // ok for there to be no next string
531 return result;
532 }
533
parseError(const char * EXPLANATION_ARG)534 void LocDataParser::parseError(const char* EXPLANATION_ARG)
535 {
536 if (!data) {
537 return;
538 }
539
540 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
541 if (start < data) {
542 start = data;
543 }
544 for (UChar* x = p; --x >= start;) {
545 if (!*x) {
546 start = x+1;
547 break;
548 }
549 }
550 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
551 if (limit > e) {
552 limit = e;
553 }
554 u_strncpy(pe.preContext, start, (int32_t)(p-start));
555 pe.preContext[p-start] = 0;
556 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
557 pe.postContext[limit-p] = 0;
558 pe.offset = (int32_t)(p - data);
559
560 #ifdef RBNF_DEBUG
561 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
562
563 UnicodeString msg;
564 msg.append(start, p - start);
565 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
566 msg.append(p, limit-p);
567 msg.append(UNICODE_STRING_SIMPLE("'"));
568
569 char buf[128];
570 int32_t len = msg.extract(0, msg.length(), buf, 128);
571 if (len >= 128) {
572 buf[127] = 0;
573 } else {
574 buf[len] = 0;
575 }
576 fprintf(stderr, "%s\n", buf);
577 fflush(stderr);
578 #endif
579
580 uprv_free(data);
581 data = NULL;
582 p = NULL;
583 e = NULL;
584
585 if (U_SUCCESS(ec)) {
586 ec = U_PARSE_ERROR;
587 }
588 }
589
590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
591
592 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
594 if (U_FAILURE(status)) {
595 return NULL;
596 }
597
598 int32_t len = info.length();
599 if (len == 0) {
600 return NULL; // no error;
601 }
602
603 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
604 if (!p) {
605 status = U_MEMORY_ALLOCATION_ERROR;
606 return NULL;
607 }
608 info.extract(p, len, status);
609 if (!U_FAILURE(status)) {
610 status = U_ZERO_ERROR; // clear warning about non-termination
611 }
612
613 LocDataParser parser(perror, status);
614 return parser.parse(p, len);
615 }
616
~StringLocalizationInfo()617 StringLocalizationInfo::~StringLocalizationInfo() {
618 for (UChar*** p = (UChar***)data; *p; ++p) {
619 // remaining data is simply pointer into our unicode string data.
620 if (*p) uprv_free(*p);
621 }
622 if (data) uprv_free(data);
623 if (info) uprv_free(info);
624 }
625
626
627 const UChar*
getRuleSetName(int32_t index) const628 StringLocalizationInfo::getRuleSetName(int32_t index) const {
629 if (index >= 0 && index < getNumberOfRuleSets()) {
630 return data[0][index];
631 }
632 return NULL;
633 }
634
635 const UChar*
getLocaleName(int32_t index) const636 StringLocalizationInfo::getLocaleName(int32_t index) const {
637 if (index >= 0 && index < getNumberOfDisplayLocales()) {
638 return data[index+1][0];
639 }
640 return NULL;
641 }
642
643 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
645 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
646 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
647 return data[localeIndex+1][ruleIndex+1];
648 }
649 return NULL;
650 }
651
652 // ----------
653
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)654 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
655 const UnicodeString& locs,
656 const Locale& alocale, UParseError& perror, UErrorCode& status)
657 : ruleSets(NULL)
658 , ruleSetDescriptions(NULL)
659 , numRuleSets(0)
660 , defaultRuleSet(NULL)
661 , locale(alocale)
662 , collator(NULL)
663 , decimalFormatSymbols(NULL)
664 , lenient(FALSE)
665 , lenientParseRules(NULL)
666 , localizations(NULL)
667 , capitalizationInfoSet(FALSE)
668 , capitalizationForUIListMenu(FALSE)
669 , capitalizationForStandAlone(FALSE)
670 , capitalizationBrkIter(NULL)
671 {
672 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
673 init(description, locinfo, perror, status);
674 }
675
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)676 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
677 const UnicodeString& locs,
678 UParseError& perror, UErrorCode& status)
679 : ruleSets(NULL)
680 , ruleSetDescriptions(NULL)
681 , numRuleSets(0)
682 , defaultRuleSet(NULL)
683 , locale(Locale::getDefault())
684 , collator(NULL)
685 , decimalFormatSymbols(NULL)
686 , lenient(FALSE)
687 , lenientParseRules(NULL)
688 , localizations(NULL)
689 , capitalizationInfoSet(FALSE)
690 , capitalizationForUIListMenu(FALSE)
691 , capitalizationForStandAlone(FALSE)
692 , capitalizationBrkIter(NULL)
693 {
694 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
695 init(description, locinfo, perror, status);
696 }
697
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)698 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
699 LocalizationInfo* info,
700 const Locale& alocale, UParseError& perror, UErrorCode& status)
701 : ruleSets(NULL)
702 , ruleSetDescriptions(NULL)
703 , numRuleSets(0)
704 , defaultRuleSet(NULL)
705 , locale(alocale)
706 , collator(NULL)
707 , decimalFormatSymbols(NULL)
708 , lenient(FALSE)
709 , lenientParseRules(NULL)
710 , localizations(NULL)
711 , capitalizationInfoSet(FALSE)
712 , capitalizationForUIListMenu(FALSE)
713 , capitalizationForStandAlone(FALSE)
714 , capitalizationBrkIter(NULL)
715 {
716 init(description, info, perror, status);
717 }
718
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)719 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
720 UParseError& perror,
721 UErrorCode& status)
722 : ruleSets(NULL)
723 , ruleSetDescriptions(NULL)
724 , numRuleSets(0)
725 , defaultRuleSet(NULL)
726 , locale(Locale::getDefault())
727 , collator(NULL)
728 , decimalFormatSymbols(NULL)
729 , lenient(FALSE)
730 , lenientParseRules(NULL)
731 , localizations(NULL)
732 , capitalizationInfoSet(FALSE)
733 , capitalizationForUIListMenu(FALSE)
734 , capitalizationForStandAlone(FALSE)
735 , capitalizationBrkIter(NULL)
736 {
737 init(description, NULL, perror, status);
738 }
739
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)740 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
741 const Locale& aLocale,
742 UParseError& perror,
743 UErrorCode& status)
744 : ruleSets(NULL)
745 , ruleSetDescriptions(NULL)
746 , numRuleSets(0)
747 , defaultRuleSet(NULL)
748 , locale(aLocale)
749 , collator(NULL)
750 , decimalFormatSymbols(NULL)
751 , lenient(FALSE)
752 , lenientParseRules(NULL)
753 , localizations(NULL)
754 , capitalizationInfoSet(FALSE)
755 , capitalizationForUIListMenu(FALSE)
756 , capitalizationForStandAlone(FALSE)
757 , capitalizationBrkIter(NULL)
758 {
759 init(description, NULL, perror, status);
760 }
761
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)762 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
763 : ruleSets(NULL)
764 , ruleSetDescriptions(NULL)
765 , numRuleSets(0)
766 , defaultRuleSet(NULL)
767 , locale(alocale)
768 , collator(NULL)
769 , decimalFormatSymbols(NULL)
770 , lenient(FALSE)
771 , lenientParseRules(NULL)
772 , localizations(NULL)
773 , capitalizationInfoSet(FALSE)
774 , capitalizationForUIListMenu(FALSE)
775 , capitalizationForStandAlone(FALSE)
776 , capitalizationBrkIter(NULL)
777 {
778 if (U_FAILURE(status)) {
779 return;
780 }
781
782 const char* rules_tag = "RBNFRules";
783 const char* fmt_tag = "";
784 switch (tag) {
785 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
786 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
787 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
788 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
789 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
790 }
791
792 // TODO: read localization info from resource
793 LocalizationInfo* locinfo = NULL;
794
795 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
796 if (U_SUCCESS(status)) {
797 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
798 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
799
800 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
801 if (U_FAILURE(status)) {
802 ures_close(nfrb);
803 }
804 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
805 if (U_FAILURE(status)) {
806 ures_close(rbnfRules);
807 ures_close(nfrb);
808 return;
809 }
810
811 UnicodeString desc;
812 while (ures_hasNext(ruleSets)) {
813 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
814 }
815 UParseError perror;
816
817 init (desc, locinfo, perror, status);
818
819 ures_close(ruleSets);
820 ures_close(rbnfRules);
821 }
822 ures_close(nfrb);
823 }
824
// Copy constructor: starts from an empty state (only the base class and the
// locale are copied in the initializer list) and then delegates the rest of
// the copy to operator=.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : NumberFormat(rhs),
      ruleSets(NULL),
      ruleSetDescriptions(NULL),
      numRuleSets(0),
      defaultRuleSet(NULL),
      locale(rhs.locale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL),
      localizations(NULL),
      capitalizationInfoSet(FALSE),
      capitalizationForUIListMenu(FALSE),
      capitalizationForStandAlone(FALSE),
      capitalizationBrkIter(NULL)
{
    *this = rhs;
}
844
845 // --------
846
847 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)848 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
849 {
850 if (this == &rhs) {
851 return *this;
852 }
853 NumberFormat::operator=(rhs);
854 UErrorCode status = U_ZERO_ERROR;
855 dispose();
856 locale = rhs.locale;
857 lenient = rhs.lenient;
858
859 UParseError perror;
860 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
861 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
862 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
863
864 capitalizationInfoSet = rhs.capitalizationInfoSet;
865 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
866 capitalizationForStandAlone = rhs.capitalizationForStandAlone;
867 #if !UCONFIG_NO_BREAK_ITERATION
868 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
869 #endif
870
871 return *this;
872 }
873
~RuleBasedNumberFormat()874 RuleBasedNumberFormat::~RuleBasedNumberFormat()
875 {
876 dispose();
877 }
878
879 Format*
clone(void) const880 RuleBasedNumberFormat::clone(void) const
881 {
882 return new RuleBasedNumberFormat(*this);
883 }
884
885 UBool
operator ==(const Format & other) const886 RuleBasedNumberFormat::operator==(const Format& other) const
887 {
888 if (this == &other) {
889 return TRUE;
890 }
891
892 if (typeid(*this) == typeid(other)) {
893 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
894 // test for capitalization info equality is adequately handled
895 // by the NumberFormat test for fCapitalizationContext equality;
896 // the info here is just derived from that.
897 if (locale == rhs.locale &&
898 lenient == rhs.lenient &&
899 (localizations == NULL
900 ? rhs.localizations == NULL
901 : (rhs.localizations == NULL
902 ? FALSE
903 : *localizations == rhs.localizations))) {
904
905 NFRuleSet** p = ruleSets;
906 NFRuleSet** q = rhs.ruleSets;
907 if (p == NULL) {
908 return q == NULL;
909 } else if (q == NULL) {
910 return FALSE;
911 }
912 while (*p && *q && (**p == **q)) {
913 ++p;
914 ++q;
915 }
916 return *q == NULL && *p == NULL;
917 }
918 }
919
920 return FALSE;
921 }
922
923 UnicodeString
getRules() const924 RuleBasedNumberFormat::getRules() const
925 {
926 UnicodeString result;
927 if (ruleSets != NULL) {
928 for (NFRuleSet** p = ruleSets; *p; ++p) {
929 (*p)->appendRules(result);
930 }
931 }
932 return result;
933 }
934
935 UnicodeString
getRuleSetName(int32_t index) const936 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
937 {
938 if (localizations) {
939 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
940 return string;
941 } else if (ruleSets) {
942 UnicodeString result;
943 for (NFRuleSet** p = ruleSets; *p; ++p) {
944 NFRuleSet* rs = *p;
945 if (rs->isPublic()) {
946 if (--index == -1) {
947 rs->getName(result);
948 return result;
949 }
950 }
951 }
952 }
953 UnicodeString empty;
954 return empty;
955 }
956
957 int32_t
getNumberOfRuleSetNames() const958 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
959 {
960 int32_t result = 0;
961 if (localizations) {
962 result = localizations->getNumberOfRuleSets();
963 } else if (ruleSets) {
964 for (NFRuleSet** p = ruleSets; *p; ++p) {
965 if ((**p).isPublic()) {
966 ++result;
967 }
968 }
969 }
970 return result;
971 }
972
973 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const974 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
975 if (localizations) {
976 return localizations->getNumberOfDisplayLocales();
977 }
978 return 0;
979 }
980
981 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const982 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
983 if (U_FAILURE(status)) {
984 return Locale("");
985 }
986 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
987 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
988 char buffer[64];
989 int32_t cap = name.length() + 1;
990 char* bp = buffer;
991 if (cap > 64) {
992 bp = (char *)uprv_malloc(cap);
993 if (bp == NULL) {
994 status = U_MEMORY_ALLOCATION_ERROR;
995 return Locale("");
996 }
997 }
998 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
999 Locale retLocale(bp);
1000 if (bp != buffer) {
1001 uprv_free(bp);
1002 }
1003 return retLocale;
1004 }
1005 status = U_ILLEGAL_ARGUMENT_ERROR;
1006 Locale retLocale;
1007 return retLocale;
1008 }
1009
1010 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)1011 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1012 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1013 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1014 int32_t len = localeName.length();
1015 UChar* localeStr = localeName.getBuffer(len + 1);
1016 while (len >= 0) {
1017 localeStr[len] = 0;
1018 int32_t ix = localizations->indexForLocale(localeStr);
1019 if (ix >= 0) {
1020 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1021 return name;
1022 }
1023
1024 // trim trailing portion, skipping over ommitted sections
1025 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1026 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1027 }
1028 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1029 return name;
1030 }
1031 UnicodeString bogus;
1032 bogus.setToBogus();
1033 return bogus;
1034 }
1035
1036 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1037 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1038 if (localizations) {
1039 UnicodeString rsn(ruleSetName);
1040 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1041 return getRuleSetDisplayName(ix, localeParam);
1042 }
1043 UnicodeString bogus;
1044 bogus.setToBogus();
1045 return bogus;
1046 }
1047
1048 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1049 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1050 {
1051 if (U_SUCCESS(status) && ruleSets) {
1052 for (NFRuleSet** p = ruleSets; *p; ++p) {
1053 NFRuleSet* rs = *p;
1054 if (rs->isNamed(name)) {
1055 return rs;
1056 }
1057 }
1058 status = U_ILLEGAL_ARGUMENT_ERROR;
1059 }
1060 return NULL;
1061 }
1062
1063 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1064 RuleBasedNumberFormat::format(int32_t number,
1065 UnicodeString& toAppendTo,
1066 FieldPosition& /* pos */) const
1067 {
1068 if (defaultRuleSet) {
1069 UErrorCode status = U_ZERO_ERROR;
1070 int32_t startPos = toAppendTo.length();
1071 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
1072 adjustForCapitalizationContext(startPos, toAppendTo);
1073 }
1074 return toAppendTo;
1075 }
1076
1077
1078 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1079 RuleBasedNumberFormat::format(int64_t number,
1080 UnicodeString& toAppendTo,
1081 FieldPosition& /* pos */) const
1082 {
1083 if (defaultRuleSet) {
1084 UErrorCode status = U_ZERO_ERROR;
1085 int32_t startPos = toAppendTo.length();
1086 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
1087 adjustForCapitalizationContext(startPos, toAppendTo);
1088 }
1089 return toAppendTo;
1090 }
1091
1092
1093 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1094 RuleBasedNumberFormat::format(double number,
1095 UnicodeString& toAppendTo,
1096 FieldPosition& /* pos */) const
1097 {
1098 int32_t startPos = toAppendTo.length();
1099 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1100 if (uprv_isNaN(number)) {
1101 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1102 if (decFmtSyms) {
1103 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1104 }
1105 } else if (defaultRuleSet) {
1106 UErrorCode status = U_ZERO_ERROR;
1107 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
1108 }
1109 return adjustForCapitalizationContext(startPos, toAppendTo);
1110 }
1111
1112
1113 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1114 RuleBasedNumberFormat::format(int32_t number,
1115 const UnicodeString& ruleSetName,
1116 UnicodeString& toAppendTo,
1117 FieldPosition& /* pos */,
1118 UErrorCode& status) const
1119 {
1120 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1121 if (U_SUCCESS(status)) {
1122 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1123 // throw new IllegalArgumentException("Can't use internal rule set");
1124 status = U_ILLEGAL_ARGUMENT_ERROR;
1125 } else {
1126 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1127 if (rs) {
1128 int32_t startPos = toAppendTo.length();
1129 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
1130 adjustForCapitalizationContext(startPos, toAppendTo);
1131 }
1132 }
1133 }
1134 return toAppendTo;
1135 }
1136
1137
1138 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1139 RuleBasedNumberFormat::format(int64_t number,
1140 const UnicodeString& ruleSetName,
1141 UnicodeString& toAppendTo,
1142 FieldPosition& /* pos */,
1143 UErrorCode& status) const
1144 {
1145 if (U_SUCCESS(status)) {
1146 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1147 // throw new IllegalArgumentException("Can't use internal rule set");
1148 status = U_ILLEGAL_ARGUMENT_ERROR;
1149 } else {
1150 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151 if (rs) {
1152 int32_t startPos = toAppendTo.length();
1153 rs->format(number, toAppendTo, toAppendTo.length(), status);
1154 adjustForCapitalizationContext(startPos, toAppendTo);
1155 }
1156 }
1157 }
1158 return toAppendTo;
1159 }
1160
1161
1162 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1163 RuleBasedNumberFormat::format(double number,
1164 const UnicodeString& ruleSetName,
1165 UnicodeString& toAppendTo,
1166 FieldPosition& /* pos */,
1167 UErrorCode& status) const
1168 {
1169 if (U_SUCCESS(status)) {
1170 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1171 // throw new IllegalArgumentException("Can't use internal rule set");
1172 status = U_ILLEGAL_ARGUMENT_ERROR;
1173 } else {
1174 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1175 if (rs) {
1176 int32_t startPos = toAppendTo.length();
1177 rs->format(number, toAppendTo, toAppendTo.length(), status);
1178 adjustForCapitalizationContext(startPos, toAppendTo);
1179 }
1180 }
1181 }
1182 return toAppendTo;
1183 }
1184
1185 UnicodeString&
adjustForCapitalizationContext(int32_t startPos,UnicodeString & currentResult) const1186 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1187 UnicodeString& currentResult) const
1188 {
1189 #if !UCONFIG_NO_BREAK_ITERATION
1190 if (startPos==0 && currentResult.length() > 0) {
1191 // capitalize currentResult according to context
1192 UChar32 ch = currentResult.char32At(0);
1193 UErrorCode status = U_ZERO_ERROR;
1194 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1195 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1196 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1197 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1198 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1199 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1200 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1201 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1202 }
1203 }
1204 #endif
1205 return currentResult;
1206 }
1207
1208
1209 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1210 RuleBasedNumberFormat::parse(const UnicodeString& text,
1211 Formattable& result,
1212 ParsePosition& parsePosition) const
1213 {
1214 if (!ruleSets) {
1215 parsePosition.setErrorIndex(0);
1216 return;
1217 }
1218
1219 UnicodeString workingText(text, parsePosition.getIndex());
1220 ParsePosition workingPos(0);
1221
1222 ParsePosition high_pp(0);
1223 Formattable high_result;
1224
1225 for (NFRuleSet** p = ruleSets; *p; ++p) {
1226 NFRuleSet *rp = *p;
1227 if (rp->isPublic() && rp->isParseable()) {
1228 ParsePosition working_pp(0);
1229 Formattable working_result;
1230
1231 rp->parse(workingText, working_pp, kMaxDouble, working_result);
1232 if (working_pp.getIndex() > high_pp.getIndex()) {
1233 high_pp = working_pp;
1234 high_result = working_result;
1235
1236 if (high_pp.getIndex() == workingText.length()) {
1237 break;
1238 }
1239 }
1240 }
1241 }
1242
1243 int32_t startIndex = parsePosition.getIndex();
1244 parsePosition.setIndex(startIndex + high_pp.getIndex());
1245 if (high_pp.getIndex() > 0) {
1246 parsePosition.setErrorIndex(-1);
1247 } else {
1248 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1249 parsePosition.setErrorIndex(startIndex + errorIndex);
1250 }
1251 result = high_result;
1252 if (result.getType() == Formattable::kDouble) {
1253 int32_t r = (int32_t)result.getDouble();
1254 if ((double)r == result.getDouble()) {
1255 result.setLong(r);
1256 }
1257 }
1258 }
1259
1260 #if !UCONFIG_NO_COLLATION
1261
1262 void
setLenient(UBool enabled)1263 RuleBasedNumberFormat::setLenient(UBool enabled)
1264 {
1265 lenient = enabled;
1266 if (!enabled && collator) {
1267 delete collator;
1268 collator = NULL;
1269 }
1270 }
1271
1272 #endif
1273
1274 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1275 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1276 if (U_SUCCESS(status)) {
1277 if (ruleSetName.isEmpty()) {
1278 if (localizations) {
1279 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1280 defaultRuleSet = findRuleSet(name, status);
1281 } else {
1282 initDefaultRuleSet();
1283 }
1284 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1285 status = U_ILLEGAL_ARGUMENT_ERROR;
1286 } else {
1287 NFRuleSet* result = findRuleSet(ruleSetName, status);
1288 if (result != NULL) {
1289 defaultRuleSet = result;
1290 }
1291 }
1292 }
1293 }
1294
1295 UnicodeString
getDefaultRuleSetName() const1296 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1297 UnicodeString result;
1298 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1299 defaultRuleSet->getName(result);
1300 } else {
1301 result.setToBogus();
1302 }
1303 return result;
1304 }
1305
1306 void
initDefaultRuleSet()1307 RuleBasedNumberFormat::initDefaultRuleSet()
1308 {
1309 defaultRuleSet = NULL;
1310 if (!ruleSets) {
1311 return;
1312 }
1313
1314 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1315 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1316 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1317
1318 NFRuleSet**p = &ruleSets[0];
1319 while (*p) {
1320 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1321 defaultRuleSet = *p;
1322 return;
1323 } else {
1324 ++p;
1325 }
1326 }
1327
1328 defaultRuleSet = *--p;
1329 if (!defaultRuleSet->isPublic()) {
1330 while (p != ruleSets) {
1331 if ((*--p)->isPublic()) {
1332 defaultRuleSet = *p;
1333 break;
1334 }
1335 }
1336 }
1337 }
1338
1339
1340 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1341 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1342 UParseError& pErr, UErrorCode& status)
1343 {
1344 // TODO: implement UParseError
1345 uprv_memset(&pErr, 0, sizeof(UParseError));
1346 // Note: this can leave ruleSets == NULL, so remaining code should check
1347 if (U_FAILURE(status)) {
1348 return;
1349 }
1350
1351 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1352
1353 UnicodeString description(rules);
1354 if (!description.length()) {
1355 status = U_MEMORY_ALLOCATION_ERROR;
1356 return;
1357 }
1358
1359 // start by stripping the trailing whitespace from all the rules
1360 // (this is all the whitespace follwing each semicolon in the
1361 // description). This allows us to look for rule-set boundaries
1362 // by searching for ";%" without having to worry about whitespace
1363 // between the ; and the %
1364 stripWhitespace(description);
1365
1366 // check to see if there's a set of lenient-parse rules. If there
1367 // is, pull them out into our temporary holding place for them,
1368 // and delete them from the description before the real desciption-
1369 // parsing code sees them
1370 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1371 if (lp != -1) {
1372 // we've got to make sure we're not in the middle of a rule
1373 // (where "%%lenient-parse" would actually get treated as
1374 // rule text)
1375 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1376 // locate the beginning and end of the actual collation
1377 // rules (there may be whitespace between the name and
1378 // the first token in the description)
1379 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1380
1381 if (lpEnd == -1) {
1382 lpEnd = description.length() - 1;
1383 }
1384 int lpStart = lp + u_strlen(gLenientParse);
1385 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1386 ++lpStart;
1387 }
1388
1389 // copy out the lenient-parse rules and delete them
1390 // from the description
1391 lenientParseRules = new UnicodeString();
1392 /* test for NULL */
1393 if (lenientParseRules == 0) {
1394 status = U_MEMORY_ALLOCATION_ERROR;
1395 return;
1396 }
1397 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1398
1399 description.remove(lp, lpEnd + 1 - lp);
1400 }
1401 }
1402
1403 // pre-flight parsing the description and count the number of
1404 // rule sets (";%" marks the end of one rule set and the beginning
1405 // of the next)
1406 numRuleSets = 0;
1407 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1408 ++numRuleSets;
1409 ++p;
1410 }
1411 ++numRuleSets;
1412
1413 // our rule list is an array of the appropriate size
1414 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1415 /* test for NULL */
1416 if (ruleSets == 0) {
1417 status = U_MEMORY_ALLOCATION_ERROR;
1418 return;
1419 }
1420
1421 for (int i = 0; i <= numRuleSets; ++i) {
1422 ruleSets[i] = NULL;
1423 }
1424
1425 // divide up the descriptions into individual rule-set descriptions
1426 // and store them in a temporary array. At each step, we also
1427 // new up a rule set, but all this does is initialize its name
1428 // and remove it from its description. We can't actually parse
1429 // the rest of the descriptions and finish initializing everything
1430 // because we have to know the names and locations of all the rule
1431 // sets before we can actually set everything up
1432 if(!numRuleSets) {
1433 status = U_ILLEGAL_ARGUMENT_ERROR;
1434 return;
1435 }
1436
1437 ruleSetDescriptions = new UnicodeString[numRuleSets];
1438 if (ruleSetDescriptions == 0) {
1439 status = U_MEMORY_ALLOCATION_ERROR;
1440 return;
1441 }
1442
1443 {
1444 int curRuleSet = 0;
1445 int32_t start = 0;
1446 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1447 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1448 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1449 if (ruleSets[curRuleSet] == 0) {
1450 status = U_MEMORY_ALLOCATION_ERROR;
1451 return;
1452 }
1453 ++curRuleSet;
1454 start = p + 1;
1455 }
1456 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1457 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1458 if (ruleSets[curRuleSet] == 0) {
1459 status = U_MEMORY_ALLOCATION_ERROR;
1460 return;
1461 }
1462 }
1463
1464 // now we can take note of the formatter's default rule set, which
1465 // is the last public rule set in the description (it's the last
1466 // rather than the first so that a user can create a new formatter
1467 // from an existing formatter and change its default behavior just
1468 // by appending more rule sets to the end)
1469
1470 // {dlf} Initialization of a fraction rule set requires the default rule
1471 // set to be known. For purposes of initialization, this is always the
1472 // last public rule set, no matter what the localization data says.
1473 initDefaultRuleSet();
1474
1475 // finally, we can go back through the temporary descriptions
1476 // list and finish seting up the substructure (and we throw
1477 // away the temporary descriptions as we go)
1478 {
1479 for (int i = 0; i < numRuleSets; i++) {
1480 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1481 }
1482 }
1483
1484 // Now that the rules are initialized, the 'real' default rule
1485 // set can be adjusted by the localization data.
1486
1487 // The C code keeps the localization array as is, rather than building
1488 // a separate array of the public rule set names, so we have less work
1489 // to do here-- but we still need to check the names.
1490
1491 if (localizationInfos) {
1492 // confirm the names, if any aren't in the rules, that's an error
1493 // it is ok if the rules contain public rule sets that are not in this list
1494 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1495 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1496 NFRuleSet* rs = findRuleSet(name, status);
1497 if (rs == NULL) {
1498 break; // error
1499 }
1500 if (i == 0) {
1501 defaultRuleSet = rs;
1502 }
1503 }
1504 } else {
1505 defaultRuleSet = getDefaultRuleSet();
1506 }
1507 originalDescription = rules;
1508 }
1509
1510 // override the NumberFormat implementation in order to
1511 // lazily initialize relevant items
1512 void
setContext(UDisplayContext value,UErrorCode & status)1513 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1514 {
1515 NumberFormat::setContext(value, status);
1516 if (U_SUCCESS(status)) {
1517 if (!capitalizationInfoSet &&
1518 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1519 initCapitalizationContextInfo(locale);
1520 capitalizationInfoSet = TRUE;
1521 }
1522 #if !UCONFIG_NO_BREAK_ITERATION
1523 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1524 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1525 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1526 UErrorCode status = U_ZERO_ERROR;
1527 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1528 if (U_FAILURE(status)) {
1529 delete capitalizationBrkIter;
1530 capitalizationBrkIter = NULL;
1531 }
1532 }
1533 #endif
1534 }
1535 }
1536
1537 void
initCapitalizationContextInfo(const Locale & thelocale)1538 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1539 {
1540 #if !UCONFIG_NO_BREAK_ITERATION
1541 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1542 UErrorCode status = U_ZERO_ERROR;
1543 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1544 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1545 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1546 if (U_SUCCESS(status) && rb != NULL) {
1547 int32_t len = 0;
1548 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1549 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1550 capitalizationForUIListMenu = intVector[0];
1551 capitalizationForStandAlone = intVector[1];
1552 }
1553 }
1554 ures_close(rb);
1555 #endif
1556 }
1557
1558 void
stripWhitespace(UnicodeString & description)1559 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1560 {
1561 // iterate through the characters...
1562 UnicodeString result;
1563
1564 int start = 0;
1565 while (start != -1 && start < description.length()) {
1566 // seek to the first non-whitespace character...
1567 while (start < description.length()
1568 && PatternProps::isWhiteSpace(description.charAt(start))) {
1569 ++start;
1570 }
1571
1572 // locate the next semicolon in the text and copy the text from
1573 // our current position up to that semicolon into the result
1574 int32_t p = description.indexOf(gSemiColon, start);
1575 if (p == -1) {
1576 // or if we don't find a semicolon, just copy the rest of
1577 // the string into the result
1578 result.append(description, start, description.length() - start);
1579 start = -1;
1580 }
1581 else if (p < description.length()) {
1582 result.append(description, start, p + 1 - start);
1583 start = p + 1;
1584 }
1585
1586 // when we get here, we've seeked off the end of the sring, and
1587 // we terminate the loop (we continue until *start* is -1 rather
1588 // than until *p* is -1, because otherwise we'd miss the last
1589 // rule in the description)
1590 else {
1591 start = -1;
1592 }
1593 }
1594
1595 description.setTo(result);
1596 }
1597
1598
1599 void
dispose()1600 RuleBasedNumberFormat::dispose()
1601 {
1602 if (ruleSets) {
1603 for (NFRuleSet** p = ruleSets; *p; ++p) {
1604 delete *p;
1605 }
1606 uprv_free(ruleSets);
1607 ruleSets = NULL;
1608 }
1609
1610 if (ruleSetDescriptions) {
1611 delete [] ruleSetDescriptions;
1612 }
1613
1614 #if !UCONFIG_NO_COLLATION
1615 delete collator;
1616 #endif
1617 collator = NULL;
1618
1619 delete decimalFormatSymbols;
1620 decimalFormatSymbols = NULL;
1621
1622 delete lenientParseRules;
1623 lenientParseRules = NULL;
1624
1625 #if !UCONFIG_NO_BREAK_ITERATION
1626 delete capitalizationBrkIter;
1627 capitalizationBrkIter = NULL;
1628 #endif
1629
1630 if (localizations) localizations = localizations->unref();
1631 }
1632
1633
1634 //-----------------------------------------------------------------------
1635 // package-internal API
1636 //-----------------------------------------------------------------------
1637
1638 /**
1639 * Returns the collator to use for lenient parsing. The collator is lazily created:
1640 * this function creates it the first time it's called.
1641 * @return The collator to use for lenient parsing, or null if lenient parsing
1642 * is turned off.
1643 */
1644 const RuleBasedCollator*
getCollator() const1645 RuleBasedNumberFormat::getCollator() const
1646 {
1647 #if !UCONFIG_NO_COLLATION
1648 if (!ruleSets) {
1649 return NULL;
1650 }
1651
1652 // lazy-evaluate the collator
1653 if (collator == NULL && lenient) {
1654 // create a default collator based on the formatter's locale,
1655 // then pull out that collator's rules, append any additional
1656 // rules specified in the description, and create a _new_
1657 // collator based on the combinaiton of those rules
1658
1659 UErrorCode status = U_ZERO_ERROR;
1660
1661 Collator* temp = Collator::createInstance(locale, status);
1662 RuleBasedCollator* newCollator;
1663 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1664 if (lenientParseRules) {
1665 UnicodeString rules(newCollator->getRules());
1666 rules.append(*lenientParseRules);
1667
1668 newCollator = new RuleBasedCollator(rules, status);
1669 // Exit if newCollator could not be created.
1670 if (newCollator == NULL) {
1671 return NULL;
1672 }
1673 } else {
1674 temp = NULL;
1675 }
1676 if (U_SUCCESS(status)) {
1677 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1678 // cast away const
1679 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1680 } else {
1681 delete newCollator;
1682 }
1683 }
1684 delete temp;
1685 }
1686 #endif
1687
1688 // if lenient-parse mode is off, this will be null
1689 // (see setLenientParseMode())
1690 return collator;
1691 }
1692
1693
1694 /**
1695 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1696 * instances owned by this formatter. This object is lazily created: this function
1697 * creates it the first time it's called.
1698 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1699 * instances owned by this formatter.
1700 */
1701 DecimalFormatSymbols*
getDecimalFormatSymbols() const1702 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1703 {
1704 // lazy-evaluate the DecimalFormatSymbols object. This object
1705 // is shared by all DecimalFormat instances belonging to this
1706 // formatter
1707 if (decimalFormatSymbols == NULL) {
1708 UErrorCode status = U_ZERO_ERROR;
1709 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1710 if (U_SUCCESS(status)) {
1711 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1712 } else {
1713 delete temp;
1714 }
1715 }
1716 return decimalFormatSymbols;
1717 }
1718
1719 // De-owning the current localized symbols and adopt the new symbols.
1720 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1721 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1722 {
1723 if (symbolsToAdopt == NULL) {
1724 return; // do not allow caller to set decimalFormatSymbols to NULL
1725 }
1726
1727 if (decimalFormatSymbols != NULL) {
1728 delete decimalFormatSymbols;
1729 }
1730
1731 decimalFormatSymbols = symbolsToAdopt;
1732
1733 {
1734 // Apply the new decimalFormatSymbols by reparsing the rulesets
1735 UErrorCode status = U_ZERO_ERROR;
1736
1737 for (int32_t i = 0; i < numRuleSets; i++) {
1738 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1739 }
1740 }
1741 }
1742
1743 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1744 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1745 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1746 {
1747 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1748 }
1749
1750 PluralFormat *
createPluralFormat(UPluralType pluralType,const UnicodeString & pattern,UErrorCode & status) const1751 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1752 const UnicodeString &pattern,
1753 UErrorCode& status) const
1754 {
1755 return new PluralFormat(locale, pluralType, pattern, status);
1756 }
1757
1758 U_NAMESPACE_END
1759
1760 /* U_HAVE_RBNF */
1761 #endif
1762