• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright (C) 2016 and later: Unicode, Inc. and others.
2  // License & terms of use: http://www.unicode.org/copyright.html
3  /*
4  *******************************************************************************
5  * Copyright (C) 1997-2015, International Business Machines Corporation
6  * and others. All Rights Reserved.
7  *******************************************************************************
8  */
9  
10  #include "unicode/utypes.h"
11  #include "utypeinfo.h"  // for 'typeid' to work
12  
13  #include "unicode/rbnf.h"
14  
15  #if U_HAVE_RBNF
16  
17  #include "unicode/normlzr.h"
18  #include "unicode/plurfmt.h"
19  #include "unicode/tblcoll.h"
20  #include "unicode/uchar.h"
21  #include "unicode/ucol.h"
22  #include "unicode/uloc.h"
23  #include "unicode/unum.h"
24  #include "unicode/ures.h"
25  #include "unicode/ustring.h"
26  #include "unicode/utf16.h"
27  #include "unicode/udata.h"
28  #include "unicode/udisplaycontext.h"
29  #include "unicode/brkiter.h"
30  #include "nfrs.h"
31  
32  #include "cmemory.h"
33  #include "cstring.h"
34  #include "patternprops.h"
35  #include "uresimp.h"
36  
37  // debugging
38  // #define RBNF_DEBUG
39  
40  #ifdef RBNF_DEBUG
41  #include <stdio.h>
42  #endif
43  
44  #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
45  
46  static const UChar gPercentPercent[] =
47  {
48      0x25, 0x25, 0
49  }; /* "%%" */
50  
51  // All urbnf objects are created through openRules, so we init all of the
52  // Unicode string constants required by rbnf, nfrs, or nfr here.
53  static const UChar gLenientParse[] =
54  {
55      0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
56  }; /* "%%lenient-parse:" */
57  static const UChar gSemiColon = 0x003B;
58  static const UChar gSemiPercent[] =
59  {
60      0x3B, 0x25, 0
61  }; /* ";%" */
62  
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its square.
// Each expansion is fully parenthesized so that it behaves as a single operand
// wherever it is used.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
66  
67  U_NAMESPACE_BEGIN
68  
69  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
70  
71  /*
72  This is a utility class. It does not use ICU's RTTI.
73  If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
74  Please make sure that intltest passes on Windows in Release mode,
75  since the string pooling per compilation unit will mess up how RTTI works.
76  The RTTI code was also removed due to lack of code coverage.
77  */
78  class LocalizationInfo : public UMemory {
79  protected:
80      virtual ~LocalizationInfo();
81      uint32_t refcount;
82  
83  public:
LocalizationInfo()84      LocalizationInfo() : refcount(0) {}
85  
ref(void)86      LocalizationInfo* ref(void) {
87          ++refcount;
88          return this;
89      }
90  
unref(void)91      LocalizationInfo* unref(void) {
92          if (refcount && --refcount == 0) {
93              delete this;
94          }
95          return NULL;
96      }
97  
98      virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const99      inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
100  
101      virtual int32_t getNumberOfRuleSets(void) const = 0;
102      virtual const UChar* getRuleSetName(int32_t index) const = 0;
103      virtual int32_t getNumberOfDisplayLocales(void) const = 0;
104      virtual const UChar* getLocaleName(int32_t index) const = 0;
105      virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
106  
107      virtual int32_t indexForLocale(const UChar* locale) const;
108      virtual int32_t indexForRuleSet(const UChar* ruleset) const;
109  
110  //    virtual UClassID getDynamicClassID() const = 0;
111  //    static UClassID getStaticClassID(void);
112  };
113  
~LocalizationInfo()114  LocalizationInfo::~LocalizationInfo() {}
115  
116  //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
117  
118  // if both strings are NULL, this returns TRUE
119  static UBool
streq(const UChar * lhs,const UChar * rhs)120  streq(const UChar* lhs, const UChar* rhs) {
121      if (rhs == lhs) {
122          return TRUE;
123      }
124      if (lhs && rhs) {
125          return u_strcmp(lhs, rhs) == 0;
126      }
127      return FALSE;
128  }
129  
130  UBool
operator ==(const LocalizationInfo * rhs) const131  LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
132      if (rhs) {
133          if (this == rhs) {
134              return TRUE;
135          }
136  
137          int32_t rsc = getNumberOfRuleSets();
138          if (rsc == rhs->getNumberOfRuleSets()) {
139              for (int i = 0; i < rsc; ++i) {
140                  if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
141                      return FALSE;
142                  }
143              }
144              int32_t dlc = getNumberOfDisplayLocales();
145              if (dlc == rhs->getNumberOfDisplayLocales()) {
146                  for (int i = 0; i < dlc; ++i) {
147                      const UChar* locale = getLocaleName(i);
148                      int32_t ix = rhs->indexForLocale(locale);
149                      // if no locale, ix is -1, getLocaleName returns null, so streq returns false
150                      if (!streq(locale, rhs->getLocaleName(ix))) {
151                          return FALSE;
152                      }
153                      for (int j = 0; j < rsc; ++j) {
154                          if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
155                              return FALSE;
156                          }
157                      }
158                  }
159                  return TRUE;
160              }
161          }
162      }
163      return FALSE;
164  }
165  
166  int32_t
indexForLocale(const UChar * locale) const167  LocalizationInfo::indexForLocale(const UChar* locale) const {
168      for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
169          if (streq(locale, getLocaleName(i))) {
170              return i;
171          }
172      }
173      return -1;
174  }
175  
176  int32_t
indexForRuleSet(const UChar * ruleset) const177  LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
178      if (ruleset) {
179          for (int i = 0; i < getNumberOfRuleSets(); ++i) {
180              if (streq(ruleset, getRuleSetName(i))) {
181                  return i;
182              }
183          }
184      }
185      return -1;
186  }
187  
188  
189  typedef void (*Fn_Deleter)(void*);
190  
191  class VArray {
192      void** buf;
193      int32_t cap;
194      int32_t size;
195      Fn_Deleter deleter;
196  public:
VArray()197      VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
198  
VArray(Fn_Deleter del)199      VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
200  
~VArray()201      ~VArray() {
202          if (deleter) {
203              for (int i = 0; i < size; ++i) {
204                  (*deleter)(buf[i]);
205              }
206          }
207          uprv_free(buf);
208      }
209  
length()210      int32_t length() {
211          return size;
212      }
213  
add(void * elem,UErrorCode & status)214      void add(void* elem, UErrorCode& status) {
215          if (U_SUCCESS(status)) {
216              if (size == cap) {
217                  if (cap == 0) {
218                      cap = 1;
219                  } else if (cap < 256) {
220                      cap *= 2;
221                  } else {
222                      cap += 256;
223                  }
224                  if (buf == NULL) {
225                      buf = (void**)uprv_malloc(cap * sizeof(void*));
226                  } else {
227                      buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
228                  }
229                  if (buf == NULL) {
230                      // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
231                      status = U_MEMORY_ALLOCATION_ERROR;
232                      return;
233                  }
234                  void* start = &buf[size];
235                  size_t count = (cap - size) * sizeof(void*);
236                  uprv_memset(start, 0, count); // fill with nulls, just because
237              }
238              buf[size++] = elem;
239          }
240      }
241  
release(void)242      void** release(void) {
243          void** result = buf;
244          buf = NULL;
245          cap = 0;
246          size = 0;
247          return result;
248      }
249  };
250  
251  class LocDataParser;
252  
253  class StringLocalizationInfo : public LocalizationInfo {
254      UChar* info;
255      UChar*** data;
256      int32_t numRuleSets;
257      int32_t numLocales;
258  
259  friend class LocDataParser;
260  
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)261      StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
262          : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
263      {
264      }
265  
266  public:
267      static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
268  
269      virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const270      virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
271      virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const272      virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
273      virtual const UChar* getLocaleName(int32_t index) const;
274      virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
275  
276  //    virtual UClassID getDynamicClassID() const;
277  //    static UClassID getStaticClassID(void);
278  
279  private:
280      void init(UErrorCode& status) const;
281  };
282  
283  
// Code points of the syntax characters recognized by LocDataParser.
enum {
    OPEN_ANGLE  = 0x003C, /* '<' */
    CLOSE_ANGLE = 0x003E, /* '>' */
    COMMA       = 0x002C, /* ',' */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"' */
    SPACE       = 0x0020  /* ' ' */
};
292  
293  /**
294   * Utility for parsing a localization string and returning a StringLocalizationInfo*.
295   */
296  class LocDataParser {
297      UChar* data;
298      const UChar* e;
299      UChar* p;
300      UChar ch;
301      UParseError& pe;
302      UErrorCode& ec;
303  
304  public:
LocDataParser(UParseError & parseError,UErrorCode & status)305      LocDataParser(UParseError& parseError, UErrorCode& status)
306          : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()307      ~LocDataParser() {}
308  
309      /*
310      * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
311      * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
312      */
313      StringLocalizationInfo* parse(UChar* data, int32_t len);
314  
315  private:
316  
inc(void)317      void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)318      UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)319      UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)320      void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const321      UBool inList(UChar c, const UChar* list) const {
322          if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
323          while (*list && *list != c) ++list; return *list == c;
324      }
325      void parseError(const char* msg);
326  
327      StringLocalizationInfo* doParse(void);
328  
329      UChar** nextArray(int32_t& requiredLength);
330      UChar*  nextString(void);
331  };
332  
// ERROR(msg): report a parse error through parseError() and return NULL from
// the enclosing function.  The message text is only forwarded in RBNF_DEBUG
// builds.  The expansion is wrapped in do { } while (0) so that it acts as a
// single statement, e.g. directly under an unbraced `if`.  Invoke it with a
// trailing semicolon.
#ifdef RBNF_DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#define EXPLANATION_ARG
#endif
340  
341  
342  static const UChar DQUOTE_STOPLIST[] = {
343      QUOTE, 0
344  };
345  
346  static const UChar SQUOTE_STOPLIST[] = {
347      TICK, 0
348  };
349  
350  static const UChar NOQUOTE_STOPLIST[] = {
351      SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
352  };
353  
354  static void
DeleteFn(void * p)355  DeleteFn(void* p) {
356    uprv_free(p);
357  }
358  
359  StringLocalizationInfo*
parse(UChar * _data,int32_t len)360  LocDataParser::parse(UChar* _data, int32_t len) {
361      if (U_FAILURE(ec)) {
362          if (_data) uprv_free(_data);
363          return NULL;
364      }
365  
366      pe.line = 0;
367      pe.offset = -1;
368      pe.postContext[0] = 0;
369      pe.preContext[0] = 0;
370  
371      if (_data == NULL) {
372          ec = U_ILLEGAL_ARGUMENT_ERROR;
373          return NULL;
374      }
375  
376      if (len <= 0) {
377          ec = U_ILLEGAL_ARGUMENT_ERROR;
378          uprv_free(_data);
379          return NULL;
380      }
381  
382      data = _data;
383      e = data + len;
384      p = _data;
385      ch = 0xffff;
386  
387      return doParse();
388  }
389  
390  
391  StringLocalizationInfo*
doParse(void)392  LocDataParser::doParse(void) {
393      skipWhitespace();
394      if (!checkInc(OPEN_ANGLE)) {
395          ERROR("Missing open angle");
396      } else {
397          VArray array(DeleteFn);
398          UBool mightHaveNext = TRUE;
399          int32_t requiredLength = -1;
400          while (mightHaveNext) {
401              mightHaveNext = FALSE;
402              UChar** elem = nextArray(requiredLength);
403              skipWhitespace();
404              UBool haveComma = check(COMMA);
405              if (elem) {
406                  array.add(elem, ec);
407                  if (haveComma) {
408                      inc();
409                      mightHaveNext = TRUE;
410                  }
411              } else if (haveComma) {
412                  ERROR("Unexpected character");
413              }
414          }
415  
416          skipWhitespace();
417          if (!checkInc(CLOSE_ANGLE)) {
418              if (check(OPEN_ANGLE)) {
419                  ERROR("Missing comma in outer array");
420              } else {
421                  ERROR("Missing close angle bracket in outer array");
422              }
423          }
424  
425          skipWhitespace();
426          if (p != e) {
427              ERROR("Extra text after close of localization data");
428          }
429  
430          array.add(NULL, ec);
431          if (U_SUCCESS(ec)) {
432              int32_t numLocs = array.length() - 2; // subtract first, NULL
433              UChar*** result = (UChar***)array.release();
434  
435              return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
436          }
437      }
438  
439      ERROR("Unknown error");
440  }
441  
442  UChar**
nextArray(int32_t & requiredLength)443  LocDataParser::nextArray(int32_t& requiredLength) {
444      if (U_FAILURE(ec)) {
445          return NULL;
446      }
447  
448      skipWhitespace();
449      if (!checkInc(OPEN_ANGLE)) {
450          ERROR("Missing open angle");
451      }
452  
453      VArray array;
454      UBool mightHaveNext = TRUE;
455      while (mightHaveNext) {
456          mightHaveNext = FALSE;
457          UChar* elem = nextString();
458          skipWhitespace();
459          UBool haveComma = check(COMMA);
460          if (elem) {
461              array.add(elem, ec);
462              if (haveComma) {
463                  inc();
464                  mightHaveNext = TRUE;
465              }
466          } else if (haveComma) {
467              ERROR("Unexpected comma");
468          }
469      }
470      skipWhitespace();
471      if (!checkInc(CLOSE_ANGLE)) {
472          if (check(OPEN_ANGLE)) {
473              ERROR("Missing close angle bracket in inner array");
474          } else {
475              ERROR("Missing comma in inner array");
476          }
477      }
478  
479      array.add(NULL, ec);
480      if (U_SUCCESS(ec)) {
481          if (requiredLength == -1) {
482              requiredLength = array.length() + 1;
483          } else if (array.length() != requiredLength) {
484              ec = U_ILLEGAL_ARGUMENT_ERROR;
485              ERROR("Array not of required length");
486          }
487  
488          return (UChar**)array.release();
489      }
490      ERROR("Unknown Error");
491  }
492  
493  UChar*
nextString()494  LocDataParser::nextString() {
495      UChar* result = NULL;
496  
497      skipWhitespace();
498      if (p < e) {
499          const UChar* terminators;
500          UChar c = *p;
501          UBool haveQuote = c == QUOTE || c == TICK;
502          if (haveQuote) {
503              inc();
504              terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
505          } else {
506              terminators = NOQUOTE_STOPLIST;
507          }
508          UChar* start = p;
509          while (p < e && !inList(*p, terminators)) ++p;
510          if (p == e) {
511              ERROR("Unexpected end of data");
512          }
513  
514          UChar x = *p;
515          if (p > start) {
516              ch = x;
517              *p = 0x0; // terminate by writing to data
518              result = start; // just point into data
519          }
520          if (haveQuote) {
521              if (x != c) {
522                  ERROR("Missing matching quote");
523              } else if (p == start) {
524                  ERROR("Empty string");
525              }
526              inc();
527          } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
528              ERROR("Unexpected character in string");
529          }
530      }
531  
532      // ok for there to be no next string
533      return result;
534  }
535  
parseError(const char * EXPLANATION_ARG)536  void LocDataParser::parseError(const char* EXPLANATION_ARG)
537  {
538      if (!data) {
539          return;
540      }
541  
542      const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
543      if (start < data) {
544          start = data;
545      }
546      for (UChar* x = p; --x >= start;) {
547          if (!*x) {
548              start = x+1;
549              break;
550          }
551      }
552      const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
553      if (limit > e) {
554          limit = e;
555      }
556      u_strncpy(pe.preContext, start, (int32_t)(p-start));
557      pe.preContext[p-start] = 0;
558      u_strncpy(pe.postContext, p, (int32_t)(limit-p));
559      pe.postContext[limit-p] = 0;
560      pe.offset = (int32_t)(p - data);
561  
562  #ifdef RBNF_DEBUG
563      fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
564  
565      UnicodeString msg;
566      msg.append(start, p - start);
567      msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
568      msg.append(p, limit-p);
569      msg.append(UNICODE_STRING_SIMPLE("'"));
570  
571      char buf[128];
572      int32_t len = msg.extract(0, msg.length(), buf, 128);
573      if (len >= 128) {
574          buf[127] = 0;
575      } else {
576          buf[len] = 0;
577      }
578      fprintf(stderr, "%s\n", buf);
579      fflush(stderr);
580  #endif
581  
582      uprv_free(data);
583      data = NULL;
584      p = NULL;
585      e = NULL;
586  
587      if (U_SUCCESS(ec)) {
588          ec = U_PARSE_ERROR;
589      }
590  }
591  
592  //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
593  
594  StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)595  StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
596      if (U_FAILURE(status)) {
597          return NULL;
598      }
599  
600      int32_t len = info.length();
601      if (len == 0) {
602          return NULL; // no error;
603      }
604  
605      UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
606      if (!p) {
607          status = U_MEMORY_ALLOCATION_ERROR;
608          return NULL;
609      }
610      info.extract(p, len, status);
611      if (!U_FAILURE(status)) {
612          status = U_ZERO_ERROR; // clear warning about non-termination
613      }
614  
615      LocDataParser parser(perror, status);
616      return parser.parse(p, len);
617  }
618  
~StringLocalizationInfo()619  StringLocalizationInfo::~StringLocalizationInfo() {
620      for (UChar*** p = (UChar***)data; *p; ++p) {
621          // remaining data is simply pointer into our unicode string data.
622          if (*p) uprv_free(*p);
623      }
624      if (data) uprv_free(data);
625      if (info) uprv_free(info);
626  }
627  
628  
629  const UChar*
getRuleSetName(int32_t index) const630  StringLocalizationInfo::getRuleSetName(int32_t index) const {
631      if (index >= 0 && index < getNumberOfRuleSets()) {
632          return data[0][index];
633      }
634      return NULL;
635  }
636  
637  const UChar*
getLocaleName(int32_t index) const638  StringLocalizationInfo::getLocaleName(int32_t index) const {
639      if (index >= 0 && index < getNumberOfDisplayLocales()) {
640          return data[index+1][0];
641      }
642      return NULL;
643  }
644  
645  const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const646  StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
647      if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
648          ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
649          return data[localeIndex+1][ruleIndex+1];
650      }
651      return NULL;
652  }
653  
654  // ----------
655  
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)656  RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
657                                               const UnicodeString& locs,
658                                               const Locale& alocale, UParseError& perror, UErrorCode& status)
659    : ruleSets(NULL)
660    , ruleSetDescriptions(NULL)
661    , numRuleSets(0)
662    , defaultRuleSet(NULL)
663    , locale(alocale)
664    , collator(NULL)
665    , decimalFormatSymbols(NULL)
666    , defaultInfinityRule(NULL)
667    , defaultNaNRule(NULL)
668    , lenient(FALSE)
669    , lenientParseRules(NULL)
670    , localizations(NULL)
671    , capitalizationInfoSet(FALSE)
672    , capitalizationForUIListMenu(FALSE)
673    , capitalizationForStandAlone(FALSE)
674    , capitalizationBrkIter(NULL)
675  {
676    LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
677    init(description, locinfo, perror, status);
678  }
679  
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)680  RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
681                                               const UnicodeString& locs,
682                                               UParseError& perror, UErrorCode& status)
683    : ruleSets(NULL)
684    , ruleSetDescriptions(NULL)
685    , numRuleSets(0)
686    , defaultRuleSet(NULL)
687    , locale(Locale::getDefault())
688    , collator(NULL)
689    , decimalFormatSymbols(NULL)
690    , defaultInfinityRule(NULL)
691    , defaultNaNRule(NULL)
692    , lenient(FALSE)
693    , lenientParseRules(NULL)
694    , localizations(NULL)
695    , capitalizationInfoSet(FALSE)
696    , capitalizationForUIListMenu(FALSE)
697    , capitalizationForStandAlone(FALSE)
698    , capitalizationBrkIter(NULL)
699  {
700    LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
701    init(description, locinfo, perror, status);
702  }
703  
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)704  RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
705                                               LocalizationInfo* info,
706                                               const Locale& alocale, UParseError& perror, UErrorCode& status)
707    : ruleSets(NULL)
708    , ruleSetDescriptions(NULL)
709    , numRuleSets(0)
710    , defaultRuleSet(NULL)
711    , locale(alocale)
712    , collator(NULL)
713    , decimalFormatSymbols(NULL)
714    , defaultInfinityRule(NULL)
715    , defaultNaNRule(NULL)
716    , lenient(FALSE)
717    , lenientParseRules(NULL)
718    , localizations(NULL)
719    , capitalizationInfoSet(FALSE)
720    , capitalizationForUIListMenu(FALSE)
721    , capitalizationForStandAlone(FALSE)
722    , capitalizationBrkIter(NULL)
723  {
724    init(description, info, perror, status);
725  }
726  
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)727  RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
728                           UParseError& perror,
729                           UErrorCode& status)
730    : ruleSets(NULL)
731    , ruleSetDescriptions(NULL)
732    , numRuleSets(0)
733    , defaultRuleSet(NULL)
734    , locale(Locale::getDefault())
735    , collator(NULL)
736    , decimalFormatSymbols(NULL)
737    , defaultInfinityRule(NULL)
738    , defaultNaNRule(NULL)
739    , lenient(FALSE)
740    , lenientParseRules(NULL)
741    , localizations(NULL)
742    , capitalizationInfoSet(FALSE)
743    , capitalizationForUIListMenu(FALSE)
744    , capitalizationForStandAlone(FALSE)
745    , capitalizationBrkIter(NULL)
746  {
747      init(description, NULL, perror, status);
748  }
749  
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)750  RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
751                           const Locale& aLocale,
752                           UParseError& perror,
753                           UErrorCode& status)
754    : ruleSets(NULL)
755    , ruleSetDescriptions(NULL)
756    , numRuleSets(0)
757    , defaultRuleSet(NULL)
758    , locale(aLocale)
759    , collator(NULL)
760    , decimalFormatSymbols(NULL)
761    , defaultInfinityRule(NULL)
762    , defaultNaNRule(NULL)
763    , lenient(FALSE)
764    , lenientParseRules(NULL)
765    , localizations(NULL)
766    , capitalizationInfoSet(FALSE)
767    , capitalizationForUIListMenu(FALSE)
768    , capitalizationForStandAlone(FALSE)
769    , capitalizationBrkIter(NULL)
770  {
771      init(description, NULL, perror, status);
772  }
773  
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)774  RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
775    : ruleSets(NULL)
776    , ruleSetDescriptions(NULL)
777    , numRuleSets(0)
778    , defaultRuleSet(NULL)
779    , locale(alocale)
780    , collator(NULL)
781    , decimalFormatSymbols(NULL)
782    , defaultInfinityRule(NULL)
783    , defaultNaNRule(NULL)
784    , lenient(FALSE)
785    , lenientParseRules(NULL)
786    , localizations(NULL)
787    , capitalizationInfoSet(FALSE)
788    , capitalizationForUIListMenu(FALSE)
789    , capitalizationForStandAlone(FALSE)
790    , capitalizationBrkIter(NULL)
791  {
792      if (U_FAILURE(status)) {
793          return;
794      }
795  
796      const char* rules_tag = "RBNFRules";
797      const char* fmt_tag = "";
798      switch (tag) {
799      case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
800      case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
801      case URBNF_DURATION: fmt_tag = "DurationRules"; break;
802      case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
803      default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
804      }
805  
806      // TODO: read localization info from resource
807      LocalizationInfo* locinfo = NULL;
808  
809      UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
810      if (U_SUCCESS(status)) {
811          setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
812                       ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
813  
814          UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
815          if (U_FAILURE(status)) {
816              ures_close(nfrb);
817          }
818          UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
819          if (U_FAILURE(status)) {
820              ures_close(rbnfRules);
821              ures_close(nfrb);
822              return;
823          }
824  
825          UnicodeString desc;
826          while (ures_hasNext(ruleSets)) {
827             desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
828          }
829          UParseError perror;
830  
831          init(desc, locinfo, perror, status);
832  
833          ures_close(ruleSets);
834          ures_close(rbnfRules);
835      }
836      ures_close(nfrb);
837  }
838  
// Copy constructor: starts from an empty state and then reuses the
// assignment operator to copy rhs.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : NumberFormat(rhs),
      ruleSets(NULL),
      ruleSetDescriptions(NULL),
      numRuleSets(0),
      defaultRuleSet(NULL),
      locale(rhs.locale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      defaultInfinityRule(NULL),
      defaultNaNRule(NULL),
      lenient(FALSE),
      lenientParseRules(NULL),
      localizations(NULL),
      capitalizationInfoSet(FALSE),
      capitalizationForUIListMenu(FALSE),
      capitalizationForStandAlone(FALSE),
      capitalizationBrkIter(NULL)
{
    *this = rhs;
}
860  
861  // --------
862  
863  RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)864  RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
865  {
866      if (this == &rhs) {
867          return *this;
868      }
869      NumberFormat::operator=(rhs);
870      UErrorCode status = U_ZERO_ERROR;
871      dispose();
872      locale = rhs.locale;
873      lenient = rhs.lenient;
874  
875      UParseError perror;
876      setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
877      init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
878      setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
879  
880      capitalizationInfoSet = rhs.capitalizationInfoSet;
881      capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
882      capitalizationForStandAlone = rhs.capitalizationForStandAlone;
883  #if !UCONFIG_NO_BREAK_ITERATION
884      capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
885  #endif
886  
887      return *this;
888  }
889  
~RuleBasedNumberFormat()890  RuleBasedNumberFormat::~RuleBasedNumberFormat()
891  {
892      dispose();
893  }
894  
895  Format*
clone(void) const896  RuleBasedNumberFormat::clone(void) const
897  {
898      return new RuleBasedNumberFormat(*this);
899  }
900  
901  UBool
operator ==(const Format & other) const902  RuleBasedNumberFormat::operator==(const Format& other) const
903  {
904      if (this == &other) {
905          return TRUE;
906      }
907  
908      if (typeid(*this) == typeid(other)) {
909          const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
910          // test for capitalization info equality is adequately handled
911          // by the NumberFormat test for fCapitalizationContext equality;
912          // the info here is just derived from that.
913          if (locale == rhs.locale &&
914              lenient == rhs.lenient &&
915              (localizations == NULL
916                  ? rhs.localizations == NULL
917                  : (rhs.localizations == NULL
918                      ? FALSE
919                      : *localizations == rhs.localizations))) {
920  
921              NFRuleSet** p = ruleSets;
922              NFRuleSet** q = rhs.ruleSets;
923              if (p == NULL) {
924                  return q == NULL;
925              } else if (q == NULL) {
926                  return FALSE;
927              }
928              while (*p && *q && (**p == **q)) {
929                  ++p;
930                  ++q;
931              }
932              return *q == NULL && *p == NULL;
933          }
934      }
935  
936      return FALSE;
937  }
938  
939  UnicodeString
getRules() const940  RuleBasedNumberFormat::getRules() const
941  {
942      UnicodeString result;
943      if (ruleSets != NULL) {
944          for (NFRuleSet** p = ruleSets; *p; ++p) {
945              (*p)->appendRules(result);
946          }
947      }
948      return result;
949  }
950  
951  UnicodeString
getRuleSetName(int32_t index) const952  RuleBasedNumberFormat::getRuleSetName(int32_t index) const
953  {
954      if (localizations) {
955          UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
956          return string;
957      }
958      else if (ruleSets) {
959          UnicodeString result;
960          for (NFRuleSet** p = ruleSets; *p; ++p) {
961              NFRuleSet* rs = *p;
962              if (rs->isPublic()) {
963                  if (--index == -1) {
964                      rs->getName(result);
965                      return result;
966                  }
967              }
968          }
969      }
970      UnicodeString empty;
971      return empty;
972  }
973  
974  int32_t
getNumberOfRuleSetNames() const975  RuleBasedNumberFormat::getNumberOfRuleSetNames() const
976  {
977      int32_t result = 0;
978      if (localizations) {
979          result = localizations->getNumberOfRuleSets();
980      }
981      else if (ruleSets) {
982          for (NFRuleSet** p = ruleSets; *p; ++p) {
983              if ((**p).isPublic()) {
984                  ++result;
985              }
986          }
987      }
988      return result;
989  }
990  
991  int32_t
getNumberOfRuleSetDisplayNameLocales(void) const992  RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
993      if (localizations) {
994          return localizations->getNumberOfDisplayLocales();
995      }
996      return 0;
997  }
998  
999  Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const1000  RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1001      if (U_FAILURE(status)) {
1002          return Locale("");
1003      }
1004      if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1005          UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1006          char buffer[64];
1007          int32_t cap = name.length() + 1;
1008          char* bp = buffer;
1009          if (cap > 64) {
1010              bp = (char *)uprv_malloc(cap);
1011              if (bp == NULL) {
1012                  status = U_MEMORY_ALLOCATION_ERROR;
1013                  return Locale("");
1014              }
1015          }
1016          name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1017          Locale retLocale(bp);
1018          if (bp != buffer) {
1019              uprv_free(bp);
1020          }
1021          return retLocale;
1022      }
1023      status = U_ILLEGAL_ARGUMENT_ERROR;
1024      Locale retLocale;
1025      return retLocale;
1026  }
1027  
1028  UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)1029  RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1030      if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1031          UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1032          int32_t len = localeName.length();
1033          UChar* localeStr = localeName.getBuffer(len + 1);
1034          while (len >= 0) {
1035              localeStr[len] = 0;
1036              int32_t ix = localizations->indexForLocale(localeStr);
1037              if (ix >= 0) {
1038                  UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1039                  return name;
1040              }
1041  
1042              // trim trailing portion, skipping over ommitted sections
1043              do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1044              while (len > 0 && localeStr[len-1] == 0x005F) --len;
1045          }
1046          UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1047          return name;
1048      }
1049      UnicodeString bogus;
1050      bogus.setToBogus();
1051      return bogus;
1052  }
1053  
1054  UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1055  RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1056      if (localizations) {
1057          UnicodeString rsn(ruleSetName);
1058          int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1059          return getRuleSetDisplayName(ix, localeParam);
1060      }
1061      UnicodeString bogus;
1062      bogus.setToBogus();
1063      return bogus;
1064  }
1065  
1066  NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1067  RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1068  {
1069      if (U_SUCCESS(status) && ruleSets) {
1070          for (NFRuleSet** p = ruleSets; *p; ++p) {
1071              NFRuleSet* rs = *p;
1072              if (rs->isNamed(name)) {
1073                  return rs;
1074              }
1075          }
1076          status = U_ILLEGAL_ARGUMENT_ERROR;
1077      }
1078      return NULL;
1079  }
1080  
1081  UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1082  RuleBasedNumberFormat::format(int32_t number,
1083                                UnicodeString& toAppendTo,
1084                                FieldPosition& /* pos */) const
1085  {
1086      if (defaultRuleSet) {
1087          UErrorCode status = U_ZERO_ERROR;
1088          int32_t startPos = toAppendTo.length();
1089          defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1090          adjustForCapitalizationContext(startPos, toAppendTo);
1091      }
1092      return toAppendTo;
1093  }
1094  
1095  
1096  UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1097  RuleBasedNumberFormat::format(int64_t number,
1098                                UnicodeString& toAppendTo,
1099                                FieldPosition& /* pos */) const
1100  {
1101      if (defaultRuleSet) {
1102          UErrorCode status = U_ZERO_ERROR;
1103          int32_t startPos = toAppendTo.length();
1104          defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1105          adjustForCapitalizationContext(startPos, toAppendTo);
1106      }
1107      return toAppendTo;
1108  }
1109  
1110  
1111  UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1112  RuleBasedNumberFormat::format(double number,
1113                                UnicodeString& toAppendTo,
1114                                FieldPosition& /* pos */) const
1115  {
1116      int32_t startPos = toAppendTo.length();
1117      if (defaultRuleSet) {
1118          UErrorCode status = U_ZERO_ERROR;
1119          defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1120      }
1121      return adjustForCapitalizationContext(startPos, toAppendTo);
1122  }
1123  
1124  
1125  UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1126  RuleBasedNumberFormat::format(int32_t number,
1127                                const UnicodeString& ruleSetName,
1128                                UnicodeString& toAppendTo,
1129                                FieldPosition& /* pos */,
1130                                UErrorCode& status) const
1131  {
1132      // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1133      if (U_SUCCESS(status)) {
1134          if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1135              // throw new IllegalArgumentException("Can't use internal rule set");
1136              status = U_ILLEGAL_ARGUMENT_ERROR;
1137          } else {
1138              NFRuleSet *rs = findRuleSet(ruleSetName, status);
1139              if (rs) {
1140                  int32_t startPos = toAppendTo.length();
1141                  rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1142                  adjustForCapitalizationContext(startPos, toAppendTo);
1143              }
1144          }
1145      }
1146      return toAppendTo;
1147  }
1148  
1149  
1150  UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1151  RuleBasedNumberFormat::format(int64_t number,
1152                                const UnicodeString& ruleSetName,
1153                                UnicodeString& toAppendTo,
1154                                FieldPosition& /* pos */,
1155                                UErrorCode& status) const
1156  {
1157      if (U_SUCCESS(status)) {
1158          if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1159              // throw new IllegalArgumentException("Can't use internal rule set");
1160              status = U_ILLEGAL_ARGUMENT_ERROR;
1161          } else {
1162              NFRuleSet *rs = findRuleSet(ruleSetName, status);
1163              if (rs) {
1164                  int32_t startPos = toAppendTo.length();
1165                  rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1166                  adjustForCapitalizationContext(startPos, toAppendTo);
1167              }
1168          }
1169      }
1170      return toAppendTo;
1171  }
1172  
1173  
1174  UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1175  RuleBasedNumberFormat::format(double number,
1176                                const UnicodeString& ruleSetName,
1177                                UnicodeString& toAppendTo,
1178                                FieldPosition& /* pos */,
1179                                UErrorCode& status) const
1180  {
1181      if (U_SUCCESS(status)) {
1182          if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1183              // throw new IllegalArgumentException("Can't use internal rule set");
1184              status = U_ILLEGAL_ARGUMENT_ERROR;
1185          } else {
1186              NFRuleSet *rs = findRuleSet(ruleSetName, status);
1187              if (rs) {
1188                  int32_t startPos = toAppendTo.length();
1189                  rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1190                  adjustForCapitalizationContext(startPos, toAppendTo);
1191              }
1192          }
1193      }
1194      return toAppendTo;
1195  }
1196  
1197  UnicodeString&
adjustForCapitalizationContext(int32_t startPos,UnicodeString & currentResult) const1198  RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1199                                                        UnicodeString& currentResult) const
1200  {
1201  #if !UCONFIG_NO_BREAK_ITERATION
1202      if (startPos==0 && currentResult.length() > 0) {
1203          // capitalize currentResult according to context
1204          UChar32 ch = currentResult.char32At(0);
1205          UErrorCode status = U_ZERO_ERROR;
1206          UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1207          if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1208                ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1209                  (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1210                  (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1211              // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1212              // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1213              currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1214          }
1215      }
1216  #endif
1217      return currentResult;
1218  }
1219  
1220  
1221  void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1222  RuleBasedNumberFormat::parse(const UnicodeString& text,
1223                               Formattable& result,
1224                               ParsePosition& parsePosition) const
1225  {
1226      if (!ruleSets) {
1227          parsePosition.setErrorIndex(0);
1228          return;
1229      }
1230  
1231      UnicodeString workingText(text, parsePosition.getIndex());
1232      ParsePosition workingPos(0);
1233  
1234      ParsePosition high_pp(0);
1235      Formattable high_result;
1236  
1237      for (NFRuleSet** p = ruleSets; *p; ++p) {
1238          NFRuleSet *rp = *p;
1239          if (rp->isPublic() && rp->isParseable()) {
1240              ParsePosition working_pp(0);
1241              Formattable working_result;
1242  
1243              rp->parse(workingText, working_pp, kMaxDouble, working_result);
1244              if (working_pp.getIndex() > high_pp.getIndex()) {
1245                  high_pp = working_pp;
1246                  high_result = working_result;
1247  
1248                  if (high_pp.getIndex() == workingText.length()) {
1249                      break;
1250                  }
1251              }
1252          }
1253      }
1254  
1255      int32_t startIndex = parsePosition.getIndex();
1256      parsePosition.setIndex(startIndex + high_pp.getIndex());
1257      if (high_pp.getIndex() > 0) {
1258          parsePosition.setErrorIndex(-1);
1259      } else {
1260          int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1261          parsePosition.setErrorIndex(startIndex + errorIndex);
1262      }
1263      result = high_result;
1264      if (result.getType() == Formattable::kDouble) {
1265          double d = result.getDouble();
1266          if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1267              // Note: casting a double to an int when the double is too large or small
1268              //       to fit the destination is undefined behavior. The explicit range checks,
1269              //       above, are required. Just casting and checking the result value is undefined.
1270              result.setLong(static_cast<int32_t>(d));
1271          }
1272      }
1273  }
1274  
1275  #if !UCONFIG_NO_COLLATION
1276  
1277  void
setLenient(UBool enabled)1278  RuleBasedNumberFormat::setLenient(UBool enabled)
1279  {
1280      lenient = enabled;
1281      if (!enabled && collator) {
1282          delete collator;
1283          collator = NULL;
1284      }
1285  }
1286  
1287  #endif
1288  
1289  void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1290  RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1291      if (U_SUCCESS(status)) {
1292          if (ruleSetName.isEmpty()) {
1293            if (localizations) {
1294                UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1295                defaultRuleSet = findRuleSet(name, status);
1296            } else {
1297              initDefaultRuleSet();
1298            }
1299          } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1300              status = U_ILLEGAL_ARGUMENT_ERROR;
1301          } else {
1302              NFRuleSet* result = findRuleSet(ruleSetName, status);
1303              if (result != NULL) {
1304                  defaultRuleSet = result;
1305              }
1306          }
1307      }
1308  }
1309  
1310  UnicodeString
getDefaultRuleSetName() const1311  RuleBasedNumberFormat::getDefaultRuleSetName() const {
1312      UnicodeString result;
1313      if (defaultRuleSet && defaultRuleSet->isPublic()) {
1314          defaultRuleSet->getName(result);
1315      } else {
1316          result.setToBogus();
1317      }
1318      return result;
1319  }
1320  
1321  void
initDefaultRuleSet()1322  RuleBasedNumberFormat::initDefaultRuleSet()
1323  {
1324      defaultRuleSet = NULL;
1325      if (!ruleSets) {
1326          return;
1327      }
1328  
1329      const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1330      const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1331      const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1332  
1333      NFRuleSet**p = &ruleSets[0];
1334      while (*p) {
1335          if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1336              defaultRuleSet = *p;
1337              return;
1338          } else {
1339              ++p;
1340          }
1341      }
1342  
1343      defaultRuleSet = *--p;
1344      if (!defaultRuleSet->isPublic()) {
1345          while (p != ruleSets) {
1346              if ((*--p)->isPublic()) {
1347                  defaultRuleSet = *p;
1348                  break;
1349              }
1350          }
1351      }
1352  }
1353  
1354  
1355  void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1356  RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1357                              UParseError& pErr, UErrorCode& status)
1358  {
1359      // TODO: implement UParseError
1360      uprv_memset(&pErr, 0, sizeof(UParseError));
1361      // Note: this can leave ruleSets == NULL, so remaining code should check
1362      if (U_FAILURE(status)) {
1363          return;
1364      }
1365  
1366      initializeDecimalFormatSymbols(status);
1367      initializeDefaultInfinityRule(status);
1368      initializeDefaultNaNRule(status);
1369      if (U_FAILURE(status)) {
1370          return;
1371      }
1372  
1373      this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1374  
1375      UnicodeString description(rules);
1376      if (!description.length()) {
1377          status = U_MEMORY_ALLOCATION_ERROR;
1378          return;
1379      }
1380  
1381      // start by stripping the trailing whitespace from all the rules
1382      // (this is all the whitespace follwing each semicolon in the
1383      // description).  This allows us to look for rule-set boundaries
1384      // by searching for ";%" without having to worry about whitespace
1385      // between the ; and the %
1386      stripWhitespace(description);
1387  
1388      // check to see if there's a set of lenient-parse rules.  If there
1389      // is, pull them out into our temporary holding place for them,
1390      // and delete them from the description before the real desciption-
1391      // parsing code sees them
1392      int32_t lp = description.indexOf(gLenientParse, -1, 0);
1393      if (lp != -1) {
1394          // we've got to make sure we're not in the middle of a rule
1395          // (where "%%lenient-parse" would actually get treated as
1396          // rule text)
1397          if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1398              // locate the beginning and end of the actual collation
1399              // rules (there may be whitespace between the name and
1400              // the first token in the description)
1401              int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1402  
1403              if (lpEnd == -1) {
1404                  lpEnd = description.length() - 1;
1405              }
1406              int lpStart = lp + u_strlen(gLenientParse);
1407              while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1408                  ++lpStart;
1409              }
1410  
1411              // copy out the lenient-parse rules and delete them
1412              // from the description
1413              lenientParseRules = new UnicodeString();
1414              /* test for NULL */
1415              if (lenientParseRules == 0) {
1416                  status = U_MEMORY_ALLOCATION_ERROR;
1417                  return;
1418              }
1419              lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1420  
1421              description.remove(lp, lpEnd + 1 - lp);
1422          }
1423      }
1424  
1425      // pre-flight parsing the description and count the number of
1426      // rule sets (";%" marks the end of one rule set and the beginning
1427      // of the next)
1428      numRuleSets = 0;
1429      for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1430          ++numRuleSets;
1431          ++p;
1432      }
1433      ++numRuleSets;
1434  
1435      // our rule list is an array of the appropriate size
1436      ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1437      /* test for NULL */
1438      if (ruleSets == 0) {
1439          status = U_MEMORY_ALLOCATION_ERROR;
1440          return;
1441      }
1442  
1443      for (int i = 0; i <= numRuleSets; ++i) {
1444          ruleSets[i] = NULL;
1445      }
1446  
1447      // divide up the descriptions into individual rule-set descriptions
1448      // and store them in a temporary array.  At each step, we also
1449      // new up a rule set, but all this does is initialize its name
1450      // and remove it from its description.  We can't actually parse
1451      // the rest of the descriptions and finish initializing everything
1452      // because we have to know the names and locations of all the rule
1453      // sets before we can actually set everything up
1454      if(!numRuleSets) {
1455          status = U_ILLEGAL_ARGUMENT_ERROR;
1456          return;
1457      }
1458  
1459      ruleSetDescriptions = new UnicodeString[numRuleSets];
1460      if (ruleSetDescriptions == 0) {
1461          status = U_MEMORY_ALLOCATION_ERROR;
1462          return;
1463      }
1464  
1465      {
1466          int curRuleSet = 0;
1467          int32_t start = 0;
1468          for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1469              ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1470              ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1471              if (ruleSets[curRuleSet] == 0) {
1472                  status = U_MEMORY_ALLOCATION_ERROR;
1473                  return;
1474              }
1475              ++curRuleSet;
1476              start = p + 1;
1477          }
1478          ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1479          ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1480          if (ruleSets[curRuleSet] == 0) {
1481              status = U_MEMORY_ALLOCATION_ERROR;
1482              return;
1483          }
1484      }
1485  
1486      // now we can take note of the formatter's default rule set, which
1487      // is the last public rule set in the description (it's the last
1488      // rather than the first so that a user can create a new formatter
1489      // from an existing formatter and change its default behavior just
1490      // by appending more rule sets to the end)
1491  
1492      // {dlf} Initialization of a fraction rule set requires the default rule
1493      // set to be known.  For purposes of initialization, this is always the
1494      // last public rule set, no matter what the localization data says.
1495      initDefaultRuleSet();
1496  
1497      // finally, we can go back through the temporary descriptions
1498      // list and finish seting up the substructure (and we throw
1499      // away the temporary descriptions as we go)
1500      {
1501          for (int i = 0; i < numRuleSets; i++) {
1502              ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
1503          }
1504      }
1505  
1506      // Now that the rules are initialized, the 'real' default rule
1507      // set can be adjusted by the localization data.
1508  
1509      // The C code keeps the localization array as is, rather than building
1510      // a separate array of the public rule set names, so we have less work
1511      // to do here-- but we still need to check the names.
1512  
1513      if (localizationInfos) {
1514          // confirm the names, if any aren't in the rules, that's an error
1515          // it is ok if the rules contain public rule sets that are not in this list
1516          for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1517              UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1518              NFRuleSet* rs = findRuleSet(name, status);
1519              if (rs == NULL) {
1520                  break; // error
1521              }
1522              if (i == 0) {
1523                  defaultRuleSet = rs;
1524              }
1525          }
1526      } else {
1527          defaultRuleSet = getDefaultRuleSet();
1528      }
1529      originalDescription = rules;
1530  }
1531  
1532  // override the NumberFormat implementation in order to
1533  // lazily initialize relevant items
1534  void
setContext(UDisplayContext value,UErrorCode & status)1535  RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1536  {
1537      NumberFormat::setContext(value, status);
1538      if (U_SUCCESS(status)) {
1539      	if (!capitalizationInfoSet &&
1540      	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1541      	    initCapitalizationContextInfo(locale);
1542      	    capitalizationInfoSet = TRUE;
1543          }
1544  #if !UCONFIG_NO_BREAK_ITERATION
1545          if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1546                  (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1547                  (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1548              UErrorCode status = U_ZERO_ERROR;
1549              capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1550              if (U_FAILURE(status)) {
1551                  delete capitalizationBrkIter;
1552                  capitalizationBrkIter = NULL;
1553              }
1554          }
1555  #endif
1556      }
1557  }
1558  
1559  void
initCapitalizationContextInfo(const Locale & thelocale)1560  RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1561  {
1562  #if !UCONFIG_NO_BREAK_ITERATION
1563      const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1564      UErrorCode status = U_ZERO_ERROR;
1565      UResourceBundle *rb = ures_open(NULL, localeID, &status);
1566      rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1567      rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1568      if (U_SUCCESS(status) && rb != NULL) {
1569          int32_t len = 0;
1570          const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1571          if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1572              capitalizationForUIListMenu = intVector[0];
1573              capitalizationForStandAlone = intVector[1];
1574          }
1575      }
1576      ures_close(rb);
1577  #endif
1578  }
1579  
1580  void
stripWhitespace(UnicodeString & description)1581  RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1582  {
1583      // iterate through the characters...
1584      UnicodeString result;
1585  
1586      int start = 0;
1587      while (start != -1 && start < description.length()) {
1588          // seek to the first non-whitespace character...
1589          while (start < description.length()
1590              && PatternProps::isWhiteSpace(description.charAt(start))) {
1591              ++start;
1592          }
1593  
1594          // locate the next semicolon in the text and copy the text from
1595          // our current position up to that semicolon into the result
1596          int32_t p = description.indexOf(gSemiColon, start);
1597          if (p == -1) {
1598              // or if we don't find a semicolon, just copy the rest of
1599              // the string into the result
1600              result.append(description, start, description.length() - start);
1601              start = -1;
1602          }
1603          else if (p < description.length()) {
1604              result.append(description, start, p + 1 - start);
1605              start = p + 1;
1606          }
1607  
1608          // when we get here, we've seeked off the end of the sring, and
1609          // we terminate the loop (we continue until *start* is -1 rather
1610          // than until *p* is -1, because otherwise we'd miss the last
1611          // rule in the description)
1612          else {
1613              start = -1;
1614          }
1615      }
1616  
1617      description.setTo(result);
1618  }
1619  
1620  
1621  void
dispose()1622  RuleBasedNumberFormat::dispose()
1623  {
1624      if (ruleSets) {
1625          for (NFRuleSet** p = ruleSets; *p; ++p) {
1626              delete *p;
1627          }
1628          uprv_free(ruleSets);
1629          ruleSets = NULL;
1630      }
1631  
1632      if (ruleSetDescriptions) {
1633          delete [] ruleSetDescriptions;
1634          ruleSetDescriptions = NULL;
1635      }
1636  
1637  #if !UCONFIG_NO_COLLATION
1638      delete collator;
1639  #endif
1640      collator = NULL;
1641  
1642      delete decimalFormatSymbols;
1643      decimalFormatSymbols = NULL;
1644  
1645      delete defaultInfinityRule;
1646      defaultInfinityRule = NULL;
1647  
1648      delete defaultNaNRule;
1649      defaultNaNRule = NULL;
1650  
1651      delete lenientParseRules;
1652      lenientParseRules = NULL;
1653  
1654  #if !UCONFIG_NO_BREAK_ITERATION
1655      delete capitalizationBrkIter;
1656      capitalizationBrkIter = NULL;
1657  #endif
1658  
1659      if (localizations) {
1660          localizations = localizations->unref();
1661      }
1662  }
1663  
1664  
1665  //-----------------------------------------------------------------------
1666  // package-internal API
1667  //-----------------------------------------------------------------------
1668  
1669  /**
1670   * Returns the collator to use for lenient parsing.  The collator is lazily created:
1671   * this function creates it the first time it's called.
1672   * @return The collator to use for lenient parsing, or null if lenient parsing
1673   * is turned off.
1674  */
1675  const RuleBasedCollator*
getCollator() const1676  RuleBasedNumberFormat::getCollator() const
1677  {
1678  #if !UCONFIG_NO_COLLATION
1679      if (!ruleSets) {
1680          return NULL;
1681      }
1682  
1683      // lazy-evaluate the collator
1684      if (collator == NULL && lenient) {
1685          // create a default collator based on the formatter's locale,
1686          // then pull out that collator's rules, append any additional
1687          // rules specified in the description, and create a _new_
1688          // collator based on the combinaiton of those rules
1689  
1690          UErrorCode status = U_ZERO_ERROR;
1691  
1692          Collator* temp = Collator::createInstance(locale, status);
1693          RuleBasedCollator* newCollator;
1694          if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1695              if (lenientParseRules) {
1696                  UnicodeString rules(newCollator->getRules());
1697                  rules.append(*lenientParseRules);
1698  
1699                  newCollator = new RuleBasedCollator(rules, status);
1700                  // Exit if newCollator could not be created.
1701                  if (newCollator == NULL) {
1702                      return NULL;
1703                  }
1704              } else {
1705                  temp = NULL;
1706              }
1707              if (U_SUCCESS(status)) {
1708                  newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1709                  // cast away const
1710                  ((RuleBasedNumberFormat*)this)->collator = newCollator;
1711              } else {
1712                  delete newCollator;
1713              }
1714          }
1715          delete temp;
1716      }
1717  #endif
1718  
1719      // if lenient-parse mode is off, this will be null
1720      // (see setLenientParseMode())
1721      return collator;
1722  }
1723  
1724  
1725  DecimalFormatSymbols*
initializeDecimalFormatSymbols(UErrorCode & status)1726  RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1727  {
1728      // lazy-evaluate the DecimalFormatSymbols object.  This object
1729      // is shared by all DecimalFormat instances belonging to this
1730      // formatter
1731      if (decimalFormatSymbols == NULL) {
1732          DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1733          if (U_SUCCESS(status)) {
1734              decimalFormatSymbols = temp;
1735          }
1736          else {
1737              delete temp;
1738          }
1739      }
1740      return decimalFormatSymbols;
1741  }
1742  
1743  /**
1744   * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1745   * instances owned by this formatter.
1746  */
1747  const DecimalFormatSymbols*
getDecimalFormatSymbols() const1748  RuleBasedNumberFormat::getDecimalFormatSymbols() const
1749  {
1750      return decimalFormatSymbols;
1751  }
1752  
1753  NFRule*
initializeDefaultInfinityRule(UErrorCode & status)1754  RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1755  {
1756      if (U_FAILURE(status)) {
1757          return NULL;
1758      }
1759      if (defaultInfinityRule == NULL) {
1760          UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1761          rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1762          NFRule* temp = new NFRule(this, rule, status);
1763          if (U_SUCCESS(status)) {
1764              defaultInfinityRule = temp;
1765          }
1766          else {
1767              delete temp;
1768          }
1769      }
1770      return defaultInfinityRule;
1771  }
1772  
1773  const NFRule*
getDefaultInfinityRule() const1774  RuleBasedNumberFormat::getDefaultInfinityRule() const
1775  {
1776      return defaultInfinityRule;
1777  }
1778  
1779  NFRule*
initializeDefaultNaNRule(UErrorCode & status)1780  RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1781  {
1782      if (U_FAILURE(status)) {
1783          return NULL;
1784      }
1785      if (defaultNaNRule == NULL) {
1786          UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1787          rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1788          NFRule* temp = new NFRule(this, rule, status);
1789          if (U_SUCCESS(status)) {
1790              defaultNaNRule = temp;
1791          }
1792          else {
1793              delete temp;
1794          }
1795      }
1796      return defaultNaNRule;
1797  }
1798  
1799  const NFRule*
getDefaultNaNRule() const1800  RuleBasedNumberFormat::getDefaultNaNRule() const
1801  {
1802      return defaultNaNRule;
1803  }
1804  
1805  // De-owning the current localized symbols and adopt the new symbols.
1806  void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1807  RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1808  {
1809      if (symbolsToAdopt == NULL) {
1810          return; // do not allow caller to set decimalFormatSymbols to NULL
1811      }
1812  
1813      if (decimalFormatSymbols != NULL) {
1814          delete decimalFormatSymbols;
1815      }
1816  
1817      decimalFormatSymbols = symbolsToAdopt;
1818  
1819      {
1820          // Apply the new decimalFormatSymbols by reparsing the rulesets
1821          UErrorCode status = U_ZERO_ERROR;
1822  
1823          delete defaultInfinityRule;
1824          defaultInfinityRule = NULL;
1825          initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1826  
1827          delete defaultNaNRule;
1828          defaultNaNRule = NULL;
1829          initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1830  
1831          if (ruleSets) {
1832              for (int32_t i = 0; i < numRuleSets; i++) {
1833                  ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1834              }
1835          }
1836      }
1837  }
1838  
1839  // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1840  void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1841  RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1842  {
1843      adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1844  }
1845  
1846  PluralFormat *
createPluralFormat(UPluralType pluralType,const UnicodeString & pattern,UErrorCode & status) const1847  RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1848                                            const UnicodeString &pattern,
1849                                            UErrorCode& status) const
1850  {
1851      return new PluralFormat(locale, pluralType, pattern, status);
1852  }
1853  
1854  U_NAMESPACE_END
1855  
1856  /* U_HAVE_RBNF */
1857  #endif
1858