• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // © 2016 and later: Unicode, Inc. and others.
2  // License & terms of use: http://www.unicode.org/copyright.html
3  /*
4  *******************************************************************************
5  * Copyright (C) 1997-2013, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  *
9  * File CHOICFMT.CPP
10  *
11  * Modification History:
12  *
13  *   Date        Name        Description
14  *   02/19/97    aliu        Converted from java.
15  *   03/20/97    helena      Finished first cut of implementation and got rid
16  *                           of nextDouble/previousDouble and replaced with
17  *                           boolean array.
18  *   4/10/97     aliu        Clean up.  Modified to work on AIX.
19  *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
20  *                           wchar.h.
21  *   07/09/97    helena      Made ParsePosition into a class.
22  *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
23  *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
24  *   02/22/99    stephen     Removed character literals for EBCDIC safety
25  ********************************************************************************
26  */
27  
28  #include "unicode/utypes.h"
29  
30  #if !UCONFIG_NO_FORMATTING
31  
32  #include "unicode/choicfmt.h"
33  #include "unicode/numfmt.h"
34  #include "unicode/locid.h"
35  #include "cpputils.h"
36  #include "cstring.h"
37  #include "messageimpl.h"
38  #include "putilimp.h"
39  #include "uassert.h"
40  #include <stdio.h>
41  #include <float.h>
42  
43  // *****************************************************************************
44  // class ChoiceFormat
45  // *****************************************************************************
46  
47  U_NAMESPACE_BEGIN
48  
49  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
50  
51  // Special characters used by ChoiceFormat.  There are two characters
52  // used interchangeably to indicate <=.  Either is parsed, but only
53  // LESS_EQUAL is generated by toPattern().
54  #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
55  #define LESS_THAN    ((UChar)0x003C)   /*<*/
56  #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
57  #define LESS_EQUAL2  ((UChar)0x2264)
58  #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
59  #define MINUS        ((UChar)0x002D)   /*-*/
60  
61  static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
62  static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
63  
64  #ifdef INFINITY
65  #undef INFINITY
66  #endif
67  #define INFINITY     ((UChar)0x221E)
68  
69  //static const UChar gPositiveInfinity[] = {INFINITY, 0};
70  //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
71  #define POSITIVE_INF_STRLEN 1
72  #define NEGATIVE_INF_STRLEN 2
73  
74  // -------------------------------------
75  // Creates a ChoiceFormat instance based on the pattern.
76  
ChoiceFormat(const UnicodeString & newPattern,UErrorCode & status)77  ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
78                             UErrorCode& status)
79  : constructorErrorCode(status),
80    msgPattern(status)
81  {
82      applyPattern(newPattern, status);
83  }
84  
85  // -------------------------------------
86  // Creates a ChoiceFormat instance with the limit array and
87  // format strings for each limit.
88  
ChoiceFormat(const double * limits,const UnicodeString * formats,int32_t cnt)89  ChoiceFormat::ChoiceFormat(const double* limits,
90                             const UnicodeString* formats,
91                             int32_t cnt )
92  : constructorErrorCode(U_ZERO_ERROR),
93    msgPattern(constructorErrorCode)
94  {
95      setChoices(limits, NULL, formats, cnt, constructorErrorCode);
96  }
97  
98  // -------------------------------------
99  
ChoiceFormat(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)100  ChoiceFormat::ChoiceFormat(const double* limits,
101                             const UBool* closures,
102                             const UnicodeString* formats,
103                             int32_t cnt )
104  : constructorErrorCode(U_ZERO_ERROR),
105    msgPattern(constructorErrorCode)
106  {
107      setChoices(limits, closures, formats, cnt, constructorErrorCode);
108  }
109  
110  // -------------------------------------
111  // copy constructor
112  
/**
 * Copy constructor: copies the NumberFormat base, the recorded constructor
 * error code and the parsed message pattern from another instance.
 *
 * @param that the instance to copy
 */
ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
    : NumberFormat(that),
      constructorErrorCode(that.constructorErrorCode),
      msgPattern(that.msgPattern)
{
}
119  
120  // -------------------------------------
121  // Private constructor that creates a
122  // ChoiceFormat instance based on the
123  // pattern and populates UParseError
124  
ChoiceFormat(const UnicodeString & newPattern,UParseError & parseError,UErrorCode & status)125  ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
126                             UParseError& parseError,
127                             UErrorCode& status)
128  : constructorErrorCode(status),
129    msgPattern(status)
130  {
131      applyPattern(newPattern,parseError, status);
132  }
133  // -------------------------------------
134  
135  UBool
operator ==(const Format & that) const136  ChoiceFormat::operator==(const Format& that) const
137  {
138      if (this == &that) return TRUE;
139      if (!NumberFormat::operator==(that)) return FALSE;
140      ChoiceFormat& thatAlias = (ChoiceFormat&)that;
141      return msgPattern == thatAlias.msgPattern;
142  }
143  
144  // -------------------------------------
145  // copy constructor
146  
147  const ChoiceFormat&
operator =(const ChoiceFormat & that)148  ChoiceFormat::operator=(const   ChoiceFormat& that)
149  {
150      if (this != &that) {
151          NumberFormat::operator=(that);
152          constructorErrorCode = that.constructorErrorCode;
153          msgPattern = that.msgPattern;
154      }
155      return *this;
156  }
157  
158  // -------------------------------------
159  
~ChoiceFormat()160  ChoiceFormat::~ChoiceFormat()
161  {
162  }
163  
164  // -------------------------------------
165  
166  /**
167   * Convert a double value to a string without the overhead of NumberFormat.
168   */
169  UnicodeString&
dtos(double value,UnicodeString & string)170  ChoiceFormat::dtos(double value,
171                     UnicodeString& string)
172  {
173      /* Buffer to contain the digits and any extra formatting stuff. */
174      char temp[DBL_DIG + 16];
175      char *itrPtr = temp;
176      char *expPtr;
177  
178      sprintf(temp, "%.*g", DBL_DIG, value);
179  
180      /* Find and convert the decimal point.
181         Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182      */
183      while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
184          itrPtr++;
185      }
186      if (*itrPtr != 0 && *itrPtr != 'e') {
187          /* We reached something that looks like a decimal point.
188          In case someone used setlocale(), which changes the decimal point. */
189          *itrPtr = '.';
190          itrPtr++;
191      }
192      /* Search for the exponent */
193      while (*itrPtr && *itrPtr != 'e') {
194          itrPtr++;
195      }
196      if (*itrPtr == 'e') {
197          itrPtr++;
198          /* Verify the exponent sign */
199          if (*itrPtr == '+' || *itrPtr == '-') {
200              itrPtr++;
201          }
202          /* Remove leading zeros. You will see this on Windows machines. */
203          expPtr = itrPtr;
204          while (*itrPtr == '0') {
205              itrPtr++;
206          }
207          if (*itrPtr && expPtr != itrPtr) {
208              /* Shift the exponent without zeros. */
209              while (*itrPtr) {
210                  *(expPtr++)  = *(itrPtr++);
211              }
212              // NULL terminate
213              *expPtr = 0;
214          }
215      }
216  
217      string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
218      return string;
219  }
220  
221  // -------------------------------------
222  // calls the overloaded applyPattern method.
223  
224  void
applyPattern(const UnicodeString & pattern,UErrorCode & status)225  ChoiceFormat::applyPattern(const UnicodeString& pattern,
226                             UErrorCode& status)
227  {
228      msgPattern.parseChoiceStyle(pattern, NULL, status);
229      constructorErrorCode = status;
230  }
231  
232  // -------------------------------------
233  // Applies the pattern to this ChoiceFormat instance.
234  
235  void
applyPattern(const UnicodeString & pattern,UParseError & parseError,UErrorCode & status)236  ChoiceFormat::applyPattern(const UnicodeString& pattern,
237                             UParseError& parseError,
238                             UErrorCode& status)
239  {
240      msgPattern.parseChoiceStyle(pattern, &parseError, status);
241      constructorErrorCode = status;
242  }
243  // -------------------------------------
244  // Returns the input pattern string.
245  
246  UnicodeString&
toPattern(UnicodeString & result) const247  ChoiceFormat::toPattern(UnicodeString& result) const
248  {
249      return result = msgPattern.getPatternString();
250  }
251  
252  // -------------------------------------
253  // Sets the limit and format arrays.
254  void
setChoices(const double * limits,const UnicodeString * formats,int32_t cnt)255  ChoiceFormat::setChoices(  const double* limits,
256                             const UnicodeString* formats,
257                             int32_t cnt )
258  {
259      UErrorCode errorCode = U_ZERO_ERROR;
260      setChoices(limits, NULL, formats, cnt, errorCode);
261  }
262  
263  // -------------------------------------
264  // Sets the limit and format arrays.
265  void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)266  ChoiceFormat::setChoices(  const double* limits,
267                             const UBool* closures,
268                             const UnicodeString* formats,
269                             int32_t cnt )
270  {
271      UErrorCode errorCode = U_ZERO_ERROR;
272      setChoices(limits, closures, formats, cnt, errorCode);
273  }
274  
275  void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t count,UErrorCode & errorCode)276  ChoiceFormat::setChoices(const double* limits,
277                           const UBool* closures,
278                           const UnicodeString* formats,
279                           int32_t count,
280                           UErrorCode &errorCode) {
281      if (U_FAILURE(errorCode)) {
282          return;
283      }
284      if (limits == NULL || formats == NULL) {
285          errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286          return;
287      }
288      // Reconstruct the original input pattern.
289      // Modified version of the pre-ICU 4.8 toPattern() implementation.
290      UnicodeString result;
291      for (int32_t i = 0; i < count; ++i) {
292          if (i != 0) {
293              result += VERTICAL_BAR;
294          }
295          UnicodeString buf;
296          if (uprv_isPositiveInfinity(limits[i])) {
297              result += INFINITY;
298          } else if (uprv_isNegativeInfinity(limits[i])) {
299              result += MINUS;
300              result += INFINITY;
301          } else {
302              result += dtos(limits[i], buf);
303          }
304          if (closures != NULL && closures[i]) {
305              result += LESS_THAN;
306          } else {
307              result += LESS_EQUAL;
308          }
309          // Append formats[i], using quotes if there are special
310          // characters.  Single quotes themselves must be escaped in
311          // either case.
312          const UnicodeString& text = formats[i];
313          int32_t textLength = text.length();
314          int32_t nestingLevel = 0;
315          for (int32_t j = 0; j < textLength; ++j) {
316              UChar c = text[j];
317              if (c == SINGLE_QUOTE && nestingLevel == 0) {
318                  // Double each top-level apostrophe.
319                  result.append(c);
320              } else if (c == VERTICAL_BAR && nestingLevel == 0) {
321                  // Surround each pipe symbol with apostrophes for quoting.
322                  // If the next character is an apostrophe, then that will be doubled,
323                  // and although the parser will see the apostrophe pairs beginning
324                  // and ending one character earlier than our doubling, the result
325                  // is as desired.
326                  //   | -> '|'
327                  //   |' -> '|'''
328                  //   |'' -> '|''''' etc.
329                  result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330                  continue;  // Skip the append(c) at the end of the loop body.
331              } else if (c == LEFT_CURLY_BRACE) {
332                  ++nestingLevel;
333              } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
334                  --nestingLevel;
335              }
336              result.append(c);
337          }
338      }
339      // Apply the reconstructed pattern.
340      applyPattern(result, errorCode);
341  }
342  
343  // -------------------------------------
344  // Gets the limit array.
345  
346  const double*
getLimits(int32_t & cnt) const347  ChoiceFormat::getLimits(int32_t& cnt) const
348  {
349      cnt = 0;
350      return NULL;
351  }
352  
353  // -------------------------------------
354  // Gets the closures array.
355  
356  const UBool*
getClosures(int32_t & cnt) const357  ChoiceFormat::getClosures(int32_t& cnt) const
358  {
359      cnt = 0;
360      return NULL;
361  }
362  
363  // -------------------------------------
364  // Gets the format array.
365  
366  const UnicodeString*
getFormats(int32_t & cnt) const367  ChoiceFormat::getFormats(int32_t& cnt) const
368  {
369      cnt = 0;
370      return NULL;
371  }
372  
373  // -------------------------------------
374  // Formats an int64 number, it's actually formatted as
375  // a double.  The returned format string may differ
376  // from the input number because of this.
377  
378  UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & status) const379  ChoiceFormat::format(int64_t number,
380                       UnicodeString& appendTo,
381                       FieldPosition& status) const
382  {
383      return format((double) number, appendTo, status);
384  }
385  
386  // -------------------------------------
387  // Formats an int32_t number, it's actually formatted as
388  // a double.
389  
390  UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & status) const391  ChoiceFormat::format(int32_t number,
392                       UnicodeString& appendTo,
393                       FieldPosition& status) const
394  {
395      return format((double) number, appendTo, status);
396  }
397  
398  // -------------------------------------
399  // Formats a double number.
400  
401  UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition &) const402  ChoiceFormat::format(double number,
403                       UnicodeString& appendTo,
404                       FieldPosition& /*pos*/) const
405  {
406      if (msgPattern.countParts() == 0) {
407          // No pattern was applied, or it failed.
408          return appendTo;
409      }
410      // Get the appropriate sub-message.
411      int32_t msgStart = findSubMessage(msgPattern, 0, number);
412      if (!MessageImpl::jdkAposMode(msgPattern)) {
413          int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
414          int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
415          appendTo.append(msgPattern.getPatternString(),
416                          patternStart,
417                          msgPattern.getPatternIndex(msgLimit) - patternStart);
418          return appendTo;
419      }
420      // JDK compatibility mode: Remove SKIP_SYNTAX.
421      return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
422  }
423  
424  int32_t
findSubMessage(const MessagePattern & pattern,int32_t partIndex,double number)425  ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
426      int32_t count = pattern.countParts();
427      int32_t msgStart;
428      // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
429      // until ARG_LIMIT or end of choice-only pattern.
430      // Ignore the first number and selector and start the loop on the first message.
431      partIndex += 2;
432      for (;;) {
433          // Skip but remember the current sub-message.
434          msgStart = partIndex;
435          partIndex = pattern.getLimitPartIndex(partIndex);
436          if (++partIndex >= count) {
437              // Reached the end of the choice-only pattern.
438              // Return with the last sub-message.
439              break;
440          }
441          const MessagePattern::Part &part = pattern.getPart(partIndex++);
442          UMessagePatternPartType type = part.getType();
443          if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
444              // Reached the end of the ChoiceFormat style.
445              // Return with the last sub-message.
446              break;
447          }
448          // part is an ARG_INT or ARG_DOUBLE
449          U_ASSERT(MessagePattern::Part::hasNumericValue(type));
450          double boundary = pattern.getNumericValue(part);
451          // Fetch the ARG_SELECTOR character.
452          int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
453          UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
454          if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
455              // The number is in the interval between the previous boundary and the current one.
456              // Return with the sub-message between them.
457              // The !(a>b) and !(a>=b) comparisons are equivalent to
458              // (a<=b) and (a<b) except they "catch" NaN.
459              break;
460          }
461      }
462      return msgStart;
463  }
464  
465  // -------------------------------------
466  // Formats an array of objects. Checks if the data type of the objects
467  // to get the right value for formatting.
468  
469  UnicodeString&
format(const Formattable * objs,int32_t cnt,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const470  ChoiceFormat::format(const Formattable* objs,
471                       int32_t cnt,
472                       UnicodeString& appendTo,
473                       FieldPosition& pos,
474                       UErrorCode& status) const
475  {
476      if(cnt < 0) {
477          status = U_ILLEGAL_ARGUMENT_ERROR;
478          return appendTo;
479      }
480      if (msgPattern.countParts() == 0) {
481          status = U_INVALID_STATE_ERROR;
482          return appendTo;
483      }
484  
485      for (int32_t i = 0; i < cnt; i++) {
486          double objDouble = objs[i].getDouble(status);
487          if (U_SUCCESS(status)) {
488              format(objDouble, appendTo, pos);
489          }
490      }
491  
492      return appendTo;
493  }
494  
495  // -------------------------------------
496  
497  void
parse(const UnicodeString & text,Formattable & result,ParsePosition & pos) const498  ChoiceFormat::parse(const UnicodeString& text,
499                      Formattable& result,
500                      ParsePosition& pos) const
501  {
502      result.setDouble(parseArgument(msgPattern, 0, text, pos));
503  }
504  
505  double
parseArgument(const MessagePattern & pattern,int32_t partIndex,const UnicodeString & source,ParsePosition & pos)506  ChoiceFormat::parseArgument(
507          const MessagePattern &pattern, int32_t partIndex,
508          const UnicodeString &source, ParsePosition &pos) {
509      // find the best number (defined as the one with the longest parse)
510      int32_t start = pos.getIndex();
511      int32_t furthest = start;
512      double bestNumber = uprv_getNaN();
513      double tempNumber = 0.0;
514      int32_t count = pattern.countParts();
515      while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516          tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517          partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
518          int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519          int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520          if (len >= 0) {
521              int32_t newIndex = start + len;
522              if (newIndex > furthest) {
523                  furthest = newIndex;
524                  bestNumber = tempNumber;
525                  if (furthest == source.length()) {
526                      break;
527                  }
528              }
529          }
530          partIndex = msgLimit + 1;
531      }
532      if (furthest == start) {
533          pos.setErrorIndex(start);
534      } else {
535          pos.setIndex(furthest);
536      }
537      return bestNumber;
538  }
539  
540  int32_t
matchStringUntilLimitPart(const MessagePattern & pattern,int32_t partIndex,int32_t limitPartIndex,const UnicodeString & source,int32_t sourceOffset)541  ChoiceFormat::matchStringUntilLimitPart(
542          const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
543          const UnicodeString &source, int32_t sourceOffset) {
544      int32_t matchingSourceLength = 0;
545      const UnicodeString &msgString = pattern.getPatternString();
546      int32_t prevIndex = pattern.getPart(partIndex).getLimit();
547      for (;;) {
548          const MessagePattern::Part &part = pattern.getPart(++partIndex);
549          if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
550              int32_t index = part.getIndex();
551              int32_t length = index - prevIndex;
552              if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
553                  return -1;  // mismatch
554              }
555              matchingSourceLength += length;
556              if (partIndex == limitPartIndex) {
557                  return matchingSourceLength;
558              }
559              prevIndex = part.getLimit();  // SKIP_SYNTAX
560          }
561      }
562  }
563  
564  // -------------------------------------
565  
566  ChoiceFormat*
clone() const567  ChoiceFormat::clone() const
568  {
569      ChoiceFormat *aCopy = new ChoiceFormat(*this);
570      return aCopy;
571  }
572  
573  U_NAMESPACE_END
574  
575  #endif /* #if !UCONFIG_NO_FORMATTING */
576  
577  //eof
578