1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 *
7 * File CHOICFMT.CPP
8 *
9 * Modification History:
10 *
11 *   Date        Name        Description
12 *   02/19/97    aliu        Converted from java.
13 *   03/20/97    helena      Finished first cut of implementation and got rid
14 *                           of nextDouble/previousDouble and replaced with
15 *                           boolean array.
16 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
17 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
18 *                           wchar.h.
19 *   07/09/97    helena      Made ParsePosition into a class.
20 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
21 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
22 *   02/22/99    stephen     Removed character literals for EBCDIC safety
23 ********************************************************************************
24 */
25 
26 #include "unicode/utypes.h"
27 
28 #if !UCONFIG_NO_FORMATTING
29 
30 #include "unicode/choicfmt.h"
31 #include "unicode/numfmt.h"
32 #include "unicode/locid.h"
33 #include "cpputils.h"
34 #include "cstring.h"
35 #include "messageimpl.h"
36 #include "putilimp.h"
37 #include "uassert.h"
38 #include <stdio.h>
39 #include <float.h>
40 
41 // *****************************************************************************
42 // class ChoiceFormat
43 // *****************************************************************************
44 
45 U_NAMESPACE_BEGIN
46 
47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
48 
49 // Special characters used by ChoiceFormat.  There are two characters
50 // used interchangeably to indicate <=.  Either is parsed, but only
51 // LESS_EQUAL is generated by toPattern().
52 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
53 #define LESS_THAN    ((UChar)0x003C)   /*<*/
54 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
55 #define LESS_EQUAL2  ((UChar)0x2264)
56 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
57 #define MINUS        ((UChar)0x002D)   /*-*/
58 
59 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
60 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
61 
62 #ifdef INFINITY
63 #undef INFINITY
64 #endif
65 #define INFINITY     ((UChar)0x221E)
66 
67 //static const UChar gPositiveInfinity[] = {INFINITY, 0};
68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
69 #define POSITIVE_INF_STRLEN 1
70 #define NEGATIVE_INF_STRLEN 2
71 
72 // -------------------------------------
73 // Creates a ChoiceFormat instance based on the pattern.
74 
ChoiceFormat(const UnicodeString & newPattern,UErrorCode & status)75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
76                            UErrorCode& status)
77 : constructorErrorCode(status),
78   msgPattern(status)
79 {
80     applyPattern(newPattern, status);
81 }
82 
83 // -------------------------------------
84 // Creates a ChoiceFormat instance with the limit array and
85 // format strings for each limit.
86 
ChoiceFormat(const double * limits,const UnicodeString * formats,int32_t cnt)87 ChoiceFormat::ChoiceFormat(const double* limits,
88                            const UnicodeString* formats,
89                            int32_t cnt )
90 : constructorErrorCode(U_ZERO_ERROR),
91   msgPattern(constructorErrorCode)
92 {
93     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
94 }
95 
96 // -------------------------------------
97 
ChoiceFormat(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)98 ChoiceFormat::ChoiceFormat(const double* limits,
99                            const UBool* closures,
100                            const UnicodeString* formats,
101                            int32_t cnt )
102 : constructorErrorCode(U_ZERO_ERROR),
103   msgPattern(constructorErrorCode)
104 {
105     setChoices(limits, closures, formats, cnt, constructorErrorCode);
106 }
107 
108 // -------------------------------------
109 // copy constructor
110 
// Copy constructor: copies the NumberFormat base, the recorded constructor
// status and the parsed message pattern.
ChoiceFormat::ChoiceFormat(const ChoiceFormat &that)
    : NumberFormat(that),
      constructorErrorCode(that.constructorErrorCode),
      msgPattern(that.msgPattern)
{
}
117 
118 // -------------------------------------
119 // Private constructor that creates a
120 // ChoiceFormat instance based on the
121 // pattern and populates UParseError
122 
ChoiceFormat(const UnicodeString & newPattern,UParseError & parseError,UErrorCode & status)123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
124                            UParseError& parseError,
125                            UErrorCode& status)
126 : constructorErrorCode(status),
127   msgPattern(status)
128 {
129     applyPattern(newPattern,parseError, status);
130 }
131 // -------------------------------------
132 
133 UBool
operator ==(const Format & that) const134 ChoiceFormat::operator==(const Format& that) const
135 {
136     if (this == &that) return TRUE;
137     if (!NumberFormat::operator==(that)) return FALSE;
138     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
139     return msgPattern == thatAlias.msgPattern;
140 }
141 
142 // -------------------------------------
// assignment operator
144 
145 const ChoiceFormat&
operator =(const ChoiceFormat & that)146 ChoiceFormat::operator=(const   ChoiceFormat& that)
147 {
148     if (this != &that) {
149         NumberFormat::operator=(that);
150         constructorErrorCode = that.constructorErrorCode;
151         msgPattern = that.msgPattern;
152     }
153     return *this;
154 }
155 
156 // -------------------------------------
157 
~ChoiceFormat()158 ChoiceFormat::~ChoiceFormat()
159 {
160 }
161 
162 // -------------------------------------
163 
164 /**
165  * Convert a double value to a string without the overhead of NumberFormat.
166  */
167 UnicodeString&
dtos(double value,UnicodeString & string)168 ChoiceFormat::dtos(double value,
169                    UnicodeString& string)
170 {
171     /* Buffer to contain the digits and any extra formatting stuff. */
172     char temp[DBL_DIG + 16];
173     char *itrPtr = temp;
174     char *expPtr;
175 
176     sprintf(temp, "%.*g", DBL_DIG, value);
177 
178     /* Find and convert the decimal point.
179        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
180     */
181     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
182         itrPtr++;
183     }
184     if (*itrPtr != 0 && *itrPtr != 'e') {
185         /* We reached something that looks like a decimal point.
186         In case someone used setlocale(), which changes the decimal point. */
187         *itrPtr = '.';
188         itrPtr++;
189     }
190     /* Search for the exponent */
191     while (*itrPtr && *itrPtr != 'e') {
192         itrPtr++;
193     }
194     if (*itrPtr == 'e') {
195         itrPtr++;
196         /* Verify the exponent sign */
197         if (*itrPtr == '+' || *itrPtr == '-') {
198             itrPtr++;
199         }
200         /* Remove leading zeros. You will see this on Windows machines. */
201         expPtr = itrPtr;
202         while (*itrPtr == '0') {
203             itrPtr++;
204         }
205         if (*itrPtr && expPtr != itrPtr) {
206             /* Shift the exponent without zeros. */
207             while (*itrPtr) {
208                 *(expPtr++)  = *(itrPtr++);
209             }
210             // NULL terminate
211             *expPtr = 0;
212         }
213     }
214 
215     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
216     return string;
217 }
218 
219 // -------------------------------------
// Applies the pattern; parse-error details are not reported (NULL is passed for them).
221 
222 void
applyPattern(const UnicodeString & pattern,UErrorCode & status)223 ChoiceFormat::applyPattern(const UnicodeString& pattern,
224                            UErrorCode& status)
225 {
226     msgPattern.parseChoiceStyle(pattern, NULL, status);
227     constructorErrorCode = status;
228 }
229 
230 // -------------------------------------
231 // Applies the pattern to this ChoiceFormat instance.
232 
233 void
applyPattern(const UnicodeString & pattern,UParseError & parseError,UErrorCode & status)234 ChoiceFormat::applyPattern(const UnicodeString& pattern,
235                            UParseError& parseError,
236                            UErrorCode& status)
237 {
238     msgPattern.parseChoiceStyle(pattern, &parseError, status);
239     constructorErrorCode = status;
240 }
241 // -------------------------------------
242 // Returns the input pattern string.
243 
244 UnicodeString&
toPattern(UnicodeString & result) const245 ChoiceFormat::toPattern(UnicodeString& result) const
246 {
247     return result = msgPattern.getPatternString();
248 }
249 
250 // -------------------------------------
251 // Sets the limit and format arrays.
252 void
setChoices(const double * limits,const UnicodeString * formats,int32_t cnt)253 ChoiceFormat::setChoices(  const double* limits,
254                            const UnicodeString* formats,
255                            int32_t cnt )
256 {
257     UErrorCode errorCode = U_ZERO_ERROR;
258     setChoices(limits, NULL, formats, cnt, errorCode);
259 }
260 
261 // -------------------------------------
262 // Sets the limit and format arrays.
263 void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)264 ChoiceFormat::setChoices(  const double* limits,
265                            const UBool* closures,
266                            const UnicodeString* formats,
267                            int32_t cnt )
268 {
269     UErrorCode errorCode = U_ZERO_ERROR;
270     setChoices(limits, closures, formats, cnt, errorCode);
271 }
272 
273 void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t count,UErrorCode & errorCode)274 ChoiceFormat::setChoices(const double* limits,
275                          const UBool* closures,
276                          const UnicodeString* formats,
277                          int32_t count,
278                          UErrorCode &errorCode) {
279     if (U_FAILURE(errorCode)) {
280         return;
281     }
282     if (limits == NULL || formats == NULL) {
283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
284         return;
285     }
286     // Reconstruct the original input pattern.
287     // Modified version of the pre-ICU 4.8 toPattern() implementation.
288     UnicodeString result;
289     for (int32_t i = 0; i < count; ++i) {
290         if (i != 0) {
291             result += VERTICAL_BAR;
292         }
293         UnicodeString buf;
294         if (uprv_isPositiveInfinity(limits[i])) {
295             result += INFINITY;
296         } else if (uprv_isNegativeInfinity(limits[i])) {
297             result += MINUS;
298             result += INFINITY;
299         } else {
300             result += dtos(limits[i], buf);
301         }
302         if (closures != NULL && closures[i]) {
303             result += LESS_THAN;
304         } else {
305             result += LESS_EQUAL;
306         }
307         // Append formats[i], using quotes if there are special
308         // characters.  Single quotes themselves must be escaped in
309         // either case.
310         const UnicodeString& text = formats[i];
311         int32_t textLength = text.length();
312         int32_t nestingLevel = 0;
313         for (int32_t j = 0; j < textLength; ++j) {
314             UChar c = text[j];
315             if (c == SINGLE_QUOTE && nestingLevel == 0) {
316                 // Double each top-level apostrophe.
317                 result.append(c);
318             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
319                 // Surround each pipe symbol with apostrophes for quoting.
320                 // If the next character is an apostrophe, then that will be doubled,
321                 // and although the parser will see the apostrophe pairs beginning
322                 // and ending one character earlier than our doubling, the result
323                 // is as desired.
324                 //   | -> '|'
325                 //   |' -> '|'''
326                 //   |'' -> '|''''' etc.
327                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
328                 continue;  // Skip the append(c) at the end of the loop body.
329             } else if (c == LEFT_CURLY_BRACE) {
330                 ++nestingLevel;
331             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
332                 --nestingLevel;
333             }
334             result.append(c);
335         }
336     }
337     // Apply the reconstructed pattern.
338     applyPattern(result, errorCode);
339 }
340 
341 // -------------------------------------
342 // Gets the limit array.
343 
344 const double*
getLimits(int32_t & cnt) const345 ChoiceFormat::getLimits(int32_t& cnt) const
346 {
347     cnt = 0;
348     return NULL;
349 }
350 
351 // -------------------------------------
352 // Gets the closures array.
353 
354 const UBool*
getClosures(int32_t & cnt) const355 ChoiceFormat::getClosures(int32_t& cnt) const
356 {
357     cnt = 0;
358     return NULL;
359 }
360 
361 // -------------------------------------
362 // Gets the format array.
363 
364 const UnicodeString*
getFormats(int32_t & cnt) const365 ChoiceFormat::getFormats(int32_t& cnt) const
366 {
367     cnt = 0;
368     return NULL;
369 }
370 
371 // -------------------------------------
372 // Formats an int64 number, it's actually formatted as
373 // a double.  The returned format string may differ
374 // from the input number because of this.
375 
376 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & status) const377 ChoiceFormat::format(int64_t number,
378                      UnicodeString& appendTo,
379                      FieldPosition& status) const
380 {
381     return format((double) number, appendTo, status);
382 }
383 
384 // -------------------------------------
385 // Formats an int32_t number, it's actually formatted as
386 // a double.
387 
388 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & status) const389 ChoiceFormat::format(int32_t number,
390                      UnicodeString& appendTo,
391                      FieldPosition& status) const
392 {
393     return format((double) number, appendTo, status);
394 }
395 
396 // -------------------------------------
397 // Formats a double number.
398 
399 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition &) const400 ChoiceFormat::format(double number,
401                      UnicodeString& appendTo,
402                      FieldPosition& /*pos*/) const
403 {
404     if (msgPattern.countParts() == 0) {
405         // No pattern was applied, or it failed.
406         return appendTo;
407     }
408     // Get the appropriate sub-message.
409     int32_t msgStart = findSubMessage(msgPattern, 0, number);
410     if (!MessageImpl::jdkAposMode(msgPattern)) {
411         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
412         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
413         appendTo.append(msgPattern.getPatternString(),
414                         patternStart,
415                         msgPattern.getPatternIndex(msgLimit) - patternStart);
416         return appendTo;
417     }
418     // JDK compatibility mode: Remove SKIP_SYNTAX.
419     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
420 }
421 
422 int32_t
findSubMessage(const MessagePattern & pattern,int32_t partIndex,double number)423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
424     int32_t count = pattern.countParts();
425     int32_t msgStart;
426     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
427     // until ARG_LIMIT or end of choice-only pattern.
428     // Ignore the first number and selector and start the loop on the first message.
429     partIndex += 2;
430     for (;;) {
431         // Skip but remember the current sub-message.
432         msgStart = partIndex;
433         partIndex = pattern.getLimitPartIndex(partIndex);
434         if (++partIndex >= count) {
435             // Reached the end of the choice-only pattern.
436             // Return with the last sub-message.
437             break;
438         }
439         const MessagePattern::Part &part = pattern.getPart(partIndex++);
440         UMessagePatternPartType type = part.getType();
441         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
442             // Reached the end of the ChoiceFormat style.
443             // Return with the last sub-message.
444             break;
445         }
446         // part is an ARG_INT or ARG_DOUBLE
447         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
448         double boundary = pattern.getNumericValue(part);
449         // Fetch the ARG_SELECTOR character.
450         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
451         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
452         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
453             // The number is in the interval between the previous boundary and the current one.
454             // Return with the sub-message between them.
455             // The !(a>b) and !(a>=b) comparisons are equivalent to
456             // (a<=b) and (a<b) except they "catch" NaN.
457             break;
458         }
459     }
460     return msgStart;
461 }
462 
463 // -------------------------------------
// Formats an array of objects.  Each object is converted to a double
// with Formattable::getDouble() and then formatted in turn.
466 
467 UnicodeString&
format(const Formattable * objs,int32_t cnt,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const468 ChoiceFormat::format(const Formattable* objs,
469                      int32_t cnt,
470                      UnicodeString& appendTo,
471                      FieldPosition& pos,
472                      UErrorCode& status) const
473 {
474     if(cnt < 0) {
475         status = U_ILLEGAL_ARGUMENT_ERROR;
476         return appendTo;
477     }
478     if (msgPattern.countParts() == 0) {
479         status = U_INVALID_STATE_ERROR;
480         return appendTo;
481     }
482 
483     for (int32_t i = 0; i < cnt; i++) {
484         double objDouble = objs[i].getDouble(status);
485         if (U_SUCCESS(status)) {
486             format(objDouble, appendTo, pos);
487         }
488     }
489 
490     return appendTo;
491 }
492 
493 // -------------------------------------
494 
495 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & pos) const496 ChoiceFormat::parse(const UnicodeString& text,
497                     Formattable& result,
498                     ParsePosition& pos) const
499 {
500     result.setDouble(parseArgument(msgPattern, 0, text, pos));
501 }
502 
503 double
parseArgument(const MessagePattern & pattern,int32_t partIndex,const UnicodeString & source,ParsePosition & pos)504 ChoiceFormat::parseArgument(
505         const MessagePattern &pattern, int32_t partIndex,
506         const UnicodeString &source, ParsePosition &pos) {
507     // find the best number (defined as the one with the longest parse)
508     int32_t start = pos.getIndex();
509     int32_t furthest = start;
510     double bestNumber = uprv_getNaN();
511     double tempNumber = 0.0;
512     int32_t count = pattern.countParts();
513     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
514         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
515         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
516         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
517         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
518         if (len >= 0) {
519             int32_t newIndex = start + len;
520             if (newIndex > furthest) {
521                 furthest = newIndex;
522                 bestNumber = tempNumber;
523                 if (furthest == source.length()) {
524                     break;
525                 }
526             }
527         }
528         partIndex = msgLimit + 1;
529     }
530     if (furthest == start) {
531         pos.setErrorIndex(start);
532     } else {
533         pos.setIndex(furthest);
534     }
535     return bestNumber;
536 }
537 
538 int32_t
matchStringUntilLimitPart(const MessagePattern & pattern,int32_t partIndex,int32_t limitPartIndex,const UnicodeString & source,int32_t sourceOffset)539 ChoiceFormat::matchStringUntilLimitPart(
540         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
541         const UnicodeString &source, int32_t sourceOffset) {
542     int32_t matchingSourceLength = 0;
543     const UnicodeString &msgString = pattern.getPatternString();
544     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
545     for (;;) {
546         const MessagePattern::Part &part = pattern.getPart(++partIndex);
547         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
548             int32_t index = part.getIndex();
549             int32_t length = index - prevIndex;
550             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
551                 return -1;  // mismatch
552             }
553             matchingSourceLength += length;
554             if (partIndex == limitPartIndex) {
555                 return matchingSourceLength;
556             }
557             prevIndex = part.getLimit();  // SKIP_SYNTAX
558         }
559     }
560 }
561 
562 // -------------------------------------
563 
564 Format*
clone() const565 ChoiceFormat::clone() const
566 {
567     ChoiceFormat *aCopy = new ChoiceFormat(*this);
568     return aCopy;
569 }
570 
571 U_NAMESPACE_END
572 
573 #endif /* #if !UCONFIG_NO_FORMATTING */
574 
575 //eof
576