1 /*
2 *******************************************************************************
3 * Copyright (C) 2009-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURFMT.CPP
8 *******************************************************************************
9 */
10 
11 #include "unicode/decimfmt.h"
12 #include "unicode/messagepattern.h"
13 #include "unicode/plurfmt.h"
14 #include "unicode/plurrule.h"
15 #include "unicode/utypes.h"
16 #include "cmemory.h"
17 #include "messageimpl.h"
18 #include "nfrule.h"
19 #include "plurrule_impl.h"
20 #include "uassert.h"
21 #include "uhash.h"
22 
23 #if !UCONFIG_NO_FORMATTING
24 
25 U_NAMESPACE_BEGIN
26 
27 static const UChar OTHER_STRING[] = {
28     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
29 };
30 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
32 
33 PluralFormat::PluralFormat(UErrorCode& status)
34         : locale(Locale::getDefault()),
35           msgPattern(status),
36           numberFormat(NULL),
37           offset(0) {
38     init(NULL, UPLURAL_TYPE_CARDINAL, status);
39 }
40 
PluralFormat(const Locale & loc,UErrorCode & status)41 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
42         : locale(loc),
43           msgPattern(status),
44           numberFormat(NULL),
45           offset(0) {
46     init(NULL, UPLURAL_TYPE_CARDINAL, status);
47 }
48 
PluralFormat(const PluralRules & rules,UErrorCode & status)49 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
50         : locale(Locale::getDefault()),
51           msgPattern(status),
52           numberFormat(NULL),
53           offset(0) {
54     init(&rules, UPLURAL_TYPE_COUNT, status);
55 }
56 
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)57 PluralFormat::PluralFormat(const Locale& loc,
58                            const PluralRules& rules,
59                            UErrorCode& status)
60         : locale(loc),
61           msgPattern(status),
62           numberFormat(NULL),
63           offset(0) {
64     init(&rules, UPLURAL_TYPE_COUNT, status);
65 }
66 
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)67 PluralFormat::PluralFormat(const Locale& loc,
68                            UPluralType type,
69                            UErrorCode& status)
70         : locale(loc),
71           msgPattern(status),
72           numberFormat(NULL),
73           offset(0) {
74     init(NULL, type, status);
75 }
76 
PluralFormat(const UnicodeString & pat,UErrorCode & status)77 PluralFormat::PluralFormat(const UnicodeString& pat,
78                            UErrorCode& status)
79         : locale(Locale::getDefault()),
80           msgPattern(status),
81           numberFormat(NULL),
82           offset(0) {
83     init(NULL, UPLURAL_TYPE_CARDINAL, status);
84     applyPattern(pat, status);
85 }
86 
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)87 PluralFormat::PluralFormat(const Locale& loc,
88                            const UnicodeString& pat,
89                            UErrorCode& status)
90         : locale(loc),
91           msgPattern(status),
92           numberFormat(NULL),
93           offset(0) {
94     init(NULL, UPLURAL_TYPE_CARDINAL, status);
95     applyPattern(pat, status);
96 }
97 
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)98 PluralFormat::PluralFormat(const PluralRules& rules,
99                            const UnicodeString& pat,
100                            UErrorCode& status)
101         : locale(Locale::getDefault()),
102           msgPattern(status),
103           numberFormat(NULL),
104           offset(0) {
105     init(&rules, UPLURAL_TYPE_COUNT, status);
106     applyPattern(pat, status);
107 }
108 
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)109 PluralFormat::PluralFormat(const Locale& loc,
110                            const PluralRules& rules,
111                            const UnicodeString& pat,
112                            UErrorCode& status)
113         : locale(loc),
114           msgPattern(status),
115           numberFormat(NULL),
116           offset(0) {
117     init(&rules, UPLURAL_TYPE_COUNT, status);
118     applyPattern(pat, status);
119 }
120 
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)121 PluralFormat::PluralFormat(const Locale& loc,
122                            UPluralType type,
123                            const UnicodeString& pat,
124                            UErrorCode& status)
125         : locale(loc),
126           msgPattern(status),
127           numberFormat(NULL),
128           offset(0) {
129     init(NULL, type, status);
130     applyPattern(pat, status);
131 }
132 
// Copy constructor.
// Value members (base Format state, locale, parsed pattern, offset) are copied
// directly; the owned NumberFormat and PluralRules are duplicated by
// copyObjects(). numberFormat starts out NULL so copyObjects() has nothing
// stale to delete.
PluralFormat::PluralFormat(const PluralFormat& other)
    : Format(other),
      locale(other.locale),
      msgPattern(other.msgPattern),
      numberFormat(NULL),
      offset(other.offset)
{
    copyObjects(other);
}
141 
142 void
copyObjects(const PluralFormat & other)143 PluralFormat::copyObjects(const PluralFormat& other) {
144     UErrorCode status = U_ZERO_ERROR;
145     if (numberFormat != NULL) {
146         delete numberFormat;
147     }
148     if (pluralRulesWrapper.pluralRules != NULL) {
149         delete pluralRulesWrapper.pluralRules;
150     }
151 
152     if (other.numberFormat == NULL) {
153         numberFormat = NumberFormat::createInstance(locale, status);
154     } else {
155         numberFormat = (NumberFormat*)other.numberFormat->clone();
156     }
157     if (other.pluralRulesWrapper.pluralRules == NULL) {
158         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
159     } else {
160         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
161     }
162 }
163 
164 
~PluralFormat()165 PluralFormat::~PluralFormat() {
166     delete numberFormat;
167 }
168 
169 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)170 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
171     if (U_FAILURE(status)) {
172         return;
173     }
174 
175     if (rules==NULL) {
176         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
177     } else {
178         pluralRulesWrapper.pluralRules = rules->clone();
179         if (pluralRulesWrapper.pluralRules == NULL) {
180             status = U_MEMORY_ALLOCATION_ERROR;
181             return;
182         }
183     }
184 
185     numberFormat= NumberFormat::createInstance(locale, status);
186 }
187 
188 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)189 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
190     msgPattern.parsePluralStyle(newPattern, NULL, status);
191     if (U_FAILURE(status)) {
192         msgPattern.clear();
193         offset = 0;
194         return;
195     }
196     offset = msgPattern.getPluralOffset(0);
197 }
198 
199 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const200 PluralFormat::format(const Formattable& obj,
201                    UnicodeString& appendTo,
202                    FieldPosition& pos,
203                    UErrorCode& status) const
204 {
205     if (U_FAILURE(status)) return appendTo;
206 
207     if (obj.isNumeric()) {
208         return format(obj, obj.getDouble(), appendTo, pos, status);
209     } else {
210         status = U_ILLEGAL_ARGUMENT_ERROR;
211         return appendTo;
212     }
213 }
214 
215 UnicodeString
format(int32_t number,UErrorCode & status) const216 PluralFormat::format(int32_t number, UErrorCode& status) const {
217     FieldPosition fpos(0);
218     UnicodeString result;
219     return format(Formattable(number), number, result, fpos, status);
220 }
221 
222 UnicodeString
format(double number,UErrorCode & status) const223 PluralFormat::format(double number, UErrorCode& status) const {
224     FieldPosition fpos(0);
225     UnicodeString result;
226     return format(Formattable(number), number, result, fpos, status);
227 }
228 
229 
230 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const231 PluralFormat::format(int32_t number,
232                      UnicodeString& appendTo,
233                      FieldPosition& pos,
234                      UErrorCode& status) const {
235     return format(Formattable(number), (double)number, appendTo, pos, status);
236 }
237 
238 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const239 PluralFormat::format(double number,
240                      UnicodeString& appendTo,
241                      FieldPosition& pos,
242                      UErrorCode& status) const {
243     return format(Formattable(number), (double)number, appendTo, pos, status);
244 }
245 
246 UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const247 PluralFormat::format(const Formattable& numberObject, double number,
248                      UnicodeString& appendTo,
249                      FieldPosition& pos,
250                      UErrorCode& status) const {
251     if (U_FAILURE(status)) {
252         return appendTo;
253     }
254     if (msgPattern.countParts() == 0) {
255         return numberFormat->format(numberObject, appendTo, pos, status);
256     }
257     // Get the appropriate sub-message.
258     // Select it based on the formatted number-offset.
259     double numberMinusOffset = number - offset;
260     UnicodeString numberString;
261     FieldPosition ignorePos;
262     FixedDecimal dec(numberMinusOffset);
263     if (offset == 0) {
264         numberFormat->format(numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
265         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
266         if(decFmt != NULL) {
267             dec = decFmt->getFixedDecimal(numberObject, status);
268         }
269     } else {
270         numberFormat->format(numberMinusOffset, numberString, ignorePos, status);
271         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
272         if(decFmt != NULL) {
273             dec = decFmt->getFixedDecimal(numberMinusOffset, status);
274         }
275     }
276     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
277     if (U_FAILURE(status)) { return appendTo; }
278     // Replace syntactic # signs in the top level of this sub-message
279     // (not in nested arguments) with the formatted number-offset.
280     const UnicodeString& pattern = msgPattern.getPatternString();
281     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
282     for (;;) {
283         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
284         const UMessagePatternPartType type = part.getType();
285         int32_t index = part.getIndex();
286         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
287             return appendTo.append(pattern, prevIndex, index - prevIndex);
288         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
289             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
290             appendTo.append(pattern, prevIndex, index - prevIndex);
291             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
292                 appendTo.append(numberString);
293             }
294             prevIndex = part.getLimit();
295         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
296             appendTo.append(pattern, prevIndex, index - prevIndex);
297             prevIndex = index;
298             partIndex = msgPattern.getLimitPartIndex(partIndex);
299             index = msgPattern.getPart(partIndex).getLimit();
300             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
301             prevIndex = index;
302         }
303     }
304 }
305 
306 UnicodeString&
toPattern(UnicodeString & appendTo)307 PluralFormat::toPattern(UnicodeString& appendTo) {
308     if (0 == msgPattern.countParts()) {
309         appendTo.setToBogus();
310     } else {
311         appendTo.append(msgPattern.getPatternString());
312     }
313     return appendTo;
314 }
315 
316 void
setLocale(const Locale & loc,UErrorCode & status)317 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
318     if (U_FAILURE(status)) {
319         return;
320     }
321     locale = loc;
322     msgPattern.clear();
323     delete numberFormat;
324     offset = 0;
325     numberFormat = NULL;
326     pluralRulesWrapper.reset();
327     init(NULL, UPLURAL_TYPE_CARDINAL, status);
328 }
329 
330 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)331 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
332     if (U_FAILURE(status)) {
333         return;
334     }
335     NumberFormat* nf = (NumberFormat*)format->clone();
336     if (nf != NULL) {
337         delete numberFormat;
338         numberFormat = nf;
339     } else {
340         status = U_MEMORY_ALLOCATION_ERROR;
341     }
342 }
343 
344 Format*
clone() const345 PluralFormat::clone() const
346 {
347     return new PluralFormat(*this);
348 }
349 
350 
351 PluralFormat&
operator =(const PluralFormat & other)352 PluralFormat::operator=(const PluralFormat& other) {
353     if (this != &other) {
354         locale = other.locale;
355         msgPattern = other.msgPattern;
356         offset = other.offset;
357         copyObjects(other);
358     }
359 
360     return *this;
361 }
362 
363 UBool
operator ==(const Format & other) const364 PluralFormat::operator==(const Format& other) const {
365     if (this == &other) {
366         return TRUE;
367     }
368     if (!Format::operator==(other)) {
369         return FALSE;
370     }
371     const PluralFormat& o = (const PluralFormat&)other;
372     return
373         locale == o.locale &&
374         msgPattern == o.msgPattern &&  // implies same offset
375         (numberFormat == NULL) == (o.numberFormat == NULL) &&
376         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
377         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
378         (pluralRulesWrapper.pluralRules == NULL ||
379             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
380 }
381 
382 UBool
operator !=(const Format & other) const383 PluralFormat::operator!=(const Format& other) const {
384     return  !operator==(other);
385 }
386 
387 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const388 PluralFormat::parseObject(const UnicodeString& /*source*/,
389                         Formattable& /*result*/,
390                         ParsePosition& pos) const
391 {
392     // Parsing not supported.
393     pos.setErrorIndex(pos.getIndex());
394 }
395 
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)396 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
397                                      const PluralSelector& selector, void *context,
398                                      double number, UErrorCode& ec) {
399     if (U_FAILURE(ec)) {
400         return 0;
401     }
402     int32_t count=pattern.countParts();
403     double offset;
404     const MessagePattern::Part* part=&pattern.getPart(partIndex);
405     if (MessagePattern::Part::hasNumericValue(part->getType())) {
406         offset=pattern.getNumericValue(*part);
407         ++partIndex;
408     } else {
409         offset=0;
410     }
411     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
412     // Then we get the keyword from the selector.
413     // (In other words, we never call the selector if we match against an explicit value,
414     // or if the only non-explicit keyword is "other".)
415     UnicodeString keyword;
416     UnicodeString other(FALSE, OTHER_STRING, 5);
417     // When we find a match, we set msgStart>0 and also set this boolean to true
418     // to avoid matching the keyword again (duplicates are allowed)
419     // while we continue to look for an explicit-value match.
420     UBool haveKeywordMatch=FALSE;
421     // msgStart is 0 until we find any appropriate sub-message.
422     // We remember the first "other" sub-message if we have not seen any
423     // appropriate sub-message before.
424     // We remember the first matching-keyword sub-message if we have not seen
425     // one of those before.
426     // (The parser allows [does not check for] duplicate keywords.
427     // We just have to make sure to take the first one.)
428     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
429     // at the first keyword match.
430     // We keep going until we find an explicit-value match or reach the end of the plural style.
431     int32_t msgStart=0;
432     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
433     // until ARG_LIMIT or end of plural-only pattern.
434     do {
435         part=&pattern.getPart(partIndex++);
436         const UMessagePatternPartType type = part->getType();
437         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
438             break;
439         }
440         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
441         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
442         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
443             // explicit value like "=2"
444             part=&pattern.getPart(partIndex++);
445             if(number==pattern.getNumericValue(*part)) {
446                 // matches explicit value
447                 return partIndex;
448             }
449         } else if(!haveKeywordMatch) {
450             // plural keyword like "few" or "other"
451             // Compare "other" first and call the selector if this is not "other".
452             if(pattern.partSubstringMatches(*part, other)) {
453                 if(msgStart==0) {
454                     msgStart=partIndex;
455                     if(0 == keyword.compare(other)) {
456                         // This is the first "other" sub-message,
457                         // and the selected keyword is also "other".
458                         // Do not match "other" again.
459                         haveKeywordMatch=TRUE;
460                     }
461                 }
462             } else {
463                 if(keyword.isEmpty()) {
464                     keyword=selector.select(context, number-offset, ec);
465                     if(msgStart!=0 && (0 == keyword.compare(other))) {
466                         // We have already seen an "other" sub-message.
467                         // Do not match "other" again.
468                         haveKeywordMatch=TRUE;
469                         // Skip keyword matching but do getLimitPartIndex().
470                     }
471                 }
472                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
473                     // keyword matches
474                     msgStart=partIndex;
475                     // Do not match this keyword again.
476                     haveKeywordMatch=TRUE;
477                 }
478             }
479         }
480         partIndex=pattern.getLimitPartIndex(partIndex);
481     } while(++partIndex<count);
482     return msgStart;
483 }
484 
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const485 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
486     // If no pattern was applied, return null.
487     if (msgPattern.countParts() == 0) {
488         pos.setBeginIndex(-1);
489         pos.setEndIndex(-1);
490         return;
491     }
492     int partIndex = 0;
493     int currMatchIndex;
494     int count=msgPattern.countParts();
495     int startingAt = pos.getBeginIndex();
496     if (startingAt < 0) {
497         startingAt = 0;
498     }
499 
500     // The keyword is null until we need to match against a non-explicit, not-"other" value.
501     // Then we get the keyword from the selector.
502     // (In other words, we never call the selector if we match against an explicit value,
503     // or if the only non-explicit keyword is "other".)
504     UnicodeString keyword;
505     UnicodeString matchedWord;
506     const UnicodeString& pattern = msgPattern.getPatternString();
507     int matchedIndex = -1;
508     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
509     // until the end of the plural-only pattern.
510     while (partIndex < count) {
511         const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
512         if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
513             // Bad format
514             continue;
515         }
516 
517         const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
518         if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
519             // Bad format
520             continue;
521         }
522 
523         const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
524         if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
525             // Bad format
526             continue;
527         }
528 
529         UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
530         if (rbnfLenientScanner != NULL) {
531             // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
532             int32_t length = -1;
533             currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
534         }
535         else {
536             currMatchIndex = source.indexOf(currArg, startingAt);
537         }
538         if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
539             matchedIndex = currMatchIndex;
540             matchedWord = currArg;
541             keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
542         }
543     }
544     if (matchedIndex >= 0) {
545         pos.setBeginIndex(matchedIndex);
546         pos.setEndIndex(matchedIndex + matchedWord.length());
547         result.setString(keyword);
548         return;
549     }
550 
551     // Not found!
552     pos.setBeginIndex(-1);
553     pos.setEndIndex(-1);
554 }
555 
~PluralSelector()556 PluralFormat::PluralSelector::~PluralSelector() {}
557 
~PluralSelectorAdapter()558 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
559     delete pluralRules;
560 }
561 
select(void * context,double number,UErrorCode &) const562 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
563                                                           UErrorCode& /*ec*/) const {
564     (void)number;  // unused except in the assertion
565     FixedDecimal *dec=static_cast<FixedDecimal *>(context);
566     U_ASSERT(dec->source==number);
567     return pluralRules->select(*dec);
568 }
569 
reset()570 void PluralFormat::PluralSelectorAdapter::reset() {
571     delete pluralRules;
572     pluralRules = NULL;
573 }
574 
575 
576 U_NAMESPACE_END
577 
578 
579 #endif /* #if !UCONFIG_NO_FORMATTING */
580 
581 //eof
582