1 /*
2 *******************************************************************************
3 * Copyright (C) 2009-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURFMT.CPP
8 *******************************************************************************
9 */
10
11 #include "unicode/decimfmt.h"
12 #include "unicode/messagepattern.h"
13 #include "unicode/plurfmt.h"
14 #include "unicode/plurrule.h"
15 #include "unicode/utypes.h"
16 #include "cmemory.h"
17 #include "messageimpl.h"
18 #include "nfrule.h"
19 #include "plurrule_impl.h"
20 #include "uassert.h"
21 #include "uhash.h"
22
23 #if !UCONFIG_NO_FORMATTING
24
25 U_NAMESPACE_BEGIN
26
27 static const UChar OTHER_STRING[] = {
28 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
29 };
30
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
32
33 PluralFormat::PluralFormat(UErrorCode& status)
34 : locale(Locale::getDefault()),
35 msgPattern(status),
36 numberFormat(NULL),
37 offset(0) {
38 init(NULL, UPLURAL_TYPE_CARDINAL, status);
39 }
40
PluralFormat(const Locale & loc,UErrorCode & status)41 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
42 : locale(loc),
43 msgPattern(status),
44 numberFormat(NULL),
45 offset(0) {
46 init(NULL, UPLURAL_TYPE_CARDINAL, status);
47 }
48
PluralFormat(const PluralRules & rules,UErrorCode & status)49 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
50 : locale(Locale::getDefault()),
51 msgPattern(status),
52 numberFormat(NULL),
53 offset(0) {
54 init(&rules, UPLURAL_TYPE_COUNT, status);
55 }
56
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)57 PluralFormat::PluralFormat(const Locale& loc,
58 const PluralRules& rules,
59 UErrorCode& status)
60 : locale(loc),
61 msgPattern(status),
62 numberFormat(NULL),
63 offset(0) {
64 init(&rules, UPLURAL_TYPE_COUNT, status);
65 }
66
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)67 PluralFormat::PluralFormat(const Locale& loc,
68 UPluralType type,
69 UErrorCode& status)
70 : locale(loc),
71 msgPattern(status),
72 numberFormat(NULL),
73 offset(0) {
74 init(NULL, type, status);
75 }
76
PluralFormat(const UnicodeString & pat,UErrorCode & status)77 PluralFormat::PluralFormat(const UnicodeString& pat,
78 UErrorCode& status)
79 : locale(Locale::getDefault()),
80 msgPattern(status),
81 numberFormat(NULL),
82 offset(0) {
83 init(NULL, UPLURAL_TYPE_CARDINAL, status);
84 applyPattern(pat, status);
85 }
86
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)87 PluralFormat::PluralFormat(const Locale& loc,
88 const UnicodeString& pat,
89 UErrorCode& status)
90 : locale(loc),
91 msgPattern(status),
92 numberFormat(NULL),
93 offset(0) {
94 init(NULL, UPLURAL_TYPE_CARDINAL, status);
95 applyPattern(pat, status);
96 }
97
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)98 PluralFormat::PluralFormat(const PluralRules& rules,
99 const UnicodeString& pat,
100 UErrorCode& status)
101 : locale(Locale::getDefault()),
102 msgPattern(status),
103 numberFormat(NULL),
104 offset(0) {
105 init(&rules, UPLURAL_TYPE_COUNT, status);
106 applyPattern(pat, status);
107 }
108
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)109 PluralFormat::PluralFormat(const Locale& loc,
110 const PluralRules& rules,
111 const UnicodeString& pat,
112 UErrorCode& status)
113 : locale(loc),
114 msgPattern(status),
115 numberFormat(NULL),
116 offset(0) {
117 init(&rules, UPLURAL_TYPE_COUNT, status);
118 applyPattern(pat, status);
119 }
120
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)121 PluralFormat::PluralFormat(const Locale& loc,
122 UPluralType type,
123 const UnicodeString& pat,
124 UErrorCode& status)
125 : locale(loc),
126 msgPattern(status),
127 numberFormat(NULL),
128 offset(0) {
129 init(NULL, type, status);
130 applyPattern(pat, status);
131 }
132
// Copy constructor. Value members are copied in the initializer list;
// numberFormat starts out NULL and copyObjects() then fills in the owned
// number format and plural rules.
PluralFormat::PluralFormat(const PluralFormat& other)
        : Format(other),
          locale(other.locale),
          msgPattern(other.msgPattern),
          numberFormat(NULL),
          offset(other.offset) {
    copyObjects(other);
}
141
142 void
copyObjects(const PluralFormat & other)143 PluralFormat::copyObjects(const PluralFormat& other) {
144 UErrorCode status = U_ZERO_ERROR;
145 if (numberFormat != NULL) {
146 delete numberFormat;
147 }
148 if (pluralRulesWrapper.pluralRules != NULL) {
149 delete pluralRulesWrapper.pluralRules;
150 }
151
152 if (other.numberFormat == NULL) {
153 numberFormat = NumberFormat::createInstance(locale, status);
154 } else {
155 numberFormat = (NumberFormat*)other.numberFormat->clone();
156 }
157 if (other.pluralRulesWrapper.pluralRules == NULL) {
158 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
159 } else {
160 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
161 }
162 }
163
164
~PluralFormat()165 PluralFormat::~PluralFormat() {
166 delete numberFormat;
167 }
168
169 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)170 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
171 if (U_FAILURE(status)) {
172 return;
173 }
174
175 if (rules==NULL) {
176 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
177 } else {
178 pluralRulesWrapper.pluralRules = rules->clone();
179 if (pluralRulesWrapper.pluralRules == NULL) {
180 status = U_MEMORY_ALLOCATION_ERROR;
181 return;
182 }
183 }
184
185 numberFormat= NumberFormat::createInstance(locale, status);
186 }
187
188 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)189 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
190 msgPattern.parsePluralStyle(newPattern, NULL, status);
191 if (U_FAILURE(status)) {
192 msgPattern.clear();
193 offset = 0;
194 return;
195 }
196 offset = msgPattern.getPluralOffset(0);
197 }
198
199 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const200 PluralFormat::format(const Formattable& obj,
201 UnicodeString& appendTo,
202 FieldPosition& pos,
203 UErrorCode& status) const
204 {
205 if (U_FAILURE(status)) return appendTo;
206
207 if (obj.isNumeric()) {
208 return format(obj, obj.getDouble(), appendTo, pos, status);
209 } else {
210 status = U_ILLEGAL_ARGUMENT_ERROR;
211 return appendTo;
212 }
213 }
214
215 UnicodeString
format(int32_t number,UErrorCode & status) const216 PluralFormat::format(int32_t number, UErrorCode& status) const {
217 FieldPosition fpos(0);
218 UnicodeString result;
219 return format(Formattable(number), number, result, fpos, status);
220 }
221
222 UnicodeString
format(double number,UErrorCode & status) const223 PluralFormat::format(double number, UErrorCode& status) const {
224 FieldPosition fpos(0);
225 UnicodeString result;
226 return format(Formattable(number), number, result, fpos, status);
227 }
228
229
230 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const231 PluralFormat::format(int32_t number,
232 UnicodeString& appendTo,
233 FieldPosition& pos,
234 UErrorCode& status) const {
235 return format(Formattable(number), (double)number, appendTo, pos, status);
236 }
237
238 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const239 PluralFormat::format(double number,
240 UnicodeString& appendTo,
241 FieldPosition& pos,
242 UErrorCode& status) const {
243 return format(Formattable(number), (double)number, appendTo, pos, status);
244 }
245
246 UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const247 PluralFormat::format(const Formattable& numberObject, double number,
248 UnicodeString& appendTo,
249 FieldPosition& pos,
250 UErrorCode& status) const {
251 if (U_FAILURE(status)) {
252 return appendTo;
253 }
254 if (msgPattern.countParts() == 0) {
255 return numberFormat->format(numberObject, appendTo, pos, status);
256 }
257 // Get the appropriate sub-message.
258 // Select it based on the formatted number-offset.
259 double numberMinusOffset = number - offset;
260 UnicodeString numberString;
261 FieldPosition ignorePos;
262 FixedDecimal dec(numberMinusOffset);
263 if (offset == 0) {
264 numberFormat->format(numberObject, numberString, ignorePos, status); // could be BigDecimal etc.
265 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
266 if(decFmt != NULL) {
267 dec = decFmt->getFixedDecimal(numberObject, status);
268 }
269 } else {
270 numberFormat->format(numberMinusOffset, numberString, ignorePos, status);
271 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
272 if(decFmt != NULL) {
273 dec = decFmt->getFixedDecimal(numberMinusOffset, status);
274 }
275 }
276 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
277 if (U_FAILURE(status)) { return appendTo; }
278 // Replace syntactic # signs in the top level of this sub-message
279 // (not in nested arguments) with the formatted number-offset.
280 const UnicodeString& pattern = msgPattern.getPatternString();
281 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
282 for (;;) {
283 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
284 const UMessagePatternPartType type = part.getType();
285 int32_t index = part.getIndex();
286 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
287 return appendTo.append(pattern, prevIndex, index - prevIndex);
288 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
289 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
290 appendTo.append(pattern, prevIndex, index - prevIndex);
291 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
292 appendTo.append(numberString);
293 }
294 prevIndex = part.getLimit();
295 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
296 appendTo.append(pattern, prevIndex, index - prevIndex);
297 prevIndex = index;
298 partIndex = msgPattern.getLimitPartIndex(partIndex);
299 index = msgPattern.getPart(partIndex).getLimit();
300 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
301 prevIndex = index;
302 }
303 }
304 }
305
306 UnicodeString&
toPattern(UnicodeString & appendTo)307 PluralFormat::toPattern(UnicodeString& appendTo) {
308 if (0 == msgPattern.countParts()) {
309 appendTo.setToBogus();
310 } else {
311 appendTo.append(msgPattern.getPatternString());
312 }
313 return appendTo;
314 }
315
316 void
setLocale(const Locale & loc,UErrorCode & status)317 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
318 if (U_FAILURE(status)) {
319 return;
320 }
321 locale = loc;
322 msgPattern.clear();
323 delete numberFormat;
324 offset = 0;
325 numberFormat = NULL;
326 pluralRulesWrapper.reset();
327 init(NULL, UPLURAL_TYPE_CARDINAL, status);
328 }
329
330 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)331 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
332 if (U_FAILURE(status)) {
333 return;
334 }
335 NumberFormat* nf = (NumberFormat*)format->clone();
336 if (nf != NULL) {
337 delete numberFormat;
338 numberFormat = nf;
339 } else {
340 status = U_MEMORY_ALLOCATION_ERROR;
341 }
342 }
343
344 Format*
clone() const345 PluralFormat::clone() const
346 {
347 return new PluralFormat(*this);
348 }
349
350
351 PluralFormat&
operator =(const PluralFormat & other)352 PluralFormat::operator=(const PluralFormat& other) {
353 if (this != &other) {
354 locale = other.locale;
355 msgPattern = other.msgPattern;
356 offset = other.offset;
357 copyObjects(other);
358 }
359
360 return *this;
361 }
362
363 UBool
operator ==(const Format & other) const364 PluralFormat::operator==(const Format& other) const {
365 if (this == &other) {
366 return TRUE;
367 }
368 if (!Format::operator==(other)) {
369 return FALSE;
370 }
371 const PluralFormat& o = (const PluralFormat&)other;
372 return
373 locale == o.locale &&
374 msgPattern == o.msgPattern && // implies same offset
375 (numberFormat == NULL) == (o.numberFormat == NULL) &&
376 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
377 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
378 (pluralRulesWrapper.pluralRules == NULL ||
379 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
380 }
381
382 UBool
operator !=(const Format & other) const383 PluralFormat::operator!=(const Format& other) const {
384 return !operator==(other);
385 }
386
387 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const388 PluralFormat::parseObject(const UnicodeString& /*source*/,
389 Formattable& /*result*/,
390 ParsePosition& pos) const
391 {
392 // Parsing not supported.
393 pos.setErrorIndex(pos.getIndex());
394 }
395
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)396 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
397 const PluralSelector& selector, void *context,
398 double number, UErrorCode& ec) {
399 if (U_FAILURE(ec)) {
400 return 0;
401 }
402 int32_t count=pattern.countParts();
403 double offset;
404 const MessagePattern::Part* part=&pattern.getPart(partIndex);
405 if (MessagePattern::Part::hasNumericValue(part->getType())) {
406 offset=pattern.getNumericValue(*part);
407 ++partIndex;
408 } else {
409 offset=0;
410 }
411 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
412 // Then we get the keyword from the selector.
413 // (In other words, we never call the selector if we match against an explicit value,
414 // or if the only non-explicit keyword is "other".)
415 UnicodeString keyword;
416 UnicodeString other(FALSE, OTHER_STRING, 5);
417 // When we find a match, we set msgStart>0 and also set this boolean to true
418 // to avoid matching the keyword again (duplicates are allowed)
419 // while we continue to look for an explicit-value match.
420 UBool haveKeywordMatch=FALSE;
421 // msgStart is 0 until we find any appropriate sub-message.
422 // We remember the first "other" sub-message if we have not seen any
423 // appropriate sub-message before.
424 // We remember the first matching-keyword sub-message if we have not seen
425 // one of those before.
426 // (The parser allows [does not check for] duplicate keywords.
427 // We just have to make sure to take the first one.)
428 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
429 // at the first keyword match.
430 // We keep going until we find an explicit-value match or reach the end of the plural style.
431 int32_t msgStart=0;
432 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
433 // until ARG_LIMIT or end of plural-only pattern.
434 do {
435 part=&pattern.getPart(partIndex++);
436 const UMessagePatternPartType type = part->getType();
437 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
438 break;
439 }
440 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
441 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
442 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
443 // explicit value like "=2"
444 part=&pattern.getPart(partIndex++);
445 if(number==pattern.getNumericValue(*part)) {
446 // matches explicit value
447 return partIndex;
448 }
449 } else if(!haveKeywordMatch) {
450 // plural keyword like "few" or "other"
451 // Compare "other" first and call the selector if this is not "other".
452 if(pattern.partSubstringMatches(*part, other)) {
453 if(msgStart==0) {
454 msgStart=partIndex;
455 if(0 == keyword.compare(other)) {
456 // This is the first "other" sub-message,
457 // and the selected keyword is also "other".
458 // Do not match "other" again.
459 haveKeywordMatch=TRUE;
460 }
461 }
462 } else {
463 if(keyword.isEmpty()) {
464 keyword=selector.select(context, number-offset, ec);
465 if(msgStart!=0 && (0 == keyword.compare(other))) {
466 // We have already seen an "other" sub-message.
467 // Do not match "other" again.
468 haveKeywordMatch=TRUE;
469 // Skip keyword matching but do getLimitPartIndex().
470 }
471 }
472 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
473 // keyword matches
474 msgStart=partIndex;
475 // Do not match this keyword again.
476 haveKeywordMatch=TRUE;
477 }
478 }
479 }
480 partIndex=pattern.getLimitPartIndex(partIndex);
481 } while(++partIndex<count);
482 return msgStart;
483 }
484
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const485 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
486 // If no pattern was applied, return null.
487 if (msgPattern.countParts() == 0) {
488 pos.setBeginIndex(-1);
489 pos.setEndIndex(-1);
490 return;
491 }
492 int partIndex = 0;
493 int currMatchIndex;
494 int count=msgPattern.countParts();
495 int startingAt = pos.getBeginIndex();
496 if (startingAt < 0) {
497 startingAt = 0;
498 }
499
500 // The keyword is null until we need to match against a non-explicit, not-"other" value.
501 // Then we get the keyword from the selector.
502 // (In other words, we never call the selector if we match against an explicit value,
503 // or if the only non-explicit keyword is "other".)
504 UnicodeString keyword;
505 UnicodeString matchedWord;
506 const UnicodeString& pattern = msgPattern.getPatternString();
507 int matchedIndex = -1;
508 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
509 // until the end of the plural-only pattern.
510 while (partIndex < count) {
511 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
512 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
513 // Bad format
514 continue;
515 }
516
517 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
518 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
519 // Bad format
520 continue;
521 }
522
523 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
524 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
525 // Bad format
526 continue;
527 }
528
529 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
530 if (rbnfLenientScanner != NULL) {
531 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
532 int32_t length = -1;
533 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
534 }
535 else {
536 currMatchIndex = source.indexOf(currArg, startingAt);
537 }
538 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
539 matchedIndex = currMatchIndex;
540 matchedWord = currArg;
541 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
542 }
543 }
544 if (matchedIndex >= 0) {
545 pos.setBeginIndex(matchedIndex);
546 pos.setEndIndex(matchedIndex + matchedWord.length());
547 result.setString(keyword);
548 return;
549 }
550
551 // Not found!
552 pos.setBeginIndex(-1);
553 pos.setEndIndex(-1);
554 }
555
~PluralSelector()556 PluralFormat::PluralSelector::~PluralSelector() {}
557
~PluralSelectorAdapter()558 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
559 delete pluralRules;
560 }
561
select(void * context,double number,UErrorCode &) const562 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
563 UErrorCode& /*ec*/) const {
564 (void)number; // unused except in the assertion
565 FixedDecimal *dec=static_cast<FixedDecimal *>(context);
566 U_ASSERT(dec->source==number);
567 return pluralRules->select(*dec);
568 }
569
reset()570 void PluralFormat::PluralSelectorAdapter::reset() {
571 delete pluralRules;
572 pluralRules = NULL;
573 }
574
575
576 U_NAMESPACE_END
577
578
579 #endif /* #if !UCONFIG_NO_FORMATTING */
580
581 //eof
582