1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2015, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * affixpatternparser.h
9 *
10 * created on: 2015jan06
11 * created by: Travis Keep
12 */
13 
14 #ifndef __AFFIX_PATTERN_PARSER_H__
15 #define __AFFIX_PATTERN_PARSER_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_FORMATTING
20 
21 #include "unicode/unistr.h"
22 #include "unicode/uobject.h"
23 #include "pluralaffix.h"
24 
25 U_NAMESPACE_BEGIN
26 
27 class PluralRules;
28 class FixedPrecision;
29 class DecimalFormatSymbols;
30 
31 /**
32  * A representation of the various forms of a particular currency according
33  * to some locale and usage context.
34  *
35  * Includes the symbol, ISO code form, and long form(s) of the currency name
36  * for each plural variation.
37  */
38 class U_I18N_API CurrencyAffixInfo : public UMemory {
39 public:
40     /**
41      * Symbol is \u00a4; ISO form is \u00a4\u00a4;
42      *  long form is \u00a4\u00a4\u00a4.
43      */
44     CurrencyAffixInfo();
45 
getSymbol()46     const UnicodeString &getSymbol() const { return fSymbol; }
getISO()47     const UnicodeString &getISO() const { return fISO; }
getLong()48     const PluralAffix &getLong() const { return fLong; }
setSymbol(const UnicodeString & symbol)49     void setSymbol(const UnicodeString &symbol) {
50         fSymbol = symbol;
51         fIsDefault = FALSE;
52     }
setISO(const UnicodeString & iso)53     void setISO(const UnicodeString &iso) {
54         fISO = iso;
55         fIsDefault = FALSE;
56     }
57     UBool
equals(const CurrencyAffixInfo & other)58     equals(const CurrencyAffixInfo &other) const {
59         return (fSymbol == other.fSymbol)
60                 && (fISO == other.fISO)
61                 && (fLong.equals(other.fLong))
62                 && (fIsDefault == other.fIsDefault);
63     }
64 
65     /**
66      * Intializes this instance.
67      *
68      * @param locale the locale for the currency forms.
69      * @param rules The plural rules for the locale.
70      * @param currency the null terminated, 3 character ISO code of the
71      * currency. If NULL, resets this instance as if it were just created.
72      * In this case, the first 2 parameters may be NULL as well.
73      * @param status any error returned here.
74      */
75     void set(
76             const char *locale, const PluralRules *rules,
77             const UChar *currency, UErrorCode &status);
78 
79     /**
80      * Returns true if this instance is the default. That is has no real
81      * currency. For instance never initialized with set()
82      * or reset with set(NULL, NULL, NULL, status).
83      */
isDefault()84     UBool isDefault() const { return fIsDefault; }
85 
86     /**
87      * Adjusts the precision used for a particular currency.
88      * @param currency the null terminated, 3 character ISO code of the
89      * currency.
90      * @param usage the usage of the currency
91      * @param precision min/max fraction digits and rounding increment
92      *  adjusted.
93      * @params status any error reported here.
94      */
95     static void adjustPrecision(
96             const UChar *currency, const UCurrencyUsage usage,
97             FixedPrecision &precision, UErrorCode &status);
98 
99 private:
100     /**
101      * The symbol form of the currency.
102      */
103     UnicodeString fSymbol;
104 
105     /**
106      * The ISO form of the currency, usually three letter abbreviation.
107      */
108     UnicodeString fISO;
109 
110     /**
111      * The long forms of the currency keyed by plural variation.
112      */
113     PluralAffix fLong;
114 
115     UBool fIsDefault;
116 
117 };
118 
119 class AffixPatternIterator;
120 
121 /**
122  * A locale agnostic representation of an affix pattern.
123  */
124 class U_I18N_API AffixPattern : public UMemory {
125 public:
126 
127     /**
128      * The token types that can appear in an affix pattern.
129      */
130     enum ETokenType {
131         kLiteral,
132         kPercent,
133         kPerMill,
134         kCurrency,
135         kNegative,
136         kPositive
137     };
138 
139     /**
140      * An empty affix pattern.
141      */
AffixPattern()142     AffixPattern()
143             : tokens(), literals(), hasCurrencyToken(FALSE),
144               hasPercentToken(FALSE), hasPermillToken(FALSE),  char32Count(0) {
145     }
146 
147     /**
148      * Adds a string literal to this affix pattern.
149      */
150     void addLiteral(const UChar *, int32_t start, int32_t len);
151 
152     /**
153      * Adds a token to this affix pattern. t must not be kLiteral as
154      * the addLiteral() method adds literals.
155      * @param t the token type to add
156      */
157     void add(ETokenType t);
158 
159     /**
160      * Adds a currency token with specific count to this affix pattern.
161      * @param count the token count. Used to distinguish between
162      *  one, two, or three currency symbols. Note that adding a currency
163      *  token with count=2 (Use ISO code) is different than adding two
164      *  currency tokens each with count=1 (two currency symbols).
165      */
166     void addCurrency(uint8_t count);
167 
168     /**
169      * Makes this instance be an empty affix pattern.
170      */
171     void remove();
172 
173     /**
174      * Provides an iterator over the tokens in this instance.
175      * @param result this is initialized to point just before the
176      *   first token of this instance. Caller must call nextToken()
177      *   on the iterator once it is set up to have it actually point
178      *   to the first token. This first call to nextToken() will return
179      *   FALSE if the AffixPattern being iterated over is empty.
180      * @return result
181      */
182     AffixPatternIterator &iterator(AffixPatternIterator &result) const;
183 
184     /**
185      * Returns TRUE if this instance has currency tokens in it.
186      */
usesCurrency()187     UBool usesCurrency() const {
188         return hasCurrencyToken;
189     }
190 
usesPercent()191     UBool usesPercent() const {
192         return hasPercentToken;
193     }
194 
usesPermill()195     UBool usesPermill() const {
196         return hasPermillToken;
197     }
198 
199     /**
200      * Returns the number of code points a string of this instance
201      * would have if none of the special tokens were escaped.
202      * Used to compute the padding size.
203      */
countChar32()204     int32_t countChar32() const {
205         return char32Count;
206     }
207 
208     /**
209      * Appends other to this instance mutating this instance in place.
210      * @param other The pattern appended to the end of this one.
211      * @return a reference to this instance for chaining.
212      */
213     AffixPattern &append(const AffixPattern &other);
214 
215     /**
216      * Converts this AffixPattern back into a user string.
217      * It is the inverse of parseUserAffixString.
218      */
219     UnicodeString &toUserString(UnicodeString &appendTo) const;
220 
221     /**
222      * Converts this AffixPattern back into a string.
223      * It is the inverse of parseAffixString.
224      */
225     UnicodeString &toString(UnicodeString &appendTo) const;
226 
227     /**
228      * Parses an affix pattern string appending it to an AffixPattern.
229      * Parses affix pattern strings produced from using
230      * DecimalFormatPatternParser to parse a format pattern. Affix patterns
231      * include the positive prefix and suffix and the negative prefix
232      * and suffix. This method expects affix patterns strings to be in the
233      * same format that DecimalFormatPatternParser produces. Namely special
234      * characters in the affix that correspond to a field type must be
235      * prefixed with an apostrophe ('). These special character sequences
236      * inluce minus (-), percent (%), permile (U+2030), plus (+),
237      * short currency (U+00a4), medium currency (u+00a4 * 2),
238      * long currency (u+a4 * 3), and apostrophe (')
239      * (apostrophe does not correspond to a field type but has to be escaped
240      * because it itself is the escape character).
241      * Since the expansion of these special character
242      * sequences is locale dependent, these sequences are not expanded in
243      * an AffixPattern instance.
244      * If these special characters are not prefixed with an apostrophe in
245      * the affix pattern string, then they are treated verbatim just as
246      * any other character. If an apostrophe prefixes a non special
247      * character in the affix pattern, the apostrophe is simply ignored.
248      *
249      * @param affixStr the string from DecimalFormatPatternParser
250      * @param appendTo parsed result appended here.
251      * @param status any error parsing returned here.
252      */
253     static AffixPattern &parseAffixString(
254             const UnicodeString &affixStr,
255             AffixPattern &appendTo,
256             UErrorCode &status);
257 
258     /**
259      * Parses an affix pattern string appending it to an AffixPattern.
260      * Parses affix pattern strings as the user would supply them.
261      * In this function, quoting makes special characters like normal
262      * characters whereas in parseAffixString, quoting makes special
263      * characters special.
264      *
265      * @param affixStr the string from the user
266      * @param appendTo parsed result appended here.
267      * @param status any error parsing returned here.
268      */
269     static AffixPattern &parseUserAffixString(
270             const UnicodeString &affixStr,
271             AffixPattern &appendTo,
272             UErrorCode &status);
273 
equals(const AffixPattern & other)274     UBool equals(const AffixPattern &other) const {
275         return (tokens == other.tokens)
276                 && (literals == other.literals)
277                 && (hasCurrencyToken == other.hasCurrencyToken)
278                 && (hasPercentToken == other.hasPercentToken)
279                 && (hasPermillToken == other.hasPermillToken)
280                 && (char32Count == other.char32Count);
281     }
282 
283 private:
284     /*
285      * Tokens stored here. Each UChar generally stands for one token. A
286      * Each token is of form 'etttttttllllllll' llllllll is the length of
287      * the token and ranges from 0-255. ttttttt is the token type and ranges
288      * from 0-127. If e is set it means this is an extendo token (to be
289      * described later). To accomodate token lengths above 255, each normal
290      * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
291      * the same type. Right now only kLiteral Tokens have extendo tokens.
292      * Each extendo token provides the next 8 higher bits for the length.
293      * If a kLiteral token is followed by 2 extendo tokens then, then the
294      * llllllll of the next extendo token contains bits 8-15 of the length
295      * and the last extendo token contains bits 16-23 of the length.
296      */
297     UnicodeString tokens;
298 
299     /*
300      * The characters of the kLiteral tokens are concatenated together here.
301      * The first characters go with the first kLiteral token, the next
302      * characters go with the next kLiteral token etc.
303      */
304     UnicodeString literals;
305     UBool hasCurrencyToken;
306     UBool hasPercentToken;
307     UBool hasPermillToken;
308     int32_t char32Count;
309     void add(ETokenType t, uint8_t count);
310 
311 };
312 
313 /**
314  * An iterator over the tokens in an AffixPattern instance.
315  */
316 class U_I18N_API AffixPatternIterator : public UMemory {
317 public:
318 
319     /**
320      * Using an iterator without first calling iterator on an AffixPattern
321      * instance to initialize the iterator results in
322      * undefined behavior.
323      */
AffixPatternIterator()324     AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
325     /**
326      * Advances this iterator to the next token. Returns FALSE when there
327      * are no more tokens. Calling the other methods after nextToken()
328      * returns FALSE results in undefined behavior.
329      */
330     UBool nextToken();
331 
332     /**
333      * Returns the type of token.
334      */
335     AffixPattern::ETokenType getTokenType() const;
336 
337     /**
338      * For literal tokens, returns the literal string. Calling this for
339      * other token types results in undefined behavior.
340      * @param result replaced with a read-only alias to the literal string.
341      * @return result
342      */
343     UnicodeString &getLiteral(UnicodeString &result) const;
344 
345     /**
346      * Returns the token length. Usually 1, but for currency tokens may
347      * be 2 for ISO code and 3 for long form.
348      */
349     int32_t getTokenLength() const;
350 private:
351     int32_t nextLiteralIndex;
352     int32_t lastLiteralLength;
353     int32_t nextTokenIndex;
354     const UnicodeString *tokens;
355     const UnicodeString *literals;
356     friend class AffixPattern;
357     AffixPatternIterator(const AffixPatternIterator &);
358     AffixPatternIterator &operator=(const AffixPatternIterator &);
359 };
360 
361 /**
362  * A locale aware class that converts locale independent AffixPattern
363  * instances into locale dependent PluralAffix instances.
364  */
365 class U_I18N_API AffixPatternParser : public UMemory {
366 public:
367 AffixPatternParser();
368 AffixPatternParser(const DecimalFormatSymbols &symbols);
369 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
370 
371 /**
372  * Parses affixPattern appending the result to appendTo.
373  * @param affixPattern The affix pattern.
374  * @param currencyAffixInfo contains the currency forms.
375  * @param appendTo The result of parsing affixPattern is appended here.
376  * @param status any error returned here.
377  * @return appendTo.
378  */
379 PluralAffix &parse(
380         const AffixPattern &affixPattern,
381         const CurrencyAffixInfo &currencyAffixInfo,
382         PluralAffix &appendTo,
383         UErrorCode &status) const;
384 
equals(const AffixPatternParser & other)385 UBool equals(const AffixPatternParser &other) const {
386     return (fPercent == other.fPercent)
387             && (fPermill == other.fPermill)
388             && (fNegative == other.fNegative)
389             && (fPositive == other.fPositive);
390 }
391 
392 private:
393 UnicodeString fPercent;
394 UnicodeString fPermill;
395 UnicodeString fNegative;
396 UnicodeString fPositive;
397 };
398 
399 
400 U_NAMESPACE_END
401 #endif /* #if !UCONFIG_NO_FORMATTING */
402 #endif  // __AFFIX_PATTERN_PARSER_H__
403