1 /*
2 *******************************************************************************
3 * Copyright (C) 2015, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * affixpatternparser.h
7 *
8 * created on: 2015jan06
9 * created by: Travis Keep
10 */
11 
12 #ifndef __AFFIX_PATTERN_PARSER_H__
13 #define __AFFIX_PATTERN_PARSER_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_FORMATTING
18 
19 #include "unicode/unistr.h"
20 #include "unicode/uobject.h"
21 #include "pluralaffix.h"
22 
23 U_NAMESPACE_BEGIN
24 
25 class PluralRules;
26 class FixedPrecision;
27 class DecimalFormatSymbols;
28 
29 /**
30  * A representation of the various forms of a particular currency according
31  * to some locale and usage context.
32  *
33  * Includes the symbol, ISO code form, and long form(s) of the currency name
34  * for each plural variation.
35  */
36 class U_I18N_API CurrencyAffixInfo : public UMemory {
37 public:
38     /**
39      * Symbol is \u00a4; ISO form is \u00a4\u00a4;
40      *  long form is \u00a4\u00a4\u00a4.
41      */
42     CurrencyAffixInfo();
43 
getSymbol()44     const UnicodeString &getSymbol() const { return fSymbol; }
getISO()45     const UnicodeString &getISO() const { return fISO; }
getLong()46     const PluralAffix &getLong() const { return fLong; }
setSymbol(const UnicodeString & symbol)47     void setSymbol(const UnicodeString &symbol) {
48         fSymbol = symbol;
49         fIsDefault = FALSE;
50     }
setISO(const UnicodeString & iso)51     void setISO(const UnicodeString &iso) {
52         fISO = iso;
53         fIsDefault = FALSE;
54     }
55     UBool
equals(const CurrencyAffixInfo & other)56     equals(const CurrencyAffixInfo &other) const {
57         return (fSymbol == other.fSymbol)
58                 && (fISO == other.fISO)
59                 && (fLong.equals(other.fLong))
60                 && (fIsDefault == other.fIsDefault);
61     }
62 
63     /**
64      * Intializes this instance.
65      *
66      * @param locale the locale for the currency forms.
67      * @param rules The plural rules for the locale.
68      * @param currency the null terminated, 3 character ISO code of the
69      * currency. If NULL, resets this instance as if it were just created.
70      * In this case, the first 2 parameters may be NULL as well.
71      * @param status any error returned here.
72      */
73     void set(
74             const char *locale, const PluralRules *rules,
75             const UChar *currency, UErrorCode &status);
76 
77     /**
78      * Returns true if this instance is the default. That is has no real
79      * currency. For instance never initialized with set()
80      * or reset with set(NULL, NULL, NULL, status).
81      */
isDefault()82     UBool isDefault() const { return fIsDefault; }
83 
84     /**
85      * Adjusts the precision used for a particular currency.
86      * @param currency the null terminated, 3 character ISO code of the
87      * currency.
88      * @param usage the usage of the currency
89      * @param precision min/max fraction digits and rounding increment
90      *  adjusted.
91      * @params status any error reported here.
92      */
93     static void adjustPrecision(
94             const UChar *currency, const UCurrencyUsage usage,
95             FixedPrecision &precision, UErrorCode &status);
96 
97 private:
98     /**
99      * The symbol form of the currency.
100      */
101     UnicodeString fSymbol;
102 
103     /**
104      * The ISO form of the currency, usually three letter abbreviation.
105      */
106     UnicodeString fISO;
107 
108     /**
109      * The long forms of the currency keyed by plural variation.
110      */
111     PluralAffix fLong;
112 
113     UBool fIsDefault;
114 
115 };
116 
117 class AffixPatternIterator;
118 
119 /**
120  * A locale agnostic representation of an affix pattern.
121  */
122 class U_I18N_API AffixPattern : public UMemory {
123 public:
124 
125     /**
126      * The token types that can appear in an affix pattern.
127      */
128     enum ETokenType {
129         kLiteral,
130         kPercent,
131         kPerMill,
132         kCurrency,
133         kNegative,
134         kPositive
135     };
136 
137     /**
138      * An empty affix pattern.
139      */
AffixPattern()140     AffixPattern()
141             : tokens(), literals(), hasCurrencyToken(FALSE),
142               hasPercentToken(FALSE), hasPermillToken(FALSE),  char32Count(0) {
143     }
144 
145     /**
146      * Adds a string literal to this affix pattern.
147      */
148     void addLiteral(const UChar *, int32_t start, int32_t len);
149 
150     /**
151      * Adds a token to this affix pattern. t must not be kLiteral as
152      * the addLiteral() method adds literals.
153      * @param t the token type to add
154      */
155     void add(ETokenType t);
156 
157     /**
158      * Adds a currency token with specific count to this affix pattern.
159      * @param count the token count. Used to distinguish between
160      *  one, two, or three currency symbols. Note that adding a currency
161      *  token with count=2 (Use ISO code) is different than adding two
162      *  currency tokens each with count=1 (two currency symbols).
163      */
164     void addCurrency(uint8_t count);
165 
166     /**
167      * Makes this instance be an empty affix pattern.
168      */
169     void remove();
170 
171     /**
172      * Provides an iterator over the tokens in this instance.
173      * @param result this is initialized to point just before the
174      *   first token of this instance. Caller must call nextToken()
175      *   on the iterator once it is set up to have it actually point
176      *   to the first token. This first call to nextToken() will return
177      *   FALSE if the AffixPattern being iterated over is empty.
178      * @return result
179      */
180     AffixPatternIterator &iterator(AffixPatternIterator &result) const;
181 
182     /**
183      * Returns TRUE if this instance has currency tokens in it.
184      */
usesCurrency()185     UBool usesCurrency() const {
186         return hasCurrencyToken;
187     }
188 
usesPercent()189     UBool usesPercent() const {
190         return hasPercentToken;
191     }
192 
usesPermill()193     UBool usesPermill() const {
194         return hasPermillToken;
195     }
196 
197     /**
198      * Returns the number of code points a string of this instance
199      * would have if none of the special tokens were escaped.
200      * Used to compute the padding size.
201      */
countChar32()202     int32_t countChar32() const {
203         return char32Count;
204     }
205 
206     /**
207      * Appends other to this instance mutating this instance in place.
208      * @param other The pattern appended to the end of this one.
209      * @return a reference to this instance for chaining.
210      */
211     AffixPattern &append(const AffixPattern &other);
212 
213     /**
214      * Converts this AffixPattern back into a user string.
215      * It is the inverse of parseUserAffixString.
216      */
217     UnicodeString &toUserString(UnicodeString &appendTo) const;
218 
219     /**
220      * Converts this AffixPattern back into a string.
221      * It is the inverse of parseAffixString.
222      */
223     UnicodeString &toString(UnicodeString &appendTo) const;
224 
225     /**
226      * Parses an affix pattern string appending it to an AffixPattern.
227      * Parses affix pattern strings produced from using
228      * DecimalFormatPatternParser to parse a format pattern. Affix patterns
229      * include the positive prefix and suffix and the negative prefix
230      * and suffix. This method expects affix patterns strings to be in the
231      * same format that DecimalFormatPatternParser produces. Namely special
232      * characters in the affix that correspond to a field type must be
233      * prefixed with an apostrophe ('). These special character sequences
234      * inluce minus (-), percent (%), permile (U+2030), plus (+),
235      * short currency (U+00a4), medium currency (u+00a4 * 2),
236      * long currency (u+a4 * 3), and apostrophe (')
237      * (apostrophe does not correspond to a field type but has to be escaped
238      * because it itself is the escape character).
239      * Since the expansion of these special character
240      * sequences is locale dependent, these sequences are not expanded in
241      * an AffixPattern instance.
242      * If these special characters are not prefixed with an apostrophe in
243      * the affix pattern string, then they are treated verbatim just as
244      * any other character. If an apostrophe prefixes a non special
245      * character in the affix pattern, the apostrophe is simply ignored.
246      *
247      * @param affixStr the string from DecimalFormatPatternParser
248      * @param appendTo parsed result appended here.
249      * @param status any error parsing returned here.
250      */
251     static AffixPattern &parseAffixString(
252             const UnicodeString &affixStr,
253             AffixPattern &appendTo,
254             UErrorCode &status);
255 
256     /**
257      * Parses an affix pattern string appending it to an AffixPattern.
258      * Parses affix pattern strings as the user would supply them.
259      * In this function, quoting makes special characters like normal
260      * characters whereas in parseAffixString, quoting makes special
261      * characters special.
262      *
263      * @param affixStr the string from the user
264      * @param appendTo parsed result appended here.
265      * @param status any error parsing returned here.
266      */
267     static AffixPattern &parseUserAffixString(
268             const UnicodeString &affixStr,
269             AffixPattern &appendTo,
270             UErrorCode &status);
271 
equals(const AffixPattern & other)272     UBool equals(const AffixPattern &other) const {
273         return (tokens == other.tokens)
274                 && (literals == other.literals)
275                 && (hasCurrencyToken == other.hasCurrencyToken)
276                 && (hasPercentToken == other.hasPercentToken)
277                 && (hasPermillToken == other.hasPermillToken)
278                 && (char32Count == other.char32Count);
279     }
280 
281 private:
282     /*
283      * Tokens stored here. Each UChar generally stands for one token. A
284      * Each token is of form 'etttttttllllllll' llllllll is the length of
285      * the token and ranges from 0-255. ttttttt is the token type and ranges
286      * from 0-127. If e is set it means this is an extendo token (to be
287      * described later). To accomodate token lengths above 255, each normal
288      * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
289      * the same type. Right now only kLiteral Tokens have extendo tokens.
290      * Each extendo token provides the next 8 higher bits for the length.
291      * If a kLiteral token is followed by 2 extendo tokens then, then the
292      * llllllll of the next extendo token contains bits 8-15 of the length
293      * and the last extendo token contains bits 16-23 of the length.
294      */
295     UnicodeString tokens;
296 
297     /*
298      * The characters of the kLiteral tokens are concatenated together here.
299      * The first characters go with the first kLiteral token, the next
300      * characters go with the next kLiteral token etc.
301      */
302     UnicodeString literals;
303     UBool hasCurrencyToken;
304     UBool hasPercentToken;
305     UBool hasPermillToken;
306     int32_t char32Count;
307     void add(ETokenType t, uint8_t count);
308 
309 };
310 
311 /**
312  * An iterator over the tokens in an AffixPattern instance.
313  */
314 class U_I18N_API AffixPatternIterator : public UMemory {
315 public:
316 
317     /**
318      * Using an iterator without first calling iterator on an AffixPattern
319      * instance to initialize the iterator results in
320      * undefined behavior.
321      */
AffixPatternIterator()322     AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
323     /**
324      * Advances this iterator to the next token. Returns FALSE when there
325      * are no more tokens. Calling the other methods after nextToken()
326      * returns FALSE results in undefined behavior.
327      */
328     UBool nextToken();
329 
330     /**
331      * Returns the type of token.
332      */
333     AffixPattern::ETokenType getTokenType() const;
334 
335     /**
336      * For literal tokens, returns the literal string. Calling this for
337      * other token types results in undefined behavior.
338      * @param result replaced with a read-only alias to the literal string.
339      * @return result
340      */
341     UnicodeString &getLiteral(UnicodeString &result) const;
342 
343     /**
344      * Returns the token length. Usually 1, but for currency tokens may
345      * be 2 for ISO code and 3 for long form.
346      */
347     int32_t getTokenLength() const;
348 private:
349     int32_t nextLiteralIndex;
350     int32_t lastLiteralLength;
351     int32_t nextTokenIndex;
352     const UnicodeString *tokens;
353     const UnicodeString *literals;
354     friend class AffixPattern;
355     AffixPatternIterator(const AffixPatternIterator &);
356     AffixPatternIterator &operator=(const AffixPatternIterator &);
357 };
358 
359 /**
360  * A locale aware class that converts locale independent AffixPattern
361  * instances into locale dependent PluralAffix instances.
362  */
363 class U_I18N_API AffixPatternParser : public UMemory {
364 public:
365 AffixPatternParser();
366 AffixPatternParser(const DecimalFormatSymbols &symbols);
367 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
368 
369 /**
370  * Parses affixPattern appending the result to appendTo.
371  * @param affixPattern The affix pattern.
372  * @param currencyAffixInfo contains the currency forms.
373  * @param appendTo The result of parsing affixPattern is appended here.
374  * @param status any error returned here.
375  * @return appendTo.
376  */
377 PluralAffix &parse(
378         const AffixPattern &affixPattern,
379         const CurrencyAffixInfo &currencyAffixInfo,
380         PluralAffix &appendTo,
381         UErrorCode &status) const;
382 
equals(const AffixPatternParser & other)383 UBool equals(const AffixPatternParser &other) const {
384     return (fPercent == other.fPercent)
385             && (fPermill == other.fPermill)
386             && (fNegative == other.fNegative)
387             && (fPositive == other.fPositive);
388 }
389 
390 private:
391 UnicodeString fPercent;
392 UnicodeString fPermill;
393 UnicodeString fNegative;
394 UnicodeString fPositive;
395 };
396 
397 
398 U_NAMESPACE_END
399 #endif /* #if !UCONFIG_NO_FORMATTING */
400 #endif  // __AFFIX_PATTERN_PARSER_H__
401