1 /*
2 *******************************************************************************
3 * Copyright (C) 2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * affixpatternparser.h
7 *
8 * created on: 2015jan06
9 * created by: Travis Keep
10 */
11
12 #ifndef __AFFIX_PATTERN_PARSER_H__
13 #define __AFFIX_PATTERN_PARSER_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_FORMATTING
18
19 #include "unicode/unistr.h"
20 #include "unicode/uobject.h"
21 #include "pluralaffix.h"
22
23 U_NAMESPACE_BEGIN
24
// Forward declarations: this header only refers to these types through
// pointers and references, so their full definitions are not needed here.
class DecimalFormatSymbols;
class FixedPrecision;
class PluralRules;
28
29 /**
30 * A representation of the various forms of a particular currency according
31 * to some locale and usage context.
32 *
33 * Includes the symbol, ISO code form, and long form(s) of the currency name
34 * for each plural variation.
35 */
36 class U_I18N_API CurrencyAffixInfo : public UMemory {
37 public:
38 /**
39 * Symbol is \u00a4; ISO form is \u00a4\u00a4;
40 * long form is \u00a4\u00a4\u00a4.
41 */
42 CurrencyAffixInfo();
43
getSymbol()44 const UnicodeString &getSymbol() const { return fSymbol; }
getISO()45 const UnicodeString &getISO() const { return fISO; }
getLong()46 const PluralAffix &getLong() const { return fLong; }
setSymbol(const UnicodeString & symbol)47 void setSymbol(const UnicodeString &symbol) {
48 fSymbol = symbol;
49 fIsDefault = FALSE;
50 }
setISO(const UnicodeString & iso)51 void setISO(const UnicodeString &iso) {
52 fISO = iso;
53 fIsDefault = FALSE;
54 }
55 UBool
equals(const CurrencyAffixInfo & other)56 equals(const CurrencyAffixInfo &other) const {
57 return (fSymbol == other.fSymbol)
58 && (fISO == other.fISO)
59 && (fLong.equals(other.fLong))
60 && (fIsDefault == other.fIsDefault);
61 }
62
63 /**
64 * Intializes this instance.
65 *
66 * @param locale the locale for the currency forms.
67 * @param rules The plural rules for the locale.
68 * @param currency the null terminated, 3 character ISO code of the
69 * currency. If NULL, resets this instance as if it were just created.
70 * In this case, the first 2 parameters may be NULL as well.
71 * @param status any error returned here.
72 */
73 void set(
74 const char *locale, const PluralRules *rules,
75 const UChar *currency, UErrorCode &status);
76
77 /**
78 * Returns true if this instance is the default. That is has no real
79 * currency. For instance never initialized with set()
80 * or reset with set(NULL, NULL, NULL, status).
81 */
isDefault()82 UBool isDefault() const { return fIsDefault; }
83
84 /**
85 * Adjusts the precision used for a particular currency.
86 * @param currency the null terminated, 3 character ISO code of the
87 * currency.
88 * @param usage the usage of the currency
89 * @param precision min/max fraction digits and rounding increment
90 * adjusted.
91 * @params status any error reported here.
92 */
93 static void adjustPrecision(
94 const UChar *currency, const UCurrencyUsage usage,
95 FixedPrecision &precision, UErrorCode &status);
96
97 private:
98 /**
99 * The symbol form of the currency.
100 */
101 UnicodeString fSymbol;
102
103 /**
104 * The ISO form of the currency, usually three letter abbreviation.
105 */
106 UnicodeString fISO;
107
108 /**
109 * The long forms of the currency keyed by plural variation.
110 */
111 PluralAffix fLong;
112
113 UBool fIsDefault;
114
115 };
116
// Declared ahead of its definition because AffixPattern::iterator() takes
// and returns an AffixPatternIterator by reference.
class AffixPatternIterator;
118
119 /**
120 * A locale agnostic representation of an affix pattern.
121 */
122 class U_I18N_API AffixPattern : public UMemory {
123 public:
124
125 /**
126 * The token types that can appear in an affix pattern.
127 */
128 enum ETokenType {
129 kLiteral,
130 kPercent,
131 kPerMill,
132 kCurrency,
133 kNegative,
134 kPositive
135 };
136
137 /**
138 * An empty affix pattern.
139 */
AffixPattern()140 AffixPattern()
141 : tokens(), literals(), hasCurrencyToken(FALSE),
142 hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) {
143 }
144
145 /**
146 * Adds a string literal to this affix pattern.
147 */
148 void addLiteral(const UChar *, int32_t start, int32_t len);
149
150 /**
151 * Adds a token to this affix pattern. t must not be kLiteral as
152 * the addLiteral() method adds literals.
153 * @param t the token type to add
154 */
155 void add(ETokenType t);
156
157 /**
158 * Adds a currency token with specific count to this affix pattern.
159 * @param count the token count. Used to distinguish between
160 * one, two, or three currency symbols. Note that adding a currency
161 * token with count=2 (Use ISO code) is different than adding two
162 * currency tokens each with count=1 (two currency symbols).
163 */
164 void addCurrency(uint8_t count);
165
166 /**
167 * Makes this instance be an empty affix pattern.
168 */
169 void remove();
170
171 /**
172 * Provides an iterator over the tokens in this instance.
173 * @param result this is initialized to point just before the
174 * first token of this instance. Caller must call nextToken()
175 * on the iterator once it is set up to have it actually point
176 * to the first token. This first call to nextToken() will return
177 * FALSE if the AffixPattern being iterated over is empty.
178 * @return result
179 */
180 AffixPatternIterator &iterator(AffixPatternIterator &result) const;
181
182 /**
183 * Returns TRUE if this instance has currency tokens in it.
184 */
usesCurrency()185 UBool usesCurrency() const {
186 return hasCurrencyToken;
187 }
188
usesPercent()189 UBool usesPercent() const {
190 return hasPercentToken;
191 }
192
usesPermill()193 UBool usesPermill() const {
194 return hasPermillToken;
195 }
196
197 /**
198 * Returns the number of code points a string of this instance
199 * would have if none of the special tokens were escaped.
200 * Used to compute the padding size.
201 */
countChar32()202 int32_t countChar32() const {
203 return char32Count;
204 }
205
206 /**
207 * Appends other to this instance mutating this instance in place.
208 * @param other The pattern appended to the end of this one.
209 * @return a reference to this instance for chaining.
210 */
211 AffixPattern &append(const AffixPattern &other);
212
213 /**
214 * Converts this AffixPattern back into a user string.
215 * It is the inverse of parseUserAffixString.
216 */
217 UnicodeString &toUserString(UnicodeString &appendTo) const;
218
219 /**
220 * Converts this AffixPattern back into a string.
221 * It is the inverse of parseAffixString.
222 */
223 UnicodeString &toString(UnicodeString &appendTo) const;
224
225 /**
226 * Parses an affix pattern string appending it to an AffixPattern.
227 * Parses affix pattern strings produced from using
228 * DecimalFormatPatternParser to parse a format pattern. Affix patterns
229 * include the positive prefix and suffix and the negative prefix
230 * and suffix. This method expects affix patterns strings to be in the
231 * same format that DecimalFormatPatternParser produces. Namely special
232 * characters in the affix that correspond to a field type must be
233 * prefixed with an apostrophe ('). These special character sequences
234 * inluce minus (-), percent (%), permile (U+2030), plus (+),
235 * short currency (U+00a4), medium currency (u+00a4 * 2),
236 * long currency (u+a4 * 3), and apostrophe (')
237 * (apostrophe does not correspond to a field type but has to be escaped
238 * because it itself is the escape character).
239 * Since the expansion of these special character
240 * sequences is locale dependent, these sequences are not expanded in
241 * an AffixPattern instance.
242 * If these special characters are not prefixed with an apostrophe in
243 * the affix pattern string, then they are treated verbatim just as
244 * any other character. If an apostrophe prefixes a non special
245 * character in the affix pattern, the apostrophe is simply ignored.
246 *
247 * @param affixStr the string from DecimalFormatPatternParser
248 * @param appendTo parsed result appended here.
249 * @param status any error parsing returned here.
250 */
251 static AffixPattern &parseAffixString(
252 const UnicodeString &affixStr,
253 AffixPattern &appendTo,
254 UErrorCode &status);
255
256 /**
257 * Parses an affix pattern string appending it to an AffixPattern.
258 * Parses affix pattern strings as the user would supply them.
259 * In this function, quoting makes special characters like normal
260 * characters whereas in parseAffixString, quoting makes special
261 * characters special.
262 *
263 * @param affixStr the string from the user
264 * @param appendTo parsed result appended here.
265 * @param status any error parsing returned here.
266 */
267 static AffixPattern &parseUserAffixString(
268 const UnicodeString &affixStr,
269 AffixPattern &appendTo,
270 UErrorCode &status);
271
equals(const AffixPattern & other)272 UBool equals(const AffixPattern &other) const {
273 return (tokens == other.tokens)
274 && (literals == other.literals)
275 && (hasCurrencyToken == other.hasCurrencyToken)
276 && (hasPercentToken == other.hasPercentToken)
277 && (hasPermillToken == other.hasPermillToken)
278 && (char32Count == other.char32Count);
279 }
280
281 private:
282 /*
283 * Tokens stored here. Each UChar generally stands for one token. A
284 * Each token is of form 'etttttttllllllll' llllllll is the length of
285 * the token and ranges from 0-255. ttttttt is the token type and ranges
286 * from 0-127. If e is set it means this is an extendo token (to be
287 * described later). To accomodate token lengths above 255, each normal
288 * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
289 * the same type. Right now only kLiteral Tokens have extendo tokens.
290 * Each extendo token provides the next 8 higher bits for the length.
291 * If a kLiteral token is followed by 2 extendo tokens then, then the
292 * llllllll of the next extendo token contains bits 8-15 of the length
293 * and the last extendo token contains bits 16-23 of the length.
294 */
295 UnicodeString tokens;
296
297 /*
298 * The characters of the kLiteral tokens are concatenated together here.
299 * The first characters go with the first kLiteral token, the next
300 * characters go with the next kLiteral token etc.
301 */
302 UnicodeString literals;
303 UBool hasCurrencyToken;
304 UBool hasPercentToken;
305 UBool hasPermillToken;
306 int32_t char32Count;
307 void add(ETokenType t, uint8_t count);
308
309 };
310
311 /**
312 * An iterator over the tokens in an AffixPattern instance.
313 */
314 class U_I18N_API AffixPatternIterator : public UMemory {
315 public:
316
317 /**
318 * Using an iterator without first calling iterator on an AffixPattern
319 * instance to initialize the iterator results in
320 * undefined behavior.
321 */
AffixPatternIterator()322 AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
323 /**
324 * Advances this iterator to the next token. Returns FALSE when there
325 * are no more tokens. Calling the other methods after nextToken()
326 * returns FALSE results in undefined behavior.
327 */
328 UBool nextToken();
329
330 /**
331 * Returns the type of token.
332 */
333 AffixPattern::ETokenType getTokenType() const;
334
335 /**
336 * For literal tokens, returns the literal string. Calling this for
337 * other token types results in undefined behavior.
338 * @param result replaced with a read-only alias to the literal string.
339 * @return result
340 */
341 UnicodeString &getLiteral(UnicodeString &result) const;
342
343 /**
344 * Returns the token length. Usually 1, but for currency tokens may
345 * be 2 for ISO code and 3 for long form.
346 */
347 int32_t getTokenLength() const;
348 private:
349 int32_t nextLiteralIndex;
350 int32_t lastLiteralLength;
351 int32_t nextTokenIndex;
352 const UnicodeString *tokens;
353 const UnicodeString *literals;
354 friend class AffixPattern;
355 AffixPatternIterator(const AffixPatternIterator &);
356 AffixPatternIterator &operator=(const AffixPatternIterator &);
357 };
358
359 /**
360 * A locale aware class that converts locale independent AffixPattern
361 * instances into locale dependent PluralAffix instances.
362 */
363 class U_I18N_API AffixPatternParser : public UMemory {
364 public:
365 AffixPatternParser();
366 AffixPatternParser(const DecimalFormatSymbols &symbols);
367 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
368
369 /**
370 * Parses affixPattern appending the result to appendTo.
371 * @param affixPattern The affix pattern.
372 * @param currencyAffixInfo contains the currency forms.
373 * @param appendTo The result of parsing affixPattern is appended here.
374 * @param status any error returned here.
375 * @return appendTo.
376 */
377 PluralAffix &parse(
378 const AffixPattern &affixPattern,
379 const CurrencyAffixInfo ¤cyAffixInfo,
380 PluralAffix &appendTo,
381 UErrorCode &status) const;
382
equals(const AffixPatternParser & other)383 UBool equals(const AffixPatternParser &other) const {
384 return (fPercent == other.fPercent)
385 && (fPermill == other.fPermill)
386 && (fNegative == other.fNegative)
387 && (fPositive == other.fPositive);
388 }
389
390 private:
391 UnicodeString fPercent;
392 UnicodeString fPermill;
393 UnicodeString fNegative;
394 UnicodeString fPositive;
395 };
396
397
398 U_NAMESPACE_END
399 #endif /* #if !UCONFIG_NO_FORMATTING */
400 #endif // __AFFIX_PATTERN_PARSER_H__
401