1 /*
2 ******************************************************************************
3 * Copyright (C) 2014-2016, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 * simplepatternformatter.h
7 */
8 
9 #ifndef __SIMPLEPATTERNFORMATTER_H__
10 #define __SIMPLEPATTERNFORMATTER_H__
11 
12 #include "unicode/utypes.h"
13 #include "unicode/unistr.h"
14 
15 U_NAMESPACE_BEGIN
16 
17 /**
18  * Formats simple patterns like "{1} was born in {0}".
19  * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
20  * Supports only numbered arguments with no type nor style parameters,
21  * and formats only string values.
22  * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
23  *
24  * Factory methods throw exceptions for syntax errors
25  * and for too few or too many arguments/placeholders.
26  *
27  * SimplePatternFormatter objects are immutable and can be safely cached like strings.
28  *
29  * Example:
30  * <pre>
31  * UErrorCode errorCode = U_ZERO_ERROR;
32  * SimplePatternFormatter fmt("{1} '{born}' in {0}", errorCode);
33  * UnicodeString result;
34  *
35  * // Output: "paul {born} in england"
36  * fmt.format("england", "paul", result, errorCode);
37  * </pre>
38  *
39  * @see MessageFormat
40  * @see UMessagePatternApostropheMode
41  */
42 class U_COMMON_API SimplePatternFormatter : public UMemory {
43 public:
44     /**
45      * Default constructor.
46      */
SimplePatternFormatter()47     SimplePatternFormatter() : compiledPattern((UChar)0) {}
48 
49     /**
50      * Constructs a formatter from the pattern string.
51      *
52      * @param pattern The pattern string.
53      */
SimplePatternFormatter(const UnicodeString & pattern,UErrorCode & errorCode)54     explicit SimplePatternFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
55         compile(pattern, errorCode);
56     }
57 
58     /**
59      * Constructs a formatter from the pattern string.
60      *
61      * @param pattern The pattern string.
62      * @param min The pattern must have at least this many placeholders.
63      * @param max The pattern must have at most this many placeholders.
64      */
SimplePatternFormatter(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)65     SimplePatternFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
66                            UErrorCode &errorCode) {
67         compileMinMaxPlaceholders(pattern, min, max, errorCode);
68     }
69 
70     /**
71      * Copy constructor.
72      */
SimplePatternFormatter(const SimplePatternFormatter & other)73     SimplePatternFormatter(const SimplePatternFormatter& other)
74             : compiledPattern(other.compiledPattern) {}
75 
76     /**
77      * Assignment operator.
78      */
79     SimplePatternFormatter &operator=(const SimplePatternFormatter& other);
80 
81     /**
82      * Destructor.
83      */
84     ~SimplePatternFormatter();
85 
86     /**
87      * Changes this object according to the new pattern.
88      *
89      * @param pattern The pattern string.
90      * @return TRUE if U_SUCCESS(errorCode).
91      */
compile(const UnicodeString & pattern,UErrorCode & errorCode)92     UBool compile(const UnicodeString &pattern, UErrorCode &errorCode) {
93         return compileMinMaxPlaceholders(pattern, 0, INT32_MAX, errorCode);
94     }
95 
96     /**
97      * Changes this object according to the new pattern.
98      *
99      * @param pattern The pattern string.
100      * @param min The pattern must have at least this many placeholders.
101      * @param max The pattern must have at most this many placeholders.
102      * @return TRUE if U_SUCCESS(errorCode).
103      */
104     UBool compileMinMaxPlaceholders(const UnicodeString &pattern,
105                                     int32_t min, int32_t max, UErrorCode &errorCode);
106 
107     /**
108      * @return The max argument number/placeholder ID + 1.
109      */
getPlaceholderCount()110     int32_t getPlaceholderCount() const {
111         return getPlaceholderCount(compiledPattern.getBuffer(), compiledPattern.length());
112     }
113 
114     /**
115      * Formats the given value, appending to the appendTo builder.
116      * The placeholder value must not be the same object as appendTo.
117      * getPlaceholderCount() must be at most 1.
118      *
119      * @param value0 Value for argument {0}.
120      * @param appendTo Gets the formatted pattern and value appended.
121      * @param errorCode ICU error code in/out parameter.
122      *                  Must fulfill U_SUCCESS before the function call.
123      * @return appendTo
124      */
125     UnicodeString &format(
126             const UnicodeString &value0,
127             UnicodeString &appendTo, UErrorCode &errorCode) const;
128 
129     /**
130      * Formats the given values, appending to the appendTo builder.
131      * A placeholder value must not be the same object as appendTo.
132      * getPlaceholderCount() must be at most 2.
133      *
134      * @param value0 Value for argument {0}.
135      * @param value1 Value for argument {1}.
136      * @param appendTo Gets the formatted pattern and values appended.
137      * @param errorCode ICU error code in/out parameter.
138      *                  Must fulfill U_SUCCESS before the function call.
139      * @return appendTo
140      */
141     UnicodeString &format(
142             const UnicodeString &value0,
143             const UnicodeString &value1,
144             UnicodeString &appendTo, UErrorCode &errorCode) const;
145 
146     /**
147      * Formats the given values, appending to the appendTo builder.
148      * A placeholder value must not be the same object as appendTo.
149      * getPlaceholderCount() must be at most 3.
150      *
151      * @param value0 Value for argument {0}.
152      * @param value1 Value for argument {1}.
153      * @param value2 Value for argument {2}.
154      * @param appendTo Gets the formatted pattern and values appended.
155      * @param errorCode ICU error code in/out parameter.
156      *                  Must fulfill U_SUCCESS before the function call.
157      * @return appendTo
158      */
159     UnicodeString &format(
160             const UnicodeString &value0,
161             const UnicodeString &value1,
162             const UnicodeString &value2,
163             UnicodeString &appendTo, UErrorCode &errorCode) const;
164 
165     /**
166      * Formats the given values, appending to the appendTo string.
167      *
168      * @param values The placeholder values.
169      *               A placeholder value must not be the same object as appendTo.
170      *               Can be NULL if valuesLength==getPlaceholderCount()==0.
171      * @param valuesLength The length of the values array.
172      *                     Must be at least getPlaceholderCount().
173      * @param appendTo Gets the formatted pattern and values appended.
174      * @param offsets offsets[i] receives the offset of where
175      *                values[i] replaced pattern argument {i}.
176      *                Can be shorter or longer than values. Can be NULL if offsetsLength==0.
177      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
178      * @param offsetsLength The length of the offsets array.
179      * @param errorCode ICU error code in/out parameter.
180      *                  Must fulfill U_SUCCESS before the function call.
181      * @return appendTo
182      */
183     UnicodeString &formatAndAppend(
184             const UnicodeString *const *values, int32_t valuesLength,
185             UnicodeString &appendTo,
186             int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
187 
188     /**
189      * Formats the given values, replacing the contents of the result string.
190      * May optimize by actually appending to the result if it is the same object
191      * as the initial argument's corresponding value.
192      *
193      * @param values The placeholder values.
194      *               A placeholder value may be the same object as result.
195      *               Can be NULL if valuesLength==getPlaceholderCount()==0.
196      * @param valuesLength The length of the values array.
197      *                     Must be at least getPlaceholderCount().
198      * @param result Gets its contents replaced by the formatted pattern and values.
199      * @param offsets offsets[i] receives the offset of where
200      *                values[i] replaced pattern argument {i}.
201      *                Can be shorter or longer than values. Can be NULL if offsetsLength==0.
202      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
203      * @param offsetsLength The length of the offsets array.
204      * @param errorCode ICU error code in/out parameter.
205      *                  Must fulfill U_SUCCESS before the function call.
206      * @return result
207      */
208     UnicodeString &formatAndReplace(
209             const UnicodeString *const *values, int32_t valuesLength,
210             UnicodeString &result,
211             int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
212 
213     /**
214      * Returns the pattern text with none of the placeholders.
215      * Like formatting with all-empty string values.
216      */
getTextWithNoPlaceholders()217     UnicodeString getTextWithNoPlaceholders() const {
218         return getTextWithNoPlaceholders(compiledPattern.getBuffer(), compiledPattern.length());
219     }
220 
221 private:
222     /**
223      * Binary representation of the compiled pattern.
224      * Index 0: One more than the highest argument number.
225      * Followed by zero or more arguments or literal-text segments.
226      *
227      * An argument is stored as its number, less than ARG_NUM_LIMIT.
228      * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
229      * followed by that many chars.
230      */
231     UnicodeString compiledPattern;
232 
getPlaceholderCount(const UChar * compiledPattern,int32_t compiledPatternLength)233     static inline int32_t getPlaceholderCount(const UChar *compiledPattern,
234                                               int32_t compiledPatternLength) {
235         return compiledPatternLength == 0 ? 0 : compiledPattern[0];
236     }
237 
238     static UnicodeString getTextWithNoPlaceholders(const UChar *compiledPattern, int32_t compiledPatternLength);
239 
240     static UnicodeString &format(
241             const UChar *compiledPattern, int32_t compiledPatternLength,
242             const UnicodeString *const *values,
243             UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
244             int32_t *offsets, int32_t offsetsLength,
245             UErrorCode &errorCode);
246 };
247 
248 U_NAMESPACE_END
249 
250 #endif
251