1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 2014-2016, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 ******************************************************************************
8 * simpleformatter.h
9 */
10 
11 #ifndef __SIMPLEFORMATTER_H__
12 #define __SIMPLEFORMATTER_H__
13 
14 /**
15  * \file
16  * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
17  */
18 
19 #include "unicode/utypes.h"
20 #include "unicode/unistr.h"
21 
22 #ifndef U_HIDE_DRAFT_API
23 
24 U_NAMESPACE_BEGIN
25 
26 /**
27  * Formats simple patterns like "{1} was born in {0}".
28  * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
29  * Supports only numbered arguments with no type nor style parameters,
30  * and formats only string values.
31  * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
32  *
33  * Factory methods set error codes for syntax errors
34  * and for too few or too many arguments/placeholders.
35  *
36  * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
37  *
38  * Example:
39  * <pre>
40  * UErrorCode errorCode = U_ZERO_ERROR;
41  * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
42  * UnicodeString result;
43  *
44  * // Output: "paul {born} in england"
45  * fmt.format("england", "paul", result, errorCode);
46  * </pre>
47  *
48  * This class is not intended for public subclassing.
49  *
50  * @see MessageFormat
51  * @see UMessagePatternApostropheMode
52  * @draft ICU 57
53  */
54 class U_COMMON_API SimpleFormatter U_FINAL : public UMemory {
55 public:
56     /**
57      * Default constructor.
58      * @draft ICU 57
59      */
SimpleFormatter()60     SimpleFormatter() : compiledPattern((UChar)0) {}
61 
62     /**
63      * Constructs a formatter from the pattern string.
64      *
65      * @param pattern The pattern string.
66      * @param errorCode ICU error code in/out parameter.
67      *                  Must fulfill U_SUCCESS before the function call.
68      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
69      * @draft ICU 57
70      */
SimpleFormatter(const UnicodeString & pattern,UErrorCode & errorCode)71     SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
72         applyPattern(pattern, errorCode);
73     }
74 
75     /**
76      * Constructs a formatter from the pattern string.
77      * The number of arguments checked against the given limits is the
78      * highest argument number plus one, not the number of occurrences of arguments.
79      *
80      * @param pattern The pattern string.
81      * @param min The pattern must have at least this many arguments.
82      * @param max The pattern must have at most this many arguments.
83      * @param errorCode ICU error code in/out parameter.
84      *                  Must fulfill U_SUCCESS before the function call.
85      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
86      *                  too few or too many arguments.
87      * @draft ICU 57
88      */
SimpleFormatter(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)89     SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
90                     UErrorCode &errorCode) {
91         applyPatternMinMaxArguments(pattern, min, max, errorCode);
92     }
93 
94     /**
95      * Copy constructor.
96      * @draft ICU 57
97      */
SimpleFormatter(const SimpleFormatter & other)98     SimpleFormatter(const SimpleFormatter& other)
99             : compiledPattern(other.compiledPattern) {}
100 
101     /**
102      * Assignment operator.
103      * @draft ICU 57
104      */
105     SimpleFormatter &operator=(const SimpleFormatter& other);
106 
107     /**
108      * Destructor.
109      * @draft ICU 57
110      */
111     ~SimpleFormatter();
112 
113     /**
114      * Changes this object according to the new pattern.
115      *
116      * @param pattern The pattern string.
117      * @param errorCode ICU error code in/out parameter.
118      *                  Must fulfill U_SUCCESS before the function call.
119      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
120      * @return TRUE if U_SUCCESS(errorCode).
121      * @draft ICU 57
122      */
applyPattern(const UnicodeString & pattern,UErrorCode & errorCode)123     UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
124         return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
125     }
126 
127     /**
128      * Changes this object according to the new pattern.
129      * The number of arguments checked against the given limits is the
130      * highest argument number plus one, not the number of occurrences of arguments.
131      *
132      * @param pattern The pattern string.
133      * @param min The pattern must have at least this many arguments.
134      * @param max The pattern must have at most this many arguments.
135      * @param errorCode ICU error code in/out parameter.
136      *                  Must fulfill U_SUCCESS before the function call.
137      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
138      *                  too few or too many arguments.
139      * @return TRUE if U_SUCCESS(errorCode).
140      * @draft ICU 57
141      */
142     UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
143                                       int32_t min, int32_t max, UErrorCode &errorCode);
144 
145     /**
146      * @return The max argument number + 1.
147      * @draft ICU 57
148      */
getArgumentLimit()149     int32_t getArgumentLimit() const {
150         return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
151     }
152 
153     /**
154      * Formats the given value, appending to the appendTo builder.
155      * The argument value must not be the same object as appendTo.
156      * getArgumentLimit() must be at most 1.
157      *
158      * @param value0 Value for argument {0}.
159      * @param appendTo Gets the formatted pattern and value appended.
160      * @param errorCode ICU error code in/out parameter.
161      *                  Must fulfill U_SUCCESS before the function call.
162      * @return appendTo
163      * @draft ICU 57
164      */
165     UnicodeString &format(
166             const UnicodeString &value0,
167             UnicodeString &appendTo, UErrorCode &errorCode) const;
168 
169     /**
170      * Formats the given values, appending to the appendTo builder.
171      * An argument value must not be the same object as appendTo.
172      * getArgumentLimit() must be at most 2.
173      *
174      * @param value0 Value for argument {0}.
175      * @param value1 Value for argument {1}.
176      * @param appendTo Gets the formatted pattern and values appended.
177      * @param errorCode ICU error code in/out parameter.
178      *                  Must fulfill U_SUCCESS before the function call.
179      * @return appendTo
180      * @draft ICU 57
181      */
182     UnicodeString &format(
183             const UnicodeString &value0,
184             const UnicodeString &value1,
185             UnicodeString &appendTo, UErrorCode &errorCode) const;
186 
187     /**
188      * Formats the given values, appending to the appendTo builder.
189      * An argument value must not be the same object as appendTo.
190      * getArgumentLimit() must be at most 3.
191      *
192      * @param value0 Value for argument {0}.
193      * @param value1 Value for argument {1}.
194      * @param value2 Value for argument {2}.
195      * @param appendTo Gets the formatted pattern and values appended.
196      * @param errorCode ICU error code in/out parameter.
197      *                  Must fulfill U_SUCCESS before the function call.
198      * @return appendTo
199      * @draft ICU 57
200      */
201     UnicodeString &format(
202             const UnicodeString &value0,
203             const UnicodeString &value1,
204             const UnicodeString &value2,
205             UnicodeString &appendTo, UErrorCode &errorCode) const;
206 
207     /**
208      * Formats the given values, appending to the appendTo string.
209      *
210      * @param values The argument values.
211      *               An argument value must not be the same object as appendTo.
212      *               Can be NULL if valuesLength==getArgumentLimit()==0.
213      * @param valuesLength The length of the values array.
214      *                     Must be at least getArgumentLimit().
215      * @param appendTo Gets the formatted pattern and values appended.
216      * @param offsets offsets[i] receives the offset of where
217      *                values[i] replaced pattern argument {i}.
218      *                Can be shorter or longer than values. Can be NULL if offsetsLength==0.
219      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
220      * @param offsetsLength The length of the offsets array.
221      * @param errorCode ICU error code in/out parameter.
222      *                  Must fulfill U_SUCCESS before the function call.
223      * @return appendTo
224      * @draft ICU 57
225      */
226     UnicodeString &formatAndAppend(
227             const UnicodeString *const *values, int32_t valuesLength,
228             UnicodeString &appendTo,
229             int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
230 
231     /**
232      * Formats the given values, replacing the contents of the result string.
233      * May optimize by actually appending to the result if it is the same object
234      * as the value corresponding to the initial argument in the pattern.
235      *
236      * @param values The argument values.
237      *               An argument value may be the same object as result.
238      *               Can be NULL if valuesLength==getArgumentLimit()==0.
239      * @param valuesLength The length of the values array.
240      *                     Must be at least getArgumentLimit().
241      * @param result Gets its contents replaced by the formatted pattern and values.
242      * @param offsets offsets[i] receives the offset of where
243      *                values[i] replaced pattern argument {i}.
244      *                Can be shorter or longer than values. Can be NULL if offsetsLength==0.
245      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
246      * @param offsetsLength The length of the offsets array.
247      * @param errorCode ICU error code in/out parameter.
248      *                  Must fulfill U_SUCCESS before the function call.
249      * @return result
250      * @draft ICU 57
251      */
252     UnicodeString &formatAndReplace(
253             const UnicodeString *const *values, int32_t valuesLength,
254             UnicodeString &result,
255             int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
256 
257     /**
258      * Returns the pattern text with none of the arguments.
259      * Like formatting with all-empty string values.
260      * @draft ICU 57
261      */
getTextWithNoArguments()262     UnicodeString getTextWithNoArguments() const {
263         return getTextWithNoArguments(compiledPattern.getBuffer(), compiledPattern.length());
264     }
265 
266 private:
267     /**
268      * Binary representation of the compiled pattern.
269      * Index 0: One more than the highest argument number.
270      * Followed by zero or more arguments or literal-text segments.
271      *
272      * An argument is stored as its number, less than ARG_NUM_LIMIT.
273      * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
274      * followed by that many chars.
275      */
276     UnicodeString compiledPattern;
277 
getArgumentLimit(const UChar * compiledPattern,int32_t compiledPatternLength)278     static inline int32_t getArgumentLimit(const UChar *compiledPattern,
279                                               int32_t compiledPatternLength) {
280         return compiledPatternLength == 0 ? 0 : compiledPattern[0];
281     }
282 
283     static UnicodeString getTextWithNoArguments(const UChar *compiledPattern, int32_t compiledPatternLength);
284 
285     static UnicodeString &format(
286             const UChar *compiledPattern, int32_t compiledPatternLength,
287             const UnicodeString *const *values,
288             UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
289             int32_t *offsets, int32_t offsetsLength,
290             UErrorCode &errorCode);
291 };
292 
293 U_NAMESPACE_END
294 
295 #endif /* U_HIDE_DRAFT_API */
296 
297 #endif  // __SIMPLEFORMATTER_H__
298