1 /*
2 ******************************************************************************
3 * Copyright (C) 2014-2016, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 * simplepatternformatter.cpp
7 */
8 
9 #include "unicode/utypes.h"
10 #include "unicode/unistr.h"
11 #include "simplepatternformatter.h"
12 #include "uassert.h"
13 
14 U_NAMESPACE_BEGIN
15 
16 namespace {
17 
18 /**
19  * Argument numbers must be smaller than this limit.
20  * Text segment lengths are offset by this much.
21  * This is currently the only unused char value in compiled patterns,
22  * except it is the maximum value of the first unit (max arg +1).
23  */
24 const int32_t ARG_NUM_LIMIT = 0x100;
25 /**
26  * Initial and maximum char/UChar value set for a text segment.
27  * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
28  * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
29  */
30 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
31 /**
32  * Maximum length of a text segment. Longer segments are split into shorter ones.
33  */
34 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
35 
36 enum {
37     APOS = 0x27,
38     DIGIT_ZERO = 0x30,
39     DIGIT_ONE = 0x31,
40     DIGIT_NINE = 0x39,
41     OPEN_BRACE = 0x7b,
42     CLOSE_BRACE = 0x7d
43 };
44 
isInvalidArray(const void * array,int32_t length)45 inline UBool isInvalidArray(const void *array, int32_t length) {
46    return (length < 0 || (array == NULL && length != 0));
47 }
48 
49 }  // namespace
50 
/**
 * Copy assignment: takes over the compiled pattern of the other formatter.
 * Assigning an object to itself changes nothing.
 */
SimplePatternFormatter &SimplePatternFormatter::operator=(
        const SimplePatternFormatter& other) {
    if (this != &other) {
        compiledPattern = other.compiledPattern;
    }
    return *this;
}
59 
~SimplePatternFormatter()60 SimplePatternFormatter::~SimplePatternFormatter() {}
61 
compileMinMaxPlaceholders(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)62 UBool SimplePatternFormatter::compileMinMaxPlaceholders(
63         const UnicodeString &pattern,
64         int32_t min, int32_t max,
65         UErrorCode &errorCode) {
66     if (U_FAILURE(errorCode)) {
67         return FALSE;
68     }
69     // Parse consistent with MessagePattern, but
70     // - support only simple numbered arguments
71     // - build a simple binary structure into the result string
72     const UChar *patternBuffer = pattern.getBuffer();
73     int32_t patternLength = pattern.length();
74     // Reserve the first char for the number of arguments.
75     compiledPattern.setTo((UChar)0);
76     int32_t textLength = 0;
77     int32_t maxArg = -1;
78     UBool inQuote = FALSE;
79     for (int32_t i = 0; i < patternLength;) {
80         UChar c = patternBuffer[i++];
81         if (c == APOS) {
82             if (i < patternLength && (c = patternBuffer[i]) == APOS) {
83                 // double apostrophe, skip the second one
84                 ++i;
85             } else if (inQuote) {
86                 // skip the quote-ending apostrophe
87                 inQuote = FALSE;
88                 continue;
89             } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
90                 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
91                 ++i;
92                 inQuote = TRUE;
93             } else {
94                 // The apostrophe is part of literal text.
95                 c = APOS;
96             }
97         } else if (!inQuote && c == OPEN_BRACE) {
98             if (textLength > 0) {
99                 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
100                                           (UChar)(ARG_NUM_LIMIT + textLength));
101                 textLength = 0;
102             }
103             int32_t argNumber;
104             if ((i + 1) < patternLength &&
105                     0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
106                     patternBuffer[i + 1] == CLOSE_BRACE) {
107                 i += 2;
108             } else {
109                 // Multi-digit argument number (no leading zero) or syntax error.
110                 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
111                 // around the number, but this class does not.
112                 argNumber = -1;
113                 if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
114                     argNumber = c - DIGIT_ZERO;
115                     while (i < patternLength &&
116                             DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
117                         argNumber = argNumber * 10 + (c - DIGIT_ZERO);
118                         if (argNumber >= ARG_NUM_LIMIT) {
119                             break;
120                         }
121                     }
122                 }
123                 if (argNumber < 0 || c != CLOSE_BRACE) {
124                     errorCode = U_ILLEGAL_ARGUMENT_ERROR;
125                     return FALSE;
126                 }
127             }
128             if (argNumber > maxArg) {
129                 maxArg = argNumber;
130             }
131             compiledPattern.append((UChar)argNumber);
132             continue;
133         }  // else: c is part of literal text
134         // Append c and track the literal-text segment length.
135         if (textLength == 0) {
136             // Reserve a char for the length of a new text segment, preset the maximum length.
137             compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
138         }
139         compiledPattern.append(c);
140         if (++textLength == MAX_SEGMENT_LENGTH) {
141             textLength = 0;
142         }
143     }
144     if (textLength > 0) {
145         compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
146                                   (UChar)(ARG_NUM_LIMIT + textLength));
147     }
148     int32_t argCount = maxArg + 1;
149     if (argCount < min || max < argCount) {
150         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
151         return FALSE;
152     }
153     compiledPattern.setCharAt(0, (UChar)argCount);
154     return TRUE;
155 }
156 
/**
 * Formats with one argument value and appends the outcome to appendTo.
 * Delegates to formatAndAppend() with a one-element value array and no offsets.
 *
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &value0,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *args[1];
    args[0] = &value0;
    return formatAndAppend(args, 1, appendTo, NULL, 0, errorCode);
}
163 
/**
 * Formats with two argument values and appends the outcome to appendTo.
 * Delegates to formatAndAppend() with a two-element value array and no offsets.
 *
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *args[2];
    args[0] = &value0;
    args[1] = &value1;
    return formatAndAppend(args, 2, appendTo, NULL, 0, errorCode);
}
171 
/**
 * Formats with three argument values and appends the outcome to appendTo.
 * Delegates to formatAndAppend() with a three-element value array and no offsets.
 *
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        const UnicodeString &value2,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *args[3];
    args[0] = &value0;
    args[1] = &value1;
    args[2] = &value2;
    return formatAndAppend(args, 3, appendTo, NULL, 0, errorCode);
}
180 
/**
 * Formats the given values according to the compiled pattern and appends
 * the outcome to appendTo.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR when either (array, length) pair is invalid
 * or when fewer values are supplied than getPlaceholderCount() reports.
 * The static format() worker is called with forbidResultAsValue=TRUE and no
 * result copy, so it rejects appendTo itself appearing among the used values.
 *
 * @param values        Argument values.
 * @param valuesLength  Number of entries in values.
 * @param appendTo      Receives the formatted text at its end.
 * @param offsets       Optional array receiving per-argument offsets; may be NULL
 *                      when offsetsLength is 0.
 * @param offsetsLength Number of entries in offsets.
 * @param errorCode     In/out error code; nothing is done if it already indicates failure.
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::formatAndAppend(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &appendTo,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    const UBool argsAreBad =
            isInvalidArray(values, valuesLength) ||
            isInvalidArray(offsets, offsetsLength) ||
            valuesLength < getPlaceholderCount();
    if (!argsAreBad) {
        return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
                      appendTo, NULL, TRUE,
                      offsets, offsetsLength, errorCode);
    }
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return appendTo;
}
197 
/**
 * Formats the given values according to the compiled pattern into result,
 * replacing its previous contents. Unlike formatAndAppend(), result itself
 * may be one of the values.
 *
 * - If the compiled pattern begins with an argument whose value is the result
 *   object, the current contents of result are kept and the rest is appended.
 * - If the result object is the value of an argument elsewhere in the pattern,
 *   its contents are copied before result is modified, and the copy is used instead.
 * - Otherwise result is emptied before formatting.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR when either (array, length) pair is invalid
 * or when fewer values are supplied than the pattern's placeholder count.
 *
 * @param values        Argument values.
 * @param valuesLength  Number of entries in values.
 * @param result        Receives the formatted text.
 * @param offsets       Optional array receiving per-argument offsets; may be NULL
 *                      when offsetsLength is 0.
 * @param offsetsLength Number of entries in offsets.
 * @param errorCode     In/out error code; nothing is done if it already indicates failure.
 * @return result
 */
UnicodeString &SimplePatternFormatter::formatAndReplace(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &result,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }
    const UChar *cp = compiledPattern.getBuffer();
    const int32_t cpLength = compiledPattern.length();
    const int32_t argCount = getPlaceholderCount(cp, cpLength);
    if (valuesLength < argCount) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    // TRUE if the pattern starts with an argument whose value is the result object;
    // then the result contents stay in place and everything else is appended.
    UBool keepResultContents = FALSE;
    // Snapshot of result, taken only if result is also a non-initial argument value.
    UnicodeString resultCopy;
    if (argCount > 0) {
        int32_t pos = 1;  // unit 0 holds the argument count
        while (pos < cpLength) {
            const int32_t unit = cp[pos++];
            if (unit >= ARG_NUM_LIMIT) {
                // Text segment: skip over its literal units.
                pos += unit - ARG_NUM_LIMIT;
                continue;
            }
            if (values[unit] != &result) {
                continue;
            }
            if (pos == 2) {
                // The argument sits right after the count unit, at the pattern start.
                keepResultContents = TRUE;
            } else if (resultCopy.isEmpty() && !result.isEmpty()) {
                resultCopy = result;
            }
        }
    }
    if (!keepResultContents) {
        result.remove();
    }
    return format(cp, cpLength, values,
                  result, &resultCopy, FALSE,
                  offsets, offsetsLength, errorCode);
}
246 
getTextWithNoPlaceholders(const UChar * compiledPattern,int32_t compiledPatternLength)247 UnicodeString SimplePatternFormatter::getTextWithNoPlaceholders(
248         const UChar *compiledPattern, int32_t compiledPatternLength) {
249     int32_t capacity = compiledPatternLength - 1 -
250             getPlaceholderCount(compiledPattern, compiledPatternLength);
251     UnicodeString sb(capacity, 0, 0);  // Java: StringBuilder
252     for (int32_t i = 1; i < compiledPatternLength;) {
253         int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT;
254         if (segmentLength > 0) {
255             sb.append(compiledPattern + i, segmentLength);
256             i += segmentLength;
257         }
258     }
259     return sb;
260 }
261 
/**
 * Worker that walks a compiled pattern and appends text segments and argument
 * values to result.
 *
 * All entries of offsets are first set to -1. For each argument number n that
 * occurs in the pattern and is below offsetsLength, offsets[n] is set to the
 * length of result at the point where that value is inserted, or to 0 when the
 * value is the result object itself at the very start of the pattern.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns early when a needed value pointer
 * is NULL, or when a value is the result object while forbidResultAsValue is set.
 *
 * @param compiledPattern       Compiled pattern units.
 * @param compiledPatternLength Number of units in compiledPattern.
 * @param values                Argument values, indexed by argument number.
 * @param result                Text is appended to this string.
 * @param resultCopy            Used in place of result when result is a non-initial
 *                              argument value; dereferenced only in that case.
 * @param forbidResultAsValue   If TRUE, result must not be one of the used values.
 * @param offsets               Receives per-argument offsets; may be NULL when
 *                              offsetsLength is 0.
 * @param offsetsLength         Number of entries in offsets.
 * @param errorCode             In/out error code; nothing is done if it already
 *                              indicates failure.
 * @return result
 */
UnicodeString &SimplePatternFormatter::format(
        const UChar *compiledPattern, int32_t compiledPatternLength,
        const UnicodeString *const *values,
        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
        int32_t *offsets, int32_t offsetsLength,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    int32_t k = 0;
    while (k < offsetsLength) {
        offsets[k++] = -1;
    }
    int32_t pos = 1;  // unit 0 holds the argument count
    while (pos < compiledPatternLength) {
        const int32_t unit = compiledPattern[pos++];
        if (unit >= ARG_NUM_LIMIT) {
            // Text segment: copy its literal units.
            const int32_t segmentLength = unit - ARG_NUM_LIMIT;
            result.append(compiledPattern + pos, segmentLength);
            pos += segmentLength;
            continue;
        }
        // Argument: unit is the argument number.
        const UnicodeString *value = values[unit];
        if (value == NULL) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return result;
        }
        const UBool valueIsResult = (value == &result);
        if (valueIsResult && forbidResultAsValue) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return result;
        }
        // We are appending to result which is also the first value object:
        // its text is already in place at offset 0, nothing to append.
        const UBool alreadyInPlace = valueIsResult && pos == 2;
        if (unit < offsetsLength) {
            offsets[unit] = alreadyInPlace ? 0 : result.length();
        }
        if (!alreadyInPlace) {
            result.append(valueIsResult ? *resultCopy : *value);
        }
    }
    return result;
}
312 
313 U_NAMESPACE_END
314