1 /*
2 ******************************************************************************
3 * Copyright (C) 2014-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * simplepatternformatter.cpp
7 */
8
9 #include "unicode/utypes.h"
10 #include "unicode/unistr.h"
11 #include "simplepatternformatter.h"
12 #include "uassert.h"
13
14 U_NAMESPACE_BEGIN
15
16 namespace {
17
18 /**
19 * Argument numbers must be smaller than this limit.
20 * Text segment lengths are offset by this much.
21 * This is currently the only unused char value in compiled patterns,
22 * except it is the maximum value of the first unit (max arg +1).
23 */
24 const int32_t ARG_NUM_LIMIT = 0x100;
25 /**
26 * Initial and maximum char/UChar value set for a text segment.
27 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
28 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
29 */
30 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
31 /**
32 * Maximum length of a text segment. Longer segments are split into shorter ones.
33 */
34 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
35
36 enum {
37 APOS = 0x27,
38 DIGIT_ZERO = 0x30,
39 DIGIT_ONE = 0x31,
40 DIGIT_NINE = 0x39,
41 OPEN_BRACE = 0x7b,
42 CLOSE_BRACE = 0x7d
43 };
44
isInvalidArray(const void * array,int32_t length)45 inline UBool isInvalidArray(const void *array, int32_t length) {
46 return (length < 0 || (array == NULL && length != 0));
47 }
48
49 } // namespace
50
/**
 * Copy assignment: takes over the compiled pattern of the other formatter.
 * Assigning an object to itself does nothing.
 *
 * @param other the formatter to copy from
 * @return *this
 */
SimplePatternFormatter &SimplePatternFormatter::operator=(
        const SimplePatternFormatter& other) {
    if (this != &other) {
        compiledPattern = other.compiledPattern;
    }
    return *this;
}
59
~SimplePatternFormatter()60 SimplePatternFormatter::~SimplePatternFormatter() {}
61
compileMinMaxPlaceholders(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)62 UBool SimplePatternFormatter::compileMinMaxPlaceholders(
63 const UnicodeString &pattern,
64 int32_t min, int32_t max,
65 UErrorCode &errorCode) {
66 if (U_FAILURE(errorCode)) {
67 return FALSE;
68 }
69 // Parse consistent with MessagePattern, but
70 // - support only simple numbered arguments
71 // - build a simple binary structure into the result string
72 const UChar *patternBuffer = pattern.getBuffer();
73 int32_t patternLength = pattern.length();
74 // Reserve the first char for the number of arguments.
75 compiledPattern.setTo((UChar)0);
76 int32_t textLength = 0;
77 int32_t maxArg = -1;
78 UBool inQuote = FALSE;
79 for (int32_t i = 0; i < patternLength;) {
80 UChar c = patternBuffer[i++];
81 if (c == APOS) {
82 if (i < patternLength && (c = patternBuffer[i]) == APOS) {
83 // double apostrophe, skip the second one
84 ++i;
85 } else if (inQuote) {
86 // skip the quote-ending apostrophe
87 inQuote = FALSE;
88 continue;
89 } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
90 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
91 ++i;
92 inQuote = TRUE;
93 } else {
94 // The apostrophe is part of literal text.
95 c = APOS;
96 }
97 } else if (!inQuote && c == OPEN_BRACE) {
98 if (textLength > 0) {
99 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
100 (UChar)(ARG_NUM_LIMIT + textLength));
101 textLength = 0;
102 }
103 int32_t argNumber;
104 if ((i + 1) < patternLength &&
105 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
106 patternBuffer[i + 1] == CLOSE_BRACE) {
107 i += 2;
108 } else {
109 // Multi-digit argument number (no leading zero) or syntax error.
110 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
111 // around the number, but this class does not.
112 argNumber = -1;
113 if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
114 argNumber = c - DIGIT_ZERO;
115 while (i < patternLength &&
116 DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
117 argNumber = argNumber * 10 + (c - DIGIT_ZERO);
118 if (argNumber >= ARG_NUM_LIMIT) {
119 break;
120 }
121 }
122 }
123 if (argNumber < 0 || c != CLOSE_BRACE) {
124 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
125 return FALSE;
126 }
127 }
128 if (argNumber > maxArg) {
129 maxArg = argNumber;
130 }
131 compiledPattern.append((UChar)argNumber);
132 continue;
133 } // else: c is part of literal text
134 // Append c and track the literal-text segment length.
135 if (textLength == 0) {
136 // Reserve a char for the length of a new text segment, preset the maximum length.
137 compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
138 }
139 compiledPattern.append(c);
140 if (++textLength == MAX_SEGMENT_LENGTH) {
141 textLength = 0;
142 }
143 }
144 if (textLength > 0) {
145 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
146 (UChar)(ARG_NUM_LIMIT + textLength));
147 }
148 int32_t argCount = maxArg + 1;
149 if (argCount < min || max < argCount) {
150 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
151 return FALSE;
152 }
153 compiledPattern.setCharAt(0, (UChar)argCount);
154 return TRUE;
155 }
156
/**
 * Formats with a single argument value and appends the outcome to appendTo.
 * Forwards to formatAndAppend() without an offsets array.
 *
 * @param value0 value for argument {0}
 * @param appendTo the string that the formatted text is appended to
 * @param errorCode ICU error code in/out parameter
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &value0,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *const args[1] = { &value0 };
    return formatAndAppend(args, 1, appendTo, NULL, 0, errorCode);
}
163
/**
 * Formats with two argument values and appends the outcome to appendTo.
 * Forwards to formatAndAppend() without an offsets array.
 *
 * @param value0 value for argument {0}
 * @param value1 value for argument {1}
 * @param appendTo the string that the formatted text is appended to
 * @param errorCode ICU error code in/out parameter
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *const args[2] = { &value0, &value1 };
    return formatAndAppend(args, 2, appendTo, NULL, 0, errorCode);
}
171
/**
 * Formats with three argument values and appends the outcome to appendTo.
 * Forwards to formatAndAppend() without an offsets array.
 *
 * @param value0 value for argument {0}
 * @param value1 value for argument {1}
 * @param value2 value for argument {2}
 * @param appendTo the string that the formatted text is appended to
 * @param errorCode ICU error code in/out parameter
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        const UnicodeString &value2,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *const args[3] = { &value0, &value1, &value2 };
    return formatAndAppend(args, 3, appendTo, NULL, 0, errorCode);
}
180
/**
 * Formats the given values and appends the outcome to appendTo.
 *
 * errorCode is set to U_ILLEGAL_ARGUMENT_ERROR if values or offsets is not a
 * valid (pointer, length) pair, or if there are fewer values than
 * getPlaceholderCount() reports. The shared format() worker is called with
 * forbidResultAsValue=TRUE, so it reports the same error when one of the
 * values it uses is the appendTo object itself.
 *
 * @param values the argument values
 * @param valuesLength the number of entries in values
 * @param appendTo the string that the formatted text is appended to
 * @param offsets if not NULL, receives for each argument number below
 *        offsetsLength the offset of its value in appendTo, or -1
 * @param offsetsLength the number of entries in offsets
 * @param errorCode ICU error code in/out parameter
 * @return appendTo
 */
UnicodeString& SimplePatternFormatter::formatAndAppend(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &appendTo,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (!U_FAILURE(errorCode)) {
        if (isInvalidArray(values, valuesLength) ||
                isInvalidArray(offsets, offsetsLength) ||
                valuesLength < getPlaceholderCount()) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            // format() returns its result parameter, which is appendTo here.
            format(compiledPattern.getBuffer(), compiledPattern.length(), values,
                   appendTo, NULL, TRUE,
                   offsets, offsetsLength, errorCode);
        }
    }
    return appendTo;
}
197
/**
 * Formats the given values and stores the outcome in result, replacing its
 * previous contents. Unlike formatAndAppend(), result may itself be one of
 * the values.
 *
 * errorCode is set to U_ILLEGAL_ARGUMENT_ERROR if values or offsets is not a
 * valid (pointer, length) pair, or if there are fewer values than the
 * compiled pattern has placeholders.
 *
 * @param values the argument values
 * @param valuesLength the number of entries in values
 * @param result receives the formatted text
 * @param offsets if not NULL, receives for each argument number below
 *        offsetsLength the offset of its value in result, or -1
 * @param offsetsLength the number of entries in offsets
 * @param errorCode ICU error code in/out parameter
 * @return result
 */
UnicodeString &SimplePatternFormatter::formatAndReplace(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &result,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }
    const UChar *cp = compiledPattern.getBuffer();
    const int32_t cpLength = compiledPattern.length();
    const int32_t placeholderCount = getPlaceholderCount(cp, cpLength);
    if (valuesLength < placeholderCount) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    // Set when the compiled pattern begins with an argument whose value is the
    // result object: the current result contents are then kept and appended to.
    // In every other case the result is emptied before formatting.
    bool resultIsLeadingValue = false;
    // Snapshot of the result contents, taken when the result object is also
    // the value of an argument that is not at the very start of the pattern.
    // format() appends this snapshot in place of the (changing) result.
    UnicodeString resultCopy;
    if (placeholderCount > 0) {
        int32_t pos = 1;
        while (pos < cpLength) {
            const int32_t unit = cp[pos];
            ++pos;
            if (unit >= ARG_NUM_LIMIT) {
                // Text segment: jump over its chars.
                pos += unit - ARG_NUM_LIMIT;
                continue;
            }
            if (values[unit] != &result) {
                continue;
            }
            if (pos == 2) {
                // The argument number was the first unit after the count char.
                resultIsLeadingValue = true;
            } else if (resultCopy.isEmpty() && !result.isEmpty()) {
                resultCopy = result;
            }
        }
    }
    if (!resultIsLeadingValue) {
        result.remove();
    }
    return format(cp, cpLength, values,
                  result, &resultCopy, FALSE,
                  offsets, offsetsLength, errorCode);
}
246
getTextWithNoPlaceholders(const UChar * compiledPattern,int32_t compiledPatternLength)247 UnicodeString SimplePatternFormatter::getTextWithNoPlaceholders(
248 const UChar *compiledPattern, int32_t compiledPatternLength) {
249 int32_t capacity = compiledPatternLength - 1 -
250 getPlaceholderCount(compiledPattern, compiledPatternLength);
251 UnicodeString sb(capacity, 0, 0); // Java: StringBuilder
252 for (int32_t i = 1; i < compiledPatternLength;) {
253 int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT;
254 if (segmentLength > 0) {
255 sb.append(compiledPattern + i, segmentLength);
256 i += segmentLength;
257 }
258 }
259 return sb;
260 }
261
/**
 * Shared worker: walks a compiled pattern and appends its text segments and
 * the argument values to result.
 *
 * All offsets are first set to -1. Whenever an argument is processed and its
 * number is below offsetsLength, its offset is set to the result length at
 * that point (or to 0 for the in-place case described below).
 *
 * errorCode is set to U_ILLEGAL_ARGUMENT_ERROR if a value that the pattern
 * uses is NULL, or if it is the result object while forbidResultAsValue is set.
 * The caller must make sure that values has an entry for every argument
 * number in the compiled pattern; this function does not check the array size.
 *
 * @param compiledPattern the compiled pattern chars
 * @param compiledPatternLength the number of chars in compiledPattern
 * @param values the argument values
 * @param result the string that is appended to
 * @param resultCopy used in place of result when result is the value of an
 *        argument that is not at the start of the pattern; only dereferenced
 *        in that case
 * @param forbidResultAsValue if TRUE, a value that is the result object is an error
 * @param offsets if not NULL-with-zero-length, receives the value offsets
 * @param offsetsLength the number of entries in offsets
 * @param errorCode ICU error code in/out parameter
 * @return result
 */
UnicodeString &SimplePatternFormatter::format(
        const UChar *compiledPattern, int32_t compiledPatternLength,
        const UnicodeString *const *values,
        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
        int32_t *offsets, int32_t offsetsLength,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    for (int32_t k = 0; k < offsetsLength; ++k) {
        offsets[k] = -1;
    }
    int32_t pos = 1;
    while (pos < compiledPatternLength) {
        const int32_t unit = compiledPattern[pos++];
        if (unit >= ARG_NUM_LIMIT) {
            // Text segment: copy its chars and move past them.
            const int32_t segmentLength = unit - ARG_NUM_LIMIT;
            result.append(compiledPattern + pos, segmentLength);
            pos += segmentLength;
            continue;
        }
        // Otherwise unit is an argument number.
        const UnicodeString *value = values[unit];
        if (value == NULL) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return result;
        }
        const UBool valueIsResult = (value == &result);
        if (valueIsResult && forbidResultAsValue) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return result;
        }
        // The pattern starts with this argument and its value is the result
        // object: the value text is already in place, nothing gets appended.
        const UBool alreadyInPlace = (valueIsResult && pos == 2);
        if (unit < offsetsLength) {
            offsets[unit] = alreadyInPlace ? 0 : result.length();
        }
        if (!alreadyInPlace) {
            result.append(valueIsResult ? *resultCopy : *value);
        }
    }
    return result;
}
312
313 U_NAMESPACE_END
314