1 /*
2 ******************************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * simplepatternformatter.cpp
7 */
8 #include "simplepatternformatter.h"
9 #include "cstring.h"
10 #include "uassert.h"
11
12 U_NAMESPACE_BEGIN
13
isInvalidArray(const void * array,int32_t size)14 static UBool isInvalidArray(const void *array, int32_t size) {
15 return (size < 0 || (size > 0 && array == NULL));
16 }
17
// States of the pattern parser in SimplePatternFormatter::compile().
enum SimplePatternFormatterCompileState {
    INIT,         // reading ordinary literal text
    APOSTROPHE,   // the previous character was an apostrophe
    PLACEHOLDER   // inside a placeholder, after the opening curly brace
};
23
24 // Handles parsing placeholders in the pattern string, e.g {4} or {35}
25 class SimplePatternFormatterIdBuilder {
26 public:
SimplePatternFormatterIdBuilder()27 SimplePatternFormatterIdBuilder() : id(0), idLen(0) { }
~SimplePatternFormatterIdBuilder()28 ~SimplePatternFormatterIdBuilder() { }
29
30 // Resets so that this object has seen no placeholder ID.
reset()31 void reset() { id = 0; idLen = 0; }
32
33 // Returns the numeric placeholder ID parsed so far
getId() const34 int32_t getId() const { return id; }
35
36 // Appends the numeric placeholder ID parsed so far back to a
37 // UChar buffer. Used to recover if parser using this object finds
38 // no closing curly brace.
39 void appendTo(UChar *buffer, int32_t *len) const;
40
41 // Returns true if this object has seen a placeholder ID.
isValid() const42 UBool isValid() const { return (idLen > 0); }
43
44 // Processes a single digit character. Pattern string parser calls this
45 // as it processes digits after an opening curly brace.
46 void add(UChar ch);
47 private:
48 int32_t id;
49 int32_t idLen;
50 SimplePatternFormatterIdBuilder(
51 const SimplePatternFormatterIdBuilder &other);
52 SimplePatternFormatterIdBuilder &operator=(
53 const SimplePatternFormatterIdBuilder &other);
54 };
55
appendTo(UChar * buffer,int32_t * len) const56 void SimplePatternFormatterIdBuilder::appendTo(
57 UChar *buffer, int32_t *len) const {
58 int32_t origLen = *len;
59 int32_t kId = id;
60 for (int32_t i = origLen + idLen - 1; i >= origLen; i--) {
61 int32_t digit = kId % 10;
62 buffer[i] = digit + 0x30;
63 kId /= 10;
64 }
65 *len = origLen + idLen;
66 }
67
add(UChar ch)68 void SimplePatternFormatterIdBuilder::add(UChar ch) {
69 id = id * 10 + (ch - 0x30);
70 idLen++;
71 }
72
73 // Represents placeholder values.
74 class SimplePatternFormatterPlaceholderValues : public UMemory {
75 public:
76 SimplePatternFormatterPlaceholderValues(
77 const UnicodeString * const *values,
78 int32_t valuesCount);
79
80 // Returns TRUE if appendTo value is at any index besides exceptIndex.
81 UBool isAppendToInAnyIndexExcept(
82 const UnicodeString &appendTo, int32_t exceptIndex) const;
83
84 // For each appendTo value, stores the snapshot of it in its place.
85 void snapshotAppendTo(const UnicodeString &appendTo);
86
87 // Returns the placeholder value at index. No range checking performed.
88 // Returned reference is valid for as long as this object exists.
89 const UnicodeString &get(int32_t index) const;
90 private:
91 const UnicodeString * const *fValues;
92 int32_t fValuesCount;
93 const UnicodeString *fAppendTo;
94 UnicodeString fAppendToCopy;
95 SimplePatternFormatterPlaceholderValues(
96 const SimplePatternFormatterPlaceholderValues &);
97 SimplePatternFormatterPlaceholderValues &operator=(
98 const SimplePatternFormatterPlaceholderValues &);
99 };
100
SimplePatternFormatterPlaceholderValues(const UnicodeString * const * values,int32_t valuesCount)101 SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues(
102 const UnicodeString * const *values,
103 int32_t valuesCount)
104 : fValues(values),
105 fValuesCount(valuesCount),
106 fAppendTo(NULL),
107 fAppendToCopy() {
108 }
109
isAppendToInAnyIndexExcept(const UnicodeString & appendTo,int32_t exceptIndex) const110 UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept(
111 const UnicodeString &appendTo, int32_t exceptIndex) const {
112 for (int32_t i = 0; i < fValuesCount; ++i) {
113 if (i != exceptIndex && fValues[i] == &appendTo) {
114 return TRUE;
115 }
116 }
117 return FALSE;
118 }
119
snapshotAppendTo(const UnicodeString & appendTo)120 void SimplePatternFormatterPlaceholderValues::snapshotAppendTo(
121 const UnicodeString &appendTo) {
122 fAppendTo = &appendTo;
123 fAppendToCopy = appendTo;
124 }
125
get(int32_t index) const126 const UnicodeString &SimplePatternFormatterPlaceholderValues::get(
127 int32_t index) const {
128 if (fAppendTo == NULL || fAppendTo != fValues[index]) {
129 return *fValues[index];
130 }
131 return fAppendToCopy;
132 }
133
SimplePatternFormatter()134 SimplePatternFormatter::SimplePatternFormatter() :
135 noPlaceholders(),
136 placeholders(),
137 placeholderSize(0),
138 placeholderCount(0),
139 firstPlaceholderReused(FALSE) {
140 }
141
SimplePatternFormatter(const UnicodeString & pattern)142 SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
143 noPlaceholders(),
144 placeholders(),
145 placeholderSize(0),
146 placeholderCount(0),
147 firstPlaceholderReused(FALSE) {
148 UErrorCode status = U_ZERO_ERROR;
149 compile(pattern, status);
150 }
151
// Copy constructor. Copies the literal text and the bookkeeping fields,
// then makes room for the other formatter's placeholder records and copies
// them over as raw memory.
SimplePatternFormatter::SimplePatternFormatter(
        const SimplePatternFormatter &other)
        : noPlaceholders(other.noPlaceholders),
          placeholders(),
          placeholderSize(0),
          placeholderCount(other.placeholderCount),
          firstPlaceholderReused(other.firstPlaceholderReused) {
    // ensureCapacity() returns how many records actually fit; only that
    // many are copied.
    const int32_t copyCount = ensureCapacity(other.placeholderSize);
    placeholderSize = copyCount;
    uprv_memcpy(
            placeholders.getAlias(),
            other.placeholders.getAlias(),
            placeholderSize * sizeof(PlaceholderInfo));
}
165
// Copy assignment. Self-assignment is a no-op. Otherwise the literal text,
// the bookkeeping fields and the placeholder records of other are copied
// into this object.
SimplePatternFormatter &SimplePatternFormatter::operator=(
        const SimplePatternFormatter& other) {
    if (this != &other) {
        noPlaceholders = other.noPlaceholders;
        // ensureCapacity() returns how many records actually fit; only
        // that many are copied below.
        placeholderSize = ensureCapacity(other.placeholderSize);
        placeholderCount = other.placeholderCount;
        firstPlaceholderReused = other.firstPlaceholderReused;
        uprv_memcpy(
                placeholders.getAlias(),
                other.placeholders.getAlias(),
                placeholderSize * sizeof(PlaceholderInfo));
    }
    return *this;
}
181
~SimplePatternFormatter()182 SimplePatternFormatter::~SimplePatternFormatter() {
183 }
184
compile(const UnicodeString & pattern,UErrorCode & status)185 UBool SimplePatternFormatter::compile(
186 const UnicodeString &pattern, UErrorCode &status) {
187 if (U_FAILURE(status)) {
188 return FALSE;
189 }
190 const UChar *patternBuffer = pattern.getBuffer();
191 int32_t patternLength = pattern.length();
192 UChar *buffer = noPlaceholders.getBuffer(patternLength);
193 int32_t len = 0;
194 placeholderSize = 0;
195 placeholderCount = 0;
196 SimplePatternFormatterCompileState state = INIT;
197 SimplePatternFormatterIdBuilder idBuilder;
198 for (int32_t i = 0; i < patternLength; ++i) {
199 UChar ch = patternBuffer[i];
200 switch (state) {
201 case INIT:
202 if (ch == 0x27) {
203 state = APOSTROPHE;
204 } else if (ch == 0x7B) {
205 state = PLACEHOLDER;
206 idBuilder.reset();
207 } else {
208 buffer[len++] = ch;
209 }
210 break;
211 case APOSTROPHE:
212 if (ch == 0x27) {
213 buffer[len++] = 0x27;
214 } else if (ch == 0x7B) {
215 buffer[len++] = 0x7B;
216 } else {
217 buffer[len++] = 0x27;
218 buffer[len++] = ch;
219 }
220 state = INIT;
221 break;
222 case PLACEHOLDER:
223 if (ch >= 0x30 && ch <= 0x39) {
224 idBuilder.add(ch);
225 } else if (ch == 0x7D && idBuilder.isValid()) {
226 if (!addPlaceholder(idBuilder.getId(), len)) {
227 status = U_MEMORY_ALLOCATION_ERROR;
228 return FALSE;
229 }
230 state = INIT;
231 } else {
232 buffer[len++] = 0x7B;
233 idBuilder.appendTo(buffer, &len);
234 buffer[len++] = ch;
235 state = INIT;
236 }
237 break;
238 default:
239 U_ASSERT(FALSE);
240 break;
241 }
242 }
243 switch (state) {
244 case INIT:
245 break;
246 case APOSTROPHE:
247 buffer[len++] = 0x27;
248 break;
249 case PLACEHOLDER:
250 buffer[len++] = 0X7B;
251 idBuilder.appendTo(buffer, &len);
252 break;
253 default:
254 U_ASSERT(false);
255 break;
256 }
257 noPlaceholders.releaseBuffer(len);
258 return TRUE;
259 }
260
// Formats this pattern with one placeholder value and appends the result
// to appendTo. No placeholder offsets are requested.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *args[] = {&arg0};
    return formatAndAppend(
            args, UPRV_LENGTHOF(args), appendTo, NULL, 0, status);
}
274
// Formats this pattern with two placeholder values and appends the result
// to appendTo. No placeholder offsets are requested.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        const UnicodeString &arg1,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *args[] = {&arg0, &arg1};
    return formatAndAppend(
            args, UPRV_LENGTHOF(args), appendTo, NULL, 0, status);
}
289
// Formats this pattern with three placeholder values and appends the
// result to appendTo. No placeholder offsets are requested.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        const UnicodeString &arg1,
        const UnicodeString &arg2,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *args[] = {&arg0, &arg1, &arg2};
    return formatAndAppend(
            args, UPRV_LENGTHOF(args), appendTo, NULL, 0, status);
}
305
// Records placeholderOffset as the offset of placeholder placeholderId in
// offsetArray. IDs at or beyond offsetArrayLength are silently ignored.
static void updatePlaceholderOffset(
        int32_t placeholderId,
        int32_t placeholderOffset,
        int32_t *offsetArray,
        int32_t offsetArrayLength) {
    if (placeholderId >= offsetArrayLength) {
        return;
    }
    offsetArray[placeholderId] = placeholderOffset;
}
315
appendRange(const UnicodeString & src,int32_t start,int32_t end,UnicodeString & dest)316 static void appendRange(
317 const UnicodeString &src,
318 int32_t start,
319 int32_t end,
320 UnicodeString &dest) {
321 // This check improves performance significantly.
322 if (start == end) {
323 return;
324 }
325 dest.append(src, start, end - start);
326 }
327
// Formats this pattern with the given placeholder values and appends the
// result to appendTo, which is also the return value.
//
// If offsetArray is given, the entry for each placeholder ID below
// offsetArrayLength receives the offset in appendTo at which that
// placeholder's value was written, or -1 if the pattern does not use it.
//
// Sets status to U_ILLEGAL_ARGUMENT_ERROR and leaves appendTo untouched if
// either array is invalid (negative length, or NULL with a positive
// length), if fewer values are supplied than the pattern needs, or if any
// of the supplied values is appendTo itself. Does nothing if status
// already indicates a failure.
UnicodeString& SimplePatternFormatter::formatAndAppend(
        const UnicodeString * const *placeholderValues,
        int32_t placeholderValueCount,
        UnicodeString &appendTo,
        int32_t *offsetArray,
        int32_t offsetArrayLength,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    if (isInvalidArray(placeholderValues, placeholderValueCount)
            || isInvalidArray(offsetArray, offsetArrayLength)
            || placeholderValueCount < placeholderCount) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }

    // Since parameter values that are the same as appendTo are
    // disallowed, all placeholderValues have to be checked, not just the
    // first placeholderCount of them.
    SimplePatternFormatterPlaceholderValues values(
            placeholderValues, placeholderValueCount);
    if (!values.isAppendToInAnyIndexExcept(appendTo, -1)) {
        return formatAndAppend(
                values, appendTo, offsetArray, offsetArrayLength);
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return appendTo;
}
363
// Formats this pattern with the given placeholder values and replaces the
// contents of result with the formatted text. result is also the return
// value. Unlike formatAndAppend(), result itself may be used as one or
// more of the placeholder values.
//
// If offsetArray is given, the entry for each placeholder ID below
// offsetArrayLength receives the offset in result at which that
// placeholder's value was written, or -1 if the pattern does not use it.
//
// Sets status to U_ILLEGAL_ARGUMENT_ERROR and leaves result untouched if
// either array is invalid or if fewer values are supplied than the pattern
// needs. Does nothing if status already indicates a failure.
UnicodeString& SimplePatternFormatter::formatAndReplace(
        const UnicodeString * const *placeholderValues,
        int32_t placeholderValueCount,
        UnicodeString &result,
        int32_t *offsetArray,
        int32_t offsetArrayLength,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return result;
    }
    if (isInvalidArray(placeholderValues, placeholderValueCount)
            || isInvalidArray(offsetArray, offsetArrayLength)
            || placeholderValueCount < placeholderCount) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    // Only the first placeholderCount values can be referenced by the
    // pattern, so only those are examined.
    SimplePatternFormatterPlaceholderValues values(
            placeholderValues, placeholderCount);
    const int32_t startId = getUniquePlaceholderAtStart();

    // True if the pattern begins with a placeholder that is used only once
    // and the value for that placeholder is result itself.
    const bool resultIsStartValue =
            (startId >= 0 && placeholderValues[startId] == &result);

    // Fast path: result already holds the text the output has to begin
    // with, and no other placeholder needs the old contents of result, so
    // the rest can simply be appended to it.
    if (resultIsStartValue
            && !values.isAppendToInAnyIndexExcept(result, startId)) {
        formatAndAppend(values, result, offsetArray, offsetArrayLength);

        // The offset of the leading placeholder has to be 0. Without this
        // correction it would be the length of the previous value of
        // result.
        if (offsetArrayLength > startId) {
            offsetArray[startId] = 0;
        }
        return result;
    }

    // General path: result is rebuilt from scratch. If result is also one
    // of the values, its old contents are preserved in a snapshot first.
    if (resultIsStartValue
            || values.isAppendToInAnyIndexExcept(result, -1)) {
        values.snapshotAppendTo(result);
    }
    result.remove();
    return formatAndAppend(values, result, offsetArray, offsetArrayLength);
}
428
formatAndAppend(const SimplePatternFormatterPlaceholderValues & values,UnicodeString & appendTo,int32_t * offsetArray,int32_t offsetArrayLength) const429 UnicodeString& SimplePatternFormatter::formatAndAppend(
430 const SimplePatternFormatterPlaceholderValues &values,
431 UnicodeString &appendTo,
432 int32_t *offsetArray,
433 int32_t offsetArrayLength) const {
434 for (int32_t i = 0; i < offsetArrayLength; ++i) {
435 offsetArray[i] = -1;
436 }
437 if (placeholderSize == 0) {
438 appendTo.append(noPlaceholders);
439 return appendTo;
440 }
441 appendRange(
442 noPlaceholders,
443 0,
444 placeholders[0].offset,
445 appendTo);
446 updatePlaceholderOffset(
447 placeholders[0].id,
448 appendTo.length(),
449 offsetArray,
450 offsetArrayLength);
451 const UnicodeString *placeholderValue = &values.get(placeholders[0].id);
452 if (placeholderValue != &appendTo) {
453 appendTo.append(*placeholderValue);
454 }
455 for (int32_t i = 1; i < placeholderSize; ++i) {
456 appendRange(
457 noPlaceholders,
458 placeholders[i - 1].offset,
459 placeholders[i].offset,
460 appendTo);
461 updatePlaceholderOffset(
462 placeholders[i].id,
463 appendTo.length(),
464 offsetArray,
465 offsetArrayLength);
466 placeholderValue = &values.get(placeholders[i].id);
467 if (placeholderValue != &appendTo) {
468 appendTo.append(*placeholderValue);
469 }
470 }
471 appendRange(
472 noPlaceholders,
473 placeholders[placeholderSize - 1].offset,
474 noPlaceholders.length(),
475 appendTo);
476 return appendTo;
477 }
478
getUniquePlaceholderAtStart() const479 int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const {
480 if (placeholderSize == 0
481 || firstPlaceholderReused || placeholders[0].offset != 0) {
482 return -1;
483 }
484 return placeholders[0].id;
485 }
486
ensureCapacity(int32_t desiredCapacity,int32_t allocationSize)487 int32_t SimplePatternFormatter::ensureCapacity(
488 int32_t desiredCapacity, int32_t allocationSize) {
489 if (allocationSize < desiredCapacity) {
490 allocationSize = desiredCapacity;
491 }
492 if (desiredCapacity <= placeholders.getCapacity()) {
493 return desiredCapacity;
494 }
495 // allocate new buffer
496 if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
497 return placeholders.getCapacity();
498 }
499 return desiredCapacity;
500 }
501
addPlaceholder(int32_t id,int32_t offset)502 UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
503 if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {
504 return FALSE;
505 }
506 ++placeholderSize;
507 PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];
508 placeholderEnd->offset = offset;
509 placeholderEnd->id = id;
510 if (id >= placeholderCount) {
511 placeholderCount = id + 1;
512 }
513 if (placeholderSize > 1
514 && placeholders[placeholderSize - 1].id == placeholders[0].id) {
515 firstPlaceholderReused = TRUE;
516 }
517 return TRUE;
518 }
519
520 U_NAMESPACE_END
521