1 /*
2 ******************************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 * simplepatternformatter.cpp
7 */
8 #include "simplepatternformatter.h"
9 #include "cstring.h"
10 #include "uassert.h"
11 
12 U_NAMESPACE_BEGIN
13 
isInvalidArray(const void * array,int32_t size)14 static UBool isInvalidArray(const void *array, int32_t size) {
15    return (size < 0 || (size > 0 && array == NULL));
16 }
17 
// Parser states used by SimplePatternFormatter::compile().
enum SimplePatternFormatterCompileState {
    INIT,         // Reading ordinary pattern text.
    APOSTROPHE,   // The previous character was an apostrophe.
    PLACEHOLDER   // An opening curly brace was seen; reading ID digits.
};
23 
24 // Handles parsing placeholders in the pattern string, e.g {4} or {35}
25 class SimplePatternFormatterIdBuilder {
26 public:
SimplePatternFormatterIdBuilder()27     SimplePatternFormatterIdBuilder() : id(0), idLen(0) { }
~SimplePatternFormatterIdBuilder()28     ~SimplePatternFormatterIdBuilder() { }
29 
30     // Resets so that this object has seen no placeholder ID.
reset()31     void reset() { id = 0; idLen = 0; }
32 
33     // Returns the numeric placeholder ID parsed so far
getId() const34     int32_t getId() const { return id; }
35 
36     // Appends the numeric placeholder ID parsed so far back to a
37     // UChar buffer. Used to recover if parser using this object finds
38     // no closing curly brace.
39     void appendTo(UChar *buffer, int32_t *len) const;
40 
41     // Returns true if this object has seen a placeholder ID.
isValid() const42     UBool isValid() const { return (idLen > 0); }
43 
44     // Processes a single digit character. Pattern string parser calls this
45     // as it processes digits after an opening curly brace.
46     void add(UChar ch);
47 private:
48     int32_t id;
49     int32_t idLen;
50     SimplePatternFormatterIdBuilder(
51             const SimplePatternFormatterIdBuilder &other);
52     SimplePatternFormatterIdBuilder &operator=(
53             const SimplePatternFormatterIdBuilder &other);
54 };
55 
appendTo(UChar * buffer,int32_t * len) const56 void SimplePatternFormatterIdBuilder::appendTo(
57         UChar *buffer, int32_t *len) const {
58     int32_t origLen = *len;
59     int32_t kId = id;
60     for (int32_t i = origLen + idLen - 1; i >= origLen; i--) {
61         int32_t digit = kId % 10;
62         buffer[i] = digit + 0x30;
63         kId /= 10;
64     }
65     *len = origLen + idLen;
66 }
67 
add(UChar ch)68 void SimplePatternFormatterIdBuilder::add(UChar ch) {
69     id = id * 10 + (ch - 0x30);
70     idLen++;
71 }
72 
73 // Represents placeholder values.
74 class SimplePatternFormatterPlaceholderValues : public UMemory {
75 public:
76     SimplePatternFormatterPlaceholderValues(
77             const UnicodeString * const *values,
78             int32_t valuesCount);
79 
80     // Returns TRUE if appendTo value is at any index besides exceptIndex.
81     UBool isAppendToInAnyIndexExcept(
82             const UnicodeString &appendTo, int32_t exceptIndex) const;
83 
84     // For each appendTo value, stores the snapshot of it in its place.
85     void snapshotAppendTo(const UnicodeString &appendTo);
86 
87     // Returns the placeholder value at index. No range checking performed.
88     // Returned reference is valid for as long as this object exists.
89     const UnicodeString &get(int32_t index) const;
90 private:
91     const UnicodeString * const *fValues;
92     int32_t fValuesCount;
93     const UnicodeString *fAppendTo;
94     UnicodeString fAppendToCopy;
95     SimplePatternFormatterPlaceholderValues(
96             const SimplePatternFormatterPlaceholderValues &);
97     SimplePatternFormatterPlaceholderValues &operator=(
98             const SimplePatternFormatterPlaceholderValues &);
99 };
100 
SimplePatternFormatterPlaceholderValues(const UnicodeString * const * values,int32_t valuesCount)101 SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues(
102         const UnicodeString * const *values,
103         int32_t valuesCount)
104         : fValues(values),
105           fValuesCount(valuesCount),
106           fAppendTo(NULL),
107           fAppendToCopy() {
108 }
109 
isAppendToInAnyIndexExcept(const UnicodeString & appendTo,int32_t exceptIndex) const110 UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept(
111         const UnicodeString &appendTo, int32_t exceptIndex) const {
112     for (int32_t i = 0; i < fValuesCount; ++i) {
113         if (i != exceptIndex && fValues[i] == &appendTo) {
114             return TRUE;
115         }
116     }
117     return FALSE;
118 }
119 
snapshotAppendTo(const UnicodeString & appendTo)120 void SimplePatternFormatterPlaceholderValues::snapshotAppendTo(
121         const UnicodeString &appendTo) {
122     fAppendTo = &appendTo;
123     fAppendToCopy = appendTo;
124 }
125 
get(int32_t index) const126 const UnicodeString &SimplePatternFormatterPlaceholderValues::get(
127         int32_t index) const {
128     if (fAppendTo == NULL || fAppendTo != fValues[index]) {
129         return *fValues[index];
130     }
131     return fAppendToCopy;
132 }
133 
SimplePatternFormatter()134 SimplePatternFormatter::SimplePatternFormatter() :
135         noPlaceholders(),
136         placeholders(),
137         placeholderSize(0),
138         placeholderCount(0),
139         firstPlaceholderReused(FALSE) {
140 }
141 
SimplePatternFormatter(const UnicodeString & pattern)142 SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
143         noPlaceholders(),
144         placeholders(),
145         placeholderSize(0),
146         placeholderCount(0),
147         firstPlaceholderReused(FALSE) {
148     UErrorCode status = U_ZERO_ERROR;
149     compile(pattern, status);
150 }
151 
// Copy constructor. Copies the pattern text and flags directly, then sizes
// the placeholder array and copies as many PlaceholderInfo entries as
// ensureCapacity() reports room for.
SimplePatternFormatter::SimplePatternFormatter(
        const SimplePatternFormatter &other) :
        noPlaceholders(other.noPlaceholders),
        placeholders(),
        placeholderSize(0),
        placeholderCount(other.placeholderCount),
        firstPlaceholderReused(other.firstPlaceholderReused) {
    const int32_t copiedCount = ensureCapacity(other.placeholderSize);
    placeholderSize = copiedCount;
    const PlaceholderInfo *from = other.placeholders.getAlias();
    PlaceholderInfo *to = placeholders.getAlias();
    uprv_memcpy(to, from, copiedCount * sizeof(PlaceholderInfo));
}
165 
// Copy assignment. Self-assignment is a no-op; otherwise every field is
// copied from other, including the PlaceholderInfo entries.
SimplePatternFormatter &SimplePatternFormatter::operator=(
        const SimplePatternFormatter& other) {
    if (this != &other) {
        noPlaceholders = other.noPlaceholders;
        // ensureCapacity() must run before placeholderSize is overwritten:
        // it reads the current placeholderSize when it resizes.
        const int32_t copiedCount = ensureCapacity(other.placeholderSize);
        placeholderSize = copiedCount;
        placeholderCount = other.placeholderCount;
        firstPlaceholderReused = other.firstPlaceholderReused;
        const PlaceholderInfo *from = other.placeholders.getAlias();
        PlaceholderInfo *to = placeholders.getAlias();
        uprv_memcpy(to, from, copiedCount * sizeof(PlaceholderInfo));
    }
    return *this;
}
181 
~SimplePatternFormatter()182 SimplePatternFormatter::~SimplePatternFormatter() {
183 }
184 
compile(const UnicodeString & pattern,UErrorCode & status)185 UBool SimplePatternFormatter::compile(
186         const UnicodeString &pattern, UErrorCode &status) {
187     if (U_FAILURE(status)) {
188         return FALSE;
189     }
190     const UChar *patternBuffer = pattern.getBuffer();
191     int32_t patternLength = pattern.length();
192     UChar *buffer = noPlaceholders.getBuffer(patternLength);
193     int32_t len = 0;
194     placeholderSize = 0;
195     placeholderCount = 0;
196     SimplePatternFormatterCompileState state = INIT;
197     SimplePatternFormatterIdBuilder idBuilder;
198     for (int32_t i = 0; i < patternLength; ++i) {
199         UChar ch = patternBuffer[i];
200         switch (state) {
201         case INIT:
202             if (ch == 0x27) {
203                 state = APOSTROPHE;
204             } else if (ch == 0x7B) {
205                 state = PLACEHOLDER;
206                 idBuilder.reset();
207             } else {
208                buffer[len++] = ch;
209             }
210             break;
211         case APOSTROPHE:
212             if (ch == 0x27) {
213                 buffer[len++] = 0x27;
214             } else if (ch == 0x7B) {
215                 buffer[len++] = 0x7B;
216             } else {
217                 buffer[len++] = 0x27;
218                 buffer[len++] = ch;
219             }
220             state = INIT;
221             break;
222         case PLACEHOLDER:
223             if (ch >= 0x30 && ch <= 0x39) {
224                 idBuilder.add(ch);
225             } else if (ch == 0x7D && idBuilder.isValid()) {
226                 if (!addPlaceholder(idBuilder.getId(), len)) {
227                     status = U_MEMORY_ALLOCATION_ERROR;
228                     return FALSE;
229                 }
230                 state = INIT;
231             } else {
232                 buffer[len++] = 0x7B;
233                 idBuilder.appendTo(buffer, &len);
234                 buffer[len++] = ch;
235                 state = INIT;
236             }
237             break;
238         default:
239             U_ASSERT(FALSE);
240             break;
241         }
242     }
243     switch (state) {
244     case INIT:
245         break;
246     case APOSTROPHE:
247         buffer[len++] = 0x27;
248         break;
249     case PLACEHOLDER:
250         buffer[len++] = 0X7B;
251         idBuilder.appendTo(buffer, &len);
252         break;
253     default:
254         U_ASSERT(false);
255         break;
256     }
257     noPlaceholders.releaseBuffer(len);
258     return TRUE;
259 }
260 
// Formats with a single placeholder value and appends the result to
// appendTo. No offsets are reported.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *values[] = { &arg0 };
    return formatAndAppend(
            values, UPRV_LENGTHOF(values), appendTo, NULL, 0, status);
}
274 
// Formats with two placeholder values and appends the result to appendTo.
// No offsets are reported.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        const UnicodeString &arg1,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *values[] = { &arg0, &arg1 };
    return formatAndAppend(
            values, UPRV_LENGTHOF(values), appendTo, NULL, 0, status);
}
289 
// Formats with three placeholder values and appends the result to appendTo.
// No offsets are reported.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        const UnicodeString &arg1,
        const UnicodeString &arg2,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *values[] = { &arg0, &arg1, &arg2 };
    return formatAndAppend(
            values, UPRV_LENGTHOF(values), appendTo, NULL, 0, status);
}
305 
// Records placeholderOffset as the offset of placeholder placeholderId in
// offsetArray, which has offsetArrayLength slots. IDs that do not index a
// slot of the array -- too large, or negative -- are ignored, so nothing is
// ever written outside offsetArray.
static void updatePlaceholderOffset(
        int32_t placeholderId,
        int32_t placeholderOffset,
        int32_t *offsetArray,
        int32_t offsetArrayLength) {
    if (placeholderId >= 0 && placeholderId < offsetArrayLength) {
        offsetArray[placeholderId] = placeholderOffset;
    }
}
315 
appendRange(const UnicodeString & src,int32_t start,int32_t end,UnicodeString & dest)316 static void appendRange(
317         const UnicodeString &src,
318         int32_t start,
319         int32_t end,
320         UnicodeString &dest) {
321     // This check improves performance significantly.
322     if (start == end) {
323         return;
324     }
325     dest.append(src, start, end - start);
326 }
327 
// Formats the compiled pattern with placeholderValues and appends the result
// to appendTo, which is also the return value.
//
// If offsetArray is given, slot N receives the offset in appendTo at which
// placeholder N's value was appended, or -1 if placeholder N does not occur.
//
// Sets status to U_ILLEGAL_ARGUMENT_ERROR, leaving appendTo unchanged, if
// either array is invalid (negative count, or NULL with a positive count),
// if fewer values than placeholderCount are supplied, or if any of the
// supplied values is appendTo itself. Does nothing if status already
// indicates failure.
UnicodeString& SimplePatternFormatter::formatAndAppend(
        const UnicodeString * const *placeholderValues,
        int32_t placeholderValueCount,
        UnicodeString &appendTo,
        int32_t *offsetArray,
        int32_t offsetArrayLength,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    const UBool badArguments =
            isInvalidArray(placeholderValues, placeholderValueCount)
            || isInvalidArray(offsetArray, offsetArrayLength)
            || placeholderValueCount < placeholderCount;
    if (badArguments) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }

    // Values that are the same object as appendTo are disallowed, so every
    // supplied value is checked, not only the first placeholderCount ones.
    SimplePatternFormatterPlaceholderValues values(
            placeholderValues, placeholderValueCount);
    if (!values.isAppendToInAnyIndexExcept(appendTo, -1)) {
        return formatAndAppend(
                values, appendTo, offsetArray, offsetArrayLength);
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return appendTo;
}
363 
formatAndReplace(const UnicodeString * const * placeholderValues,int32_t placeholderValueCount,UnicodeString & result,int32_t * offsetArray,int32_t offsetArrayLength,UErrorCode & status) const364 UnicodeString& SimplePatternFormatter::formatAndReplace(
365         const UnicodeString * const *placeholderValues,
366         int32_t placeholderValueCount,
367         UnicodeString &result,
368         int32_t *offsetArray,
369         int32_t offsetArrayLength,
370         UErrorCode &status) const {
371     if (U_FAILURE(status)) {
372         return result;
373     }
374     if (isInvalidArray(placeholderValues, placeholderValueCount)
375             || isInvalidArray(offsetArray, offsetArrayLength)) {
376         status = U_ILLEGAL_ARGUMENT_ERROR;
377         return result;
378     }
379     if (placeholderValueCount < placeholderCount) {
380         status = U_ILLEGAL_ARGUMENT_ERROR;
381         return result;
382     }
383     SimplePatternFormatterPlaceholderValues values(
384             placeholderValues, placeholderCount);
385     int32_t placeholderAtStart = getUniquePlaceholderAtStart();
386 
387     // If pattern starts with a unique placeholder and that placeholder
388     // value is result, we may be able to optimize by just appending to result.
389     if (placeholderAtStart >= 0
390             && placeholderValues[placeholderAtStart] == &result) {
391 
392         // If result is the value for other placeholders, call off optimization.
393         if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) {
394             values.snapshotAppendTo(result);
395             result.remove();
396             return formatAndAppend(
397                     values,
398                     result,
399                     offsetArray,
400                     offsetArrayLength);
401         }
402 
403         // Otherwise we can optimize
404         formatAndAppend(
405                 values,
406                 result,
407                 offsetArray,
408                 offsetArrayLength);
409 
410         // We have to make the offset for the placeholderAtStart
411         // placeholder be 0. Otherwise it would be the length of the
412         // previous value of result.
413         if (offsetArrayLength > placeholderAtStart) {
414             offsetArray[placeholderAtStart] = 0;
415         }
416         return result;
417     }
418     if (values.isAppendToInAnyIndexExcept(result, -1)) {
419         values.snapshotAppendTo(result);
420     }
421     result.remove();
422     return formatAndAppend(
423             values,
424             result,
425             offsetArray,
426             offsetArrayLength);
427 }
428 
formatAndAppend(const SimplePatternFormatterPlaceholderValues & values,UnicodeString & appendTo,int32_t * offsetArray,int32_t offsetArrayLength) const429 UnicodeString& SimplePatternFormatter::formatAndAppend(
430         const SimplePatternFormatterPlaceholderValues &values,
431         UnicodeString &appendTo,
432         int32_t *offsetArray,
433         int32_t offsetArrayLength) const {
434     for (int32_t i = 0; i < offsetArrayLength; ++i) {
435         offsetArray[i] = -1;
436     }
437     if (placeholderSize == 0) {
438         appendTo.append(noPlaceholders);
439         return appendTo;
440     }
441     appendRange(
442             noPlaceholders,
443             0,
444             placeholders[0].offset,
445             appendTo);
446     updatePlaceholderOffset(
447             placeholders[0].id,
448             appendTo.length(),
449             offsetArray,
450             offsetArrayLength);
451     const UnicodeString *placeholderValue = &values.get(placeholders[0].id);
452     if (placeholderValue != &appendTo) {
453         appendTo.append(*placeholderValue);
454     }
455     for (int32_t i = 1; i < placeholderSize; ++i) {
456         appendRange(
457                 noPlaceholders,
458                 placeholders[i - 1].offset,
459                 placeholders[i].offset,
460                 appendTo);
461         updatePlaceholderOffset(
462                 placeholders[i].id,
463                 appendTo.length(),
464                 offsetArray,
465                 offsetArrayLength);
466         placeholderValue = &values.get(placeholders[i].id);
467         if (placeholderValue != &appendTo) {
468             appendTo.append(*placeholderValue);
469         }
470     }
471     appendRange(
472             noPlaceholders,
473             placeholders[placeholderSize - 1].offset,
474             noPlaceholders.length(),
475             appendTo);
476     return appendTo;
477 }
478 
getUniquePlaceholderAtStart() const479 int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const {
480     if (placeholderSize == 0
481             || firstPlaceholderReused || placeholders[0].offset != 0) {
482         return -1;
483     }
484     return placeholders[0].id;
485 }
486 
ensureCapacity(int32_t desiredCapacity,int32_t allocationSize)487 int32_t SimplePatternFormatter::ensureCapacity(
488         int32_t desiredCapacity, int32_t allocationSize) {
489     if (allocationSize < desiredCapacity) {
490         allocationSize = desiredCapacity;
491     }
492     if (desiredCapacity <= placeholders.getCapacity()) {
493         return desiredCapacity;
494     }
495     // allocate new buffer
496     if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
497         return placeholders.getCapacity();
498     }
499     return desiredCapacity;
500 }
501 
addPlaceholder(int32_t id,int32_t offset)502 UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
503     if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {
504         return FALSE;
505     }
506     ++placeholderSize;
507     PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];
508     placeholderEnd->offset = offset;
509     placeholderEnd->id = id;
510     if (id >= placeholderCount) {
511         placeholderCount = id + 1;
512     }
513     if (placeholderSize > 1
514             && placeholders[placeholderSize - 1].id == placeholders[0].id) {
515         firstPlaceholderReused = TRUE;
516     }
517     return TRUE;
518 }
519 
520 U_NAMESPACE_END
521