1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2000-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File reslist.cpp
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   02/21/00    weiv        Creation.
15 *******************************************************************************
16 */
17 
18 // Safer use of UnicodeString: unless the build already chose a value,
// define the macro as the keyword `explicit`.
// NOTE(review): presumably consumed by the UnicodeString header, which is why
// it is defined ahead of the #include lines below -- confirm.
19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
20 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
21 #endif
22 
23 // Less important, but still a good idea: same treatment for the
// string-pointer variant of the macro.
24 #ifndef UNISTR_FROM_STRING_EXPLICIT
25 #   define UNISTR_FROM_STRING_EXPLICIT explicit
26 #endif
27 
28 #include <assert.h>
29 #include <stdio.h>
30 #include "unicode/localpointer.h"
31 #include "reslist.h"
32 #include "unewdata.h"
33 #include "unicode/ures.h"
34 #include "unicode/putil.h"
35 #include "errmsg.h"
36 
37 #include "uarrsort.h"
38 #include "uelement.h"
39 #include "uhash.h"
40 #include "uinvchar.h"
41 #include "ustr_imp.h"
42 #include "unicode/utf16.h"
43 /*
44  * Align binary data at a 16-byte offset from the start of the resource bundle,
45  * to be safe for any data type it may contain.
46  */
47 #define BIN_ALIGNMENT 16
48 
49 // This numeric constant must be at least 1.
50 // If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
51 // and it makes no sense to move it to the pool bundle.
52 // The larger the threshold for fNumUnitsSaved
53 // the smaller the savings, and the smaller the pool bundle.
54 // We trade some total size reduction to reduce the pool bundle a bit,
55 // so that one can reasonably save data size by
56 // removing bundle files without rebuilding the pool bundle.
57 // This can also help to keep the pool and total (pool+local) string indexes
58 // within 16 bits, that is, within range of Table16 and Array16 containers.
59 #ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
60 #   define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
61 #endif
62 
63 U_NAMESPACE_USE
64 
// File-wide options. In this part of the file each one is written only by its
// setter: setIncludeCopyright(), setUsePoolBundle(), setFormatVersion().
65 static UBool gIncludeCopyright = FALSE;
66 static UBool gUsePoolBundle = FALSE;
// Cleared by setFormatVersion(); TRUE means gFormatVersion still holds its initial value.
67 static UBool gIsDefaultFormatVersion = TRUE;
// Major formatVersion to produce. Several resource types below use the
// "empty resource" and 16-bit encodings only when this is greater than 1.
68 static int32_t gFormatVersion = 3;
69 
70 /* How do we store string values? Compared against SRBRoot::fStringsForm. */
71 enum {
72     STRINGS_UTF16_V1,   /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
73     STRINGS_UTF16_V2    /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
74 };
75 
76 static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40;  /* do not store the length explicitly for such strings */
77 
// Default-constructed ResFile.
// NOTE(review): presumably what SRBRoot::fUsePoolBundle refers to when no pool
// bundle is in use -- the assignment is not in this part of the file; confirm.
78 static const ResFile kNoPoolBundle;
79 
80 /*
81  * res_none() returns the address of kNoResource,
82  * for use in non-error cases when no resource is to be added to the bundle.
83  * (NULL is used in error cases.)
84  * The container add() functions below ignore this address.
85  */
85 static SResource kNoResource;  // TODO: const
86 
// Data header descriptor; the trailing comments name the UDataInfo fields in order.
// NOTE(review): not referenced in this part of the file -- presumably handed to
// udata_create() when the bundle is written; confirm.
87 static UDataInfo dataInfo= {
88     sizeof(UDataInfo),            /* size */
89     0,                            /* reservedWord */
90 
91     U_IS_BIG_ENDIAN,              /* isBigEndian */
92     U_CHARSET_FAMILY,             /* charsetFamily */
93     sizeof(UChar),                /* sizeofUChar */
94     0,                            /* reservedByte */
95 
96     {0x52, 0x65, 0x73, 0x42},     /* dataFormat="ResB" */
97     {1, 3, 0, 0},                 /* formatVersion */
98     {1, 4, 0, 0}                  /* dataVersion take a look at version inside parsed resb*/
99 };
100 
// Row n holds the four formatVersion bytes for major version n; row 0 is all zeros.
101 static const UVersionInfo gFormatVersions[4] = {  /* indexed by a major-formatVersion integer */
102     { 0, 0, 0, 0 },
103     { 1, 3, 0, 0 },
104     { 2, 0, 0, 0 },
105     { 3, 0, 0, 0 }
106 };
107 // Remember to update genrb.h GENRB_VERSION when changing the data format.
108 // (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
109 
/**
 * Returns the number of bytes (0..3) that must follow `size` bytes of data
 * so that the total is a multiple of sizeof(uint32_t).
 */
static uint8_t calcPadding(uint32_t size) {
    const uint32_t remainder = size % sizeof(uint32_t);
    if (remainder == 0) {
        return 0;  /* already aligned */
    }
    return (uint8_t) (sizeof(uint32_t) - remainder);
}
115 
setIncludeCopyright(UBool val)116 void setIncludeCopyright(UBool val){
117     gIncludeCopyright=val;
118 }
119 
getIncludeCopyright(void)120 UBool getIncludeCopyright(void){
121     return gIncludeCopyright;
122 }
123 
setFormatVersion(int32_t formatVersion)124 void setFormatVersion(int32_t formatVersion) {
125     gIsDefaultFormatVersion = FALSE;
126     gFormatVersion = formatVersion;
127 }
128 
getFormatVersion()129 int32_t getFormatVersion() {
130     return gFormatVersion;
131 }
132 
setUsePoolBundle(UBool use)133 void setUsePoolBundle(UBool use) {
134     gUsePoolBundle = use;
135 }
136 
137 // TODO: return const pointer, or find another way to express "none"
res_none()138 struct SResource* res_none() {
139     return &kNoResource;
140 }
141 
SResource()142 SResource::SResource()
143         : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
144           line(0), fNext(NULL) {
145     ustr_init(&fComment);
146 }
147 
SResource(SRBRoot * bundle,const char * tag,int8_t type,const UString * comment,UErrorCode & errorCode)148 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
149                      UErrorCode &errorCode)
150         : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
151           fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
152           line(0), fNext(NULL) {
153     ustr_init(&fComment);
154     if(comment != NULL) {
155         ustr_cpy(&fComment, comment, &errorCode);
156     }
157 }
158 
~SResource()159 SResource::~SResource() {
160     ustr_deinit(&fComment);
161 }
162 
~ContainerResource()163 ContainerResource::~ContainerResource() {
164     SResource *current = fFirst;
165     while (current != NULL) {
166         SResource *next = current->fNext;
167         delete current;
168         current = next;
169     }
170 }
171 
~TableResource()172 TableResource::~TableResource() {}
173 
174 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
add(SResource * res,int linenumber,UErrorCode & errorCode)175 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
176     if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) {
177         return;
178     }
179 
180     /* remember this linenumber to report to the user if there is a duplicate key */
181     res->line = linenumber;
182 
183     /* here we need to traverse the list */
184     ++fCount;
185 
186     /* is the list still empty? */
187     if (fFirst == NULL) {
188         fFirst = res;
189         res->fNext = NULL;
190         return;
191     }
192 
193     const char *resKeyString = fRoot->fKeys + res->fKey;
194 
195     SResource *current = fFirst;
196 
197     SResource *prev = NULL;
198     while (current != NULL) {
199         const char *currentKeyString = fRoot->fKeys + current->fKey;
200         int diff;
201         /*
202          * formatVersion 1: compare key strings in native-charset order
203          * formatVersion 2 and up: compare key strings in ASCII order
204          */
205         if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
206             diff = uprv_strcmp(currentKeyString, resKeyString);
207         } else {
208             diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
209         }
210         if (diff < 0) {
211             prev    = current;
212             current = current->fNext;
213         } else if (diff > 0) {
214             /* we're either in front of the list, or in the middle */
215             if (prev == NULL) {
216                 /* front of the list */
217                 fFirst = res;
218             } else {
219                 /* middle of the list */
220                 prev->fNext = res;
221             }
222 
223             res->fNext = current;
224             return;
225         } else {
226             /* Key already exists! ERROR! */
227             error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
228             errorCode = U_UNSUPPORTED_ERROR;
229             return;
230         }
231     }
232 
233     /* end of list */
234     prev->fNext = res;
235     res->fNext  = NULL;
236 }
237 
~ArrayResource()238 ArrayResource::~ArrayResource() {}
239 
add(SResource * res)240 void ArrayResource::add(SResource *res) {
241     if (res != NULL && res != &kNoResource) {
242         if (fFirst == NULL) {
243             fFirst = res;
244         } else {
245             fLast->fNext = res;
246         }
247         fLast = res;
248         ++fCount;
249     }
250 }
251 
~PseudoListResource()252 PseudoListResource::~PseudoListResource() {}
253 
add(SResource * res)254 void PseudoListResource::add(SResource *res) {
255     if (res != NULL && res != &kNoResource) {
256         res->fNext = fFirst;
257         fFirst = res;
258         ++fCount;
259     }
260 }
261 
StringBaseResource(SRBRoot * bundle,const char * tag,int8_t type,const UChar * value,int32_t len,const UString * comment,UErrorCode & errorCode)262 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
263                                        const UChar *value, int32_t len,
264                                        const UString* comment, UErrorCode &errorCode)
265         : SResource(bundle, tag, type, comment, errorCode) {
266     if (len == 0 && gFormatVersion > 1) {
267         fRes = URES_MAKE_EMPTY_RESOURCE(type);
268         fWritten = TRUE;
269         return;
270     }
271 
272     fString.setTo(value, len);
273     fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
274     if (U_SUCCESS(errorCode) && fString.isBogus()) {
275         errorCode = U_MEMORY_ALLOCATION_ERROR;
276     }
277 }
278 
StringBaseResource(SRBRoot * bundle,int8_t type,const icu::UnicodeString & value,UErrorCode & errorCode)279 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
280                                        const icu::UnicodeString &value, UErrorCode &errorCode)
281         : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
282     if (value.isEmpty() && gFormatVersion > 1) {
283         fRes = URES_MAKE_EMPTY_RESOURCE(type);
284         fWritten = TRUE;
285         return;
286     }
287 
288     fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
289     if (U_SUCCESS(errorCode) && fString.isBogus()) {
290         errorCode = U_MEMORY_ALLOCATION_ERROR;
291     }
292 }
293 
294 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
StringBaseResource(int8_t type,const UChar * value,int32_t len,UErrorCode & errorCode)295 StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
296                                        UErrorCode &errorCode)
297         : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {
298     assert(len > 0);
299     assert(!fString.isBogus());
300 }
301 
~StringBaseResource()302 StringBaseResource::~StringBaseResource() {}
303 
304 static int32_t U_CALLCONV
string_hash(const UElement key)305 string_hash(const UElement key) {
306     const StringResource *res = static_cast<const StringResource *>(key.pointer);
307     return res->fString.hashCode();
308 }
309 
310 static UBool U_CALLCONV
string_comp(const UElement key1,const UElement key2)311 string_comp(const UElement key1, const UElement key2) {
312     const StringResource *res1 = static_cast<const StringResource *>(key1.pointer);
313     const StringResource *res2 = static_cast<const StringResource *>(key2.pointer);
314     return res1->fString == res2->fString;
315 }
316 
~StringResource()317 StringResource::~StringResource() {}
318 
~AliasResource()319 AliasResource::~AliasResource() {}
320 
IntResource(SRBRoot * bundle,const char * tag,int32_t value,const UString * comment,UErrorCode & errorCode)321 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
322                          const UString* comment, UErrorCode &errorCode)
323         : SResource(bundle, tag, URES_INT, comment, errorCode) {
324     fValue = value;
325     fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
326     fWritten = TRUE;
327 }
328 
~IntResource()329 IntResource::~IntResource() {}
330 
IntVectorResource(SRBRoot * bundle,const char * tag,const UString * comment,UErrorCode & errorCode)331 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
332                   const UString* comment, UErrorCode &errorCode)
333         : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
334           fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) {
335     if (fArray == NULL) {
336         errorCode = U_MEMORY_ALLOCATION_ERROR;
337         return;
338     }
339 }
340 
~IntVectorResource()341 IntVectorResource::~IntVectorResource() {
342     delete[] fArray;
343 }
344 
add(int32_t value,UErrorCode & errorCode)345 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
346     if (U_SUCCESS(errorCode)) {
347         fArray[fCount++] = value;
348     }
349 }
350 
BinaryResource(SRBRoot * bundle,const char * tag,uint32_t length,uint8_t * data,const char * fileName,const UString * comment,UErrorCode & errorCode)351 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
352                                uint32_t length, uint8_t *data, const char* fileName,
353                                const UString* comment, UErrorCode &errorCode)
354         : SResource(bundle, tag, URES_BINARY, comment, errorCode),
355           fLength(length), fData(NULL), fFileName(NULL) {
356     if (U_FAILURE(errorCode)) {
357         return;
358     }
359     if (fileName != NULL && *fileName != 0){
360         fFileName = new char[uprv_strlen(fileName)+1];
361         if (fFileName == NULL) {
362             errorCode = U_MEMORY_ALLOCATION_ERROR;
363             return;
364         }
365         uprv_strcpy(fFileName, fileName);
366     }
367     if (length > 0) {
368         fData = new uint8_t[length];
369         if (fData == NULL) {
370             errorCode = U_MEMORY_ALLOCATION_ERROR;
371             return;
372         }
373         uprv_memcpy(fData, data, length);
374     } else {
375         if (gFormatVersion > 1) {
376             fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
377             fWritten = TRUE;
378         }
379     }
380 }
381 
~BinaryResource()382 BinaryResource::~BinaryResource() {
383     delete[] fData;
384     delete[] fFileName;
385 }
386 
387 /* Writing Functions */
388 
389 void
handlePreflightStrings(SRBRoot * bundle,UHashtable * stringSet,UErrorCode & errorCode)390 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
391                                        UErrorCode &errorCode) {
392     assert(fSame == NULL);
393     fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
394     if (fSame != NULL) {
395         // This is a duplicate of a pool bundle string or of an earlier-visited string.
396         if (++fSame->fNumCopies == 1) {
397             assert(fSame->fWritten);
398             int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
399             if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
400                 bundle->fPoolStringIndexLimit = poolStringIndex + 1;
401             }
402         }
403         return;
404     }
405     /* Put this string into the set for finding duplicates. */
406     fNumCopies = 1;
407     uhash_put(stringSet, this, this, &errorCode);
408 
409     if (bundle->fStringsForm != STRINGS_UTF16_V1) {
410         int32_t len = length();
411         if (len <= MAX_IMPLICIT_STRING_LENGTH &&
412                 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
413             /*
414              * This string will be stored without an explicit length.
415              * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
416              */
417             fNumCharsForLength = 0;
418         } else if (len <= 0x3ee) {
419             fNumCharsForLength = 1;
420         } else if (len <= 0xfffff) {
421             fNumCharsForLength = 2;
422         } else {
423             fNumCharsForLength = 3;
424         }
425         bundle->f16BitStringsLength += fNumCharsForLength + len + 1;  /* +1 for the NUL */
426     }
427 }
428 
429 void
handlePreflightStrings(SRBRoot * bundle,UHashtable * stringSet,UErrorCode & errorCode)430 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
431                                           UErrorCode &errorCode) {
432     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
433         current->preflightStrings(bundle, stringSet, errorCode);
434     }
435 }
436 
437 void
preflightStrings(SRBRoot * bundle,UHashtable * stringSet,UErrorCode & errorCode)438 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
439     if (U_FAILURE(errorCode)) {
440         return;
441     }
442     if (fRes != RES_BOGUS) {
443         /*
444          * The resource item word was already precomputed, which means
445          * no further data needs to be written.
446          * This might be an integer, or an empty string/binary/etc.
447          */
448         return;
449     }
450     handlePreflightStrings(bundle, stringSet, errorCode);
451 }
452 
453 void
handlePreflightStrings(SRBRoot *,UHashtable *,UErrorCode &)454 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
455                                   UErrorCode & /*errorCode*/) {
456     /* Neither a string nor a container. */
457 }
458 
459 int32_t
makeRes16(uint32_t resWord) const460 SRBRoot::makeRes16(uint32_t resWord) const {
461     if (resWord == 0) {
462         return 0;  /* empty string */
463     }
464     uint32_t type = RES_GET_TYPE(resWord);
465     int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
466     if (type == URES_STRING_V2) {
467         assert(offset > 0);
468         if (offset < fPoolStringIndexLimit) {
469             if (offset < fPoolStringIndex16Limit) {
470                 return offset;
471             }
472         } else {
473             offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
474             if (offset <= 0xffff) {
475                 return offset;
476             }
477         }
478     }
479     return -1;
480 }
481 
482 int32_t
mapKey(int32_t oldpos) const483 SRBRoot::mapKey(int32_t oldpos) const {
484     const KeyMapEntry *map = fKeyMap;
485     if (map == NULL) {
486         return oldpos;
487     }
488     int32_t i, start, limit;
489 
490     /* do a binary search for the old, pre-compactKeys() key offset */
491     start = fUsePoolBundle->fKeysCount;
492     limit = start + fKeysCount;
493     while (start < limit - 1) {
494         i = (start + limit) / 2;
495         if (oldpos < map[i].oldpos) {
496             limit = i;
497         } else {
498             start = i;
499         }
500     }
501     assert(oldpos == map[start].oldpos);
502     return map[start].newpos;
503 }
504 
505 /*
506  * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
507  * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
508  * and exits early.
509  */
510 void
handleWrite16(SRBRoot *)511 StringResource::handleWrite16(SRBRoot * /*bundle*/) {
512     SResource *same;
513     if ((same = fSame) != NULL) {
514         /* This is a duplicate. */
515         assert(same->fRes != RES_BOGUS && same->fWritten);
516         fRes = same->fRes;
517         fWritten = same->fWritten;
518     }
519 }
520 
521 void
writeAllRes16(SRBRoot * bundle)522 ContainerResource::writeAllRes16(SRBRoot *bundle) {
523     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
524         bundle->f16BitUnits.append((UChar)current->fRes16);
525     }
526     fWritten = TRUE;
527 }
528 
529 void
handleWrite16(SRBRoot * bundle)530 ArrayResource::handleWrite16(SRBRoot *bundle) {
531     if (fCount == 0 && gFormatVersion > 1) {
532         fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
533         fWritten = TRUE;
534         return;
535     }
536 
537     int32_t res16 = 0;
538     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
539         current->write16(bundle);
540         res16 |= current->fRes16;
541     }
542     if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
543         fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
544         bundle->f16BitUnits.append((UChar)fCount);
545         writeAllRes16(bundle);
546     }
547 }
548 
549 void
handleWrite16(SRBRoot * bundle)550 TableResource::handleWrite16(SRBRoot *bundle) {
551     if (fCount == 0 && gFormatVersion > 1) {
552         fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
553         fWritten = TRUE;
554         return;
555     }
556     /* Find the smallest table type that fits the data. */
557     int32_t key16 = 0;
558     int32_t res16 = 0;
559     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
560         current->write16(bundle);
561         key16 |= current->fKey16;
562         res16 |= current->fRes16;
563     }
564     if(fCount > (uint32_t)bundle->fMaxTableLength) {
565         bundle->fMaxTableLength = fCount;
566     }
567     if (fCount <= 0xffff && key16 >= 0) {
568         if (res16 >= 0 && gFormatVersion > 1) {
569             /* 16-bit count, key offsets and values */
570             fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
571             bundle->f16BitUnits.append((UChar)fCount);
572             for (SResource *current = fFirst; current != NULL; current = current->fNext) {
573                 bundle->f16BitUnits.append((UChar)current->fKey16);
574             }
575             writeAllRes16(bundle);
576         } else {
577             /* 16-bit count, 16-bit key offsets, 32-bit values */
578             fTableType = URES_TABLE;
579         }
580     } else {
581         /* 32-bit count, key offsets and values */
582         fTableType = URES_TABLE32;
583     }
584 }
585 
586 void
handleWrite16(SRBRoot *)587 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
588     fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
589     fWritten = TRUE;
590 }
591 
592 void
write16(SRBRoot * bundle)593 SResource::write16(SRBRoot *bundle) {
594     if (fKey >= 0) {
595         // A tagged resource has a non-negative key index into the parsed key strings.
596         // compactKeys() built a map from parsed key index to the final key index.
597         // After the mapping, negative key indexes are used for shared pool bundle keys.
598         fKey = bundle->mapKey(fKey);
599         // If the key index fits into a Key16 for a Table or Table16,
600         // then set the fKey16 field accordingly.
601         // Otherwise keep it at -1.
602         if (fKey >= 0) {
603             if (fKey < bundle->fLocalKeyLimit) {
604                 fKey16 = fKey;
605             }
606         } else {
607             int32_t poolKeyIndex = fKey & 0x7fffffff;
608             if (poolKeyIndex <= 0xffff) {
609                 poolKeyIndex += bundle->fLocalKeyLimit;
610                 if (poolKeyIndex <= 0xffff) {
611                     fKey16 = poolKeyIndex;
612                 }
613             }
614         }
615     }
616     /*
617      * fRes != RES_BOGUS:
618      * The resource item word was already precomputed, which means
619      * no further data needs to be written.
620      * This might be an integer, or an empty or UTF-16 v2 string,
621      * an empty binary, etc.
622      */
623     if (fRes == RES_BOGUS) {
624         handleWrite16(bundle);
625     }
626     // Compute fRes16 for precomputed as well as just-computed fRes.
627     fRes16 = bundle->makeRes16(fRes);
628 }
629 
630 void
handleWrite16(SRBRoot *)631 SResource::handleWrite16(SRBRoot * /*bundle*/) {
632     /* Only a few resource types write 16-bit units. */
633 }
634 
635 /*
636  * Only called for UTF-16 v1 strings, and for aliases.
637  * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
638  * and exits early.
639  */
640 void
handlePreWrite(uint32_t * byteOffset)641 StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
642     /* Write the UTF-16 v1 string. */
643     fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
644     *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
645 }
646 
647 void
handlePreWrite(uint32_t * byteOffset)648 IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
649     if (fCount == 0 && gFormatVersion > 1) {
650         fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
651         fWritten = TRUE;
652     } else {
653         fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
654         *byteOffset += (1 + fCount) * 4;
655     }
656 }
657 
658 void
handlePreWrite(uint32_t * byteOffset)659 BinaryResource::handlePreWrite(uint32_t *byteOffset) {
660     uint32_t pad       = 0;
661     uint32_t dataStart = *byteOffset + sizeof(fLength);
662 
663     if (dataStart % BIN_ALIGNMENT) {
664         pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
665         *byteOffset += pad;  /* pad == 4 or 8 or 12 */
666     }
667     fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
668     *byteOffset += 4 + fLength;
669 }
670 
671 void
preWriteAllRes(uint32_t * byteOffset)672 ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
673     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
674         current->preWrite(byteOffset);
675     }
676 }
677 
678 void
handlePreWrite(uint32_t * byteOffset)679 ArrayResource::handlePreWrite(uint32_t *byteOffset) {
680     preWriteAllRes(byteOffset);
681     fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
682     *byteOffset += (1 + fCount) * 4;
683 }
684 
685 void
handlePreWrite(uint32_t * byteOffset)686 TableResource::handlePreWrite(uint32_t *byteOffset) {
687     preWriteAllRes(byteOffset);
688     if (fTableType == URES_TABLE) {
689         /* 16-bit count, 16-bit key offsets, 32-bit values */
690         fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
691         *byteOffset += 2 + fCount * 6;
692     } else {
693         /* 32-bit count, key offsets and values */
694         fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
695         *byteOffset += 4 + fCount * 8;
696     }
697 }
698 
699 void
preWrite(uint32_t * byteOffset)700 SResource::preWrite(uint32_t *byteOffset) {
701     if (fRes != RES_BOGUS) {
702         /*
703          * The resource item word was already precomputed, which means
704          * no further data needs to be written.
705          * This might be an integer, or an empty or UTF-16 v2 string,
706          * an empty binary, etc.
707          */
708         return;
709     }
710     handlePreWrite(byteOffset);
711     *byteOffset += calcPadding(*byteOffset);
712 }
713 
714 void
handlePreWrite(uint32_t *)715 SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
716     assert(FALSE);
717 }
718 
719 /*
720  * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
721  * write() sees fWritten and exits early.
722  */
723 void
handleWrite(UNewDataMemory * mem,uint32_t * byteOffset)724 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
725     /* Write the UTF-16 v1 string. */
726     int32_t len = length();
727     udata_write32(mem, len);
728     udata_writeUString(mem, getBuffer(), len + 1);
729     *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
730     fWritten = TRUE;
731 }
732 
733 void
writeAllRes(UNewDataMemory * mem,uint32_t * byteOffset)734 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
735     uint32_t i = 0;
736     for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
737         current->write(mem, byteOffset);
738     }
739     assert(i == fCount);
740 }
741 
742 void
writeAllRes32(UNewDataMemory * mem,uint32_t * byteOffset)743 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
744     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
745         udata_write32(mem, current->fRes);
746     }
747     *byteOffset += fCount * 4;
748 }
749 
750 void
handleWrite(UNewDataMemory * mem,uint32_t * byteOffset)751 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
752     writeAllRes(mem, byteOffset);
753     udata_write32(mem, fCount);
754     *byteOffset += 4;
755     writeAllRes32(mem, byteOffset);
756 }
757 
758 void
handleWrite(UNewDataMemory * mem,uint32_t * byteOffset)759 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
760     udata_write32(mem, fCount);
761     for(uint32_t i = 0; i < fCount; ++i) {
762       udata_write32(mem, fArray[i]);
763     }
764     *byteOffset += (1 + fCount) * 4;
765 }
766 
767 void
handleWrite(UNewDataMemory * mem,uint32_t * byteOffset)768 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
769     uint32_t pad       = 0;
770     uint32_t dataStart = *byteOffset + sizeof(fLength);
771 
772     if (dataStart % BIN_ALIGNMENT) {
773         pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
774         udata_writePadding(mem, pad);  /* pad == 4 or 8 or 12 */
775         *byteOffset += pad;
776     }
777 
778     udata_write32(mem, fLength);
779     if (fLength > 0) {
780         udata_writeBlock(mem, fData, fLength);
781     }
782     *byteOffset += 4 + fLength;
783 }
784 
785 void
handleWrite(UNewDataMemory * mem,uint32_t * byteOffset)786 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
787     writeAllRes(mem, byteOffset);
788     if(fTableType == URES_TABLE) {
789         udata_write16(mem, (uint16_t)fCount);
790         for (SResource *current = fFirst; current != NULL; current = current->fNext) {
791             udata_write16(mem, current->fKey16);
792         }
793         *byteOffset += (1 + fCount)* 2;
794         if ((fCount & 1) == 0) {
795             /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
796             udata_writePadding(mem, 2);
797             *byteOffset += 2;
798         }
799     } else /* URES_TABLE32 */ {
800         udata_write32(mem, fCount);
801         for (SResource *current = fFirst; current != NULL; current = current->fNext) {
802             udata_write32(mem, (uint32_t)current->fKey);
803         }
804         *byteOffset += (1 + fCount)* 4;
805     }
806     writeAllRes32(mem, byteOffset);
807 }
808 
809 void
write(UNewDataMemory * mem,uint32_t * byteOffset)810 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
811     if (fWritten) {
812         assert(fRes != RES_BOGUS);
813         return;
814     }
815     handleWrite(mem, byteOffset);
816     uint8_t paddingSize = calcPadding(*byteOffset);
817     if (paddingSize > 0) {
818         udata_writePadding(mem, paddingSize);
819         *byteOffset += paddingSize;
820     }
821     fWritten = TRUE;
822 }
823 
824 void
handleWrite(UNewDataMemory *,uint32_t *)825 SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
826     assert(FALSE);
827 }
828 
write(const char * outputDir,const char * outputPkg,char * writtenFilename,int writtenFilenameLen,UErrorCode & errorCode)829 void SRBRoot::write(const char *outputDir, const char *outputPkg,
830                     char *writtenFilename, int writtenFilenameLen,
831                     UErrorCode &errorCode) {
832     UNewDataMemory *mem        = NULL;
833     uint32_t        byteOffset = 0;
834     uint32_t        top, size;
835     char            dataName[1024];
836     int32_t         indexes[URES_INDEX_TOP];
837 
838     compactKeys(errorCode);
839     /*
840      * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
841      * Safe because the capacity is a multiple of 4.
842      */
843     while (fKeysTop & 3) {
844         fKeys[fKeysTop++] = (char)0xaa;
845     }
846     /*
847      * In URES_TABLE, use all local key offsets that fit into 16 bits,
848      * and use the remaining 16-bit offsets for pool key offsets
849      * if there are any.
850      * If there are no local keys, then use the whole 16-bit space
851      * for pool key offsets.
852      * Note: This cannot be changed without changing the major formatVersion.
853      */
854     if (fKeysBottom < fKeysTop) {
855         if (fKeysTop <= 0x10000) {
856             fLocalKeyLimit = fKeysTop;
857         } else {
858             fLocalKeyLimit = 0x10000;
859         }
860     } else {
861         fLocalKeyLimit = 0;
862     }
863 
864     UHashtable *stringSet;
865     if (gFormatVersion > 1) {
866         stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
867         if (U_SUCCESS(errorCode) &&
868                 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
869             for (SResource *current = fUsePoolBundle->fStrings->fFirst;
870                     current != NULL;
871                     current = current->fNext) {
872                 StringResource *sr = static_cast<StringResource *>(current);
873                 sr->fNumCopies = 0;
874                 sr->fNumUnitsSaved = 0;
875                 uhash_put(stringSet, sr, sr, &errorCode);
876             }
877         }
878         fRoot->preflightStrings(this, stringSet, errorCode);
879     } else {
880         stringSet = NULL;
881     }
882     if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
883         compactStringsV2(stringSet, errorCode);
884     }
885     uhash_close(stringSet);
886     if (U_FAILURE(errorCode)) {
887         return;
888     }
889 
890     int32_t formatVersion = gFormatVersion;
891     if (fPoolStringIndexLimit != 0) {
892         int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
893         if ((sum - 1) > RES_MAX_OFFSET) {
894             errorCode = U_BUFFER_OVERFLOW_ERROR;
895             return;
896         }
897         if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
898             // 16-bit indexes work for all pool + local strings.
899             fPoolStringIndex16Limit = fPoolStringIndexLimit;
900         } else {
901             // Set the pool index threshold so that 16-bit indexes work
902             // for some pool strings and some local strings.
903             fPoolStringIndex16Limit = (int32_t)(
904                     ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
905         }
906     } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
907         // If we just default to formatVersion 3
908         // but there are no pool bundle strings to share
909         // and we do not write a pool bundle,
910         // then write formatVersion 2 which is just as good.
911         formatVersion = 2;
912     }
913 
914     fRoot->write16(this);
915     if (f16BitUnits.isBogus()) {
916         errorCode = U_MEMORY_ALLOCATION_ERROR;
917         return;
918     }
919     if (f16BitUnits.length() & 1) {
920         f16BitUnits.append((UChar)0xaaaa);  /* pad to multiple of 4 bytes */
921     }
922     /* all keys have been mapped */
923     uprv_free(fKeyMap);
924     fKeyMap = NULL;
925 
926     byteOffset = fKeysTop + f16BitUnits.length() * 2;
927     fRoot->preWrite(&byteOffset);
928 
929     /* total size including the root item */
930     top = byteOffset;
931 
932     if (writtenFilename && writtenFilenameLen) {
933         *writtenFilename = 0;
934     }
935 
936     if (writtenFilename) {
937        int32_t off = 0, len = 0;
938        if (outputDir) {
939            len = (int32_t)uprv_strlen(outputDir);
940            if (len > writtenFilenameLen) {
941                len = writtenFilenameLen;
942            }
943            uprv_strncpy(writtenFilename, outputDir, len);
944        }
945        if (writtenFilenameLen -= len) {
946            off += len;
947            writtenFilename[off] = U_FILE_SEP_CHAR;
948            if (--writtenFilenameLen) {
949                ++off;
950                if(outputPkg != NULL)
951                {
952                    uprv_strcpy(writtenFilename+off, outputPkg);
953                    off += (int32_t)uprv_strlen(outputPkg);
954                    writtenFilename[off] = '_';
955                    ++off;
956                }
957 
958                len = (int32_t)uprv_strlen(fLocale);
959                if (len > writtenFilenameLen) {
960                    len = writtenFilenameLen;
961                }
962                uprv_strncpy(writtenFilename + off, fLocale, len);
963                if (writtenFilenameLen -= len) {
964                    off += len;
965                    len = 5;
966                    if (len > writtenFilenameLen) {
967                        len = writtenFilenameLen;
968                    }
969                    uprv_strncpy(writtenFilename +  off, ".res", len);
970                }
971            }
972        }
973     }
974 
975     if(outputPkg)
976     {
977         uprv_strcpy(dataName, outputPkg);
978         uprv_strcat(dataName, "_");
979         uprv_strcat(dataName, fLocale);
980     }
981     else
982     {
983         uprv_strcpy(dataName, fLocale);
984     }
985 
986     uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
987 
988     mem = udata_create(outputDir, "res", dataName,
989                        &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode);
990     if(U_FAILURE(errorCode)){
991         return;
992     }
993 
994     /* write the root item */
995     udata_write32(mem, fRoot->fRes);
996 
997     /*
998      * formatVersion 1.1 (ICU 2.8):
999      * write int32_t indexes[] after root and before the key strings
1000      * to make it easier to parse resource bundles in icuswap or from Java etc.
1001      */
1002     uprv_memset(indexes, 0, sizeof(indexes));
1003     indexes[URES_INDEX_LENGTH]=             fIndexLength;
1004     indexes[URES_INDEX_KEYS_TOP]=           fKeysTop>>2;
1005     indexes[URES_INDEX_RESOURCES_TOP]=      (int32_t)(top>>2);
1006     indexes[URES_INDEX_BUNDLE_TOP]=         indexes[URES_INDEX_RESOURCES_TOP];
1007     indexes[URES_INDEX_MAX_TABLE_LENGTH]=   fMaxTableLength;
1008 
1009     /*
1010      * formatVersion 1.2 (ICU 3.6):
1011      * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
1012      * the memset() above initialized all indexes[] to 0
1013      */
1014     if (fNoFallback) {
1015         indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
1016     }
1017     /*
1018      * formatVersion 2.0 (ICU 4.4):
1019      * more compact string value storage, optional pool bundle
1020      */
1021     if (URES_INDEX_16BIT_TOP < fIndexLength) {
1022         indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
1023     }
1024     if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
1025         if (fIsPoolBundle) {
1026             indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
1027             uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
1028                                            (uint32_t)(fKeysTop - fKeysBottom), 0);
1029             if (f16BitUnits.length() <= 1) {
1030                 // no pool strings to checksum
1031             } else if (U_IS_BIG_ENDIAN) {
1032                 checksum = computeCRC((const char *)f16BitUnits.getBuffer(),
1033                                       (uint32_t)f16BitUnits.length() * 2, checksum);
1034             } else {
1035                 // Swap to big-endian so we get the same checksum on all platforms
1036                 // (except for charset family, due to the key strings).
1037                 UnicodeString s(f16BitUnits);
1038                 s.append((UChar)1);  // Ensure that we own this buffer.
1039                 assert(!s.isBogus());
1040                 uint16_t *p = (uint16_t *)s.getBuffer();
1041                 for (int32_t count = f16BitUnits.length(); count > 0; --count) {
1042                     uint16_t x = *p;
1043                     *p++ = (uint16_t)((x << 8) | (x >> 8));
1044                 }
1045                 checksum = computeCRC((const char *)p,
1046                                       (uint32_t)f16BitUnits.length() * 2, checksum);
1047             }
1048             indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
1049         } else if (gUsePoolBundle) {
1050             indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
1051             indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
1052         }
1053     }
1054     // formatVersion 3 (ICU 56):
1055     // share string values via pool bundle strings
1056     indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8;  // bits 23..0 -> 31..8
1057     indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000;  // bits 27..24 -> 15..12
1058     indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
1059 
1060     /* write the indexes[] */
1061     udata_writeBlock(mem, indexes, fIndexLength*4);
1062 
1063     /* write the table key strings */
1064     udata_writeBlock(mem, fKeys+fKeysBottom,
1065                           fKeysTop-fKeysBottom);
1066 
1067     /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
1068     udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
1069 
1070     /* write all of the bundle contents: the root item and its children */
1071     byteOffset = fKeysTop + f16BitUnits.length() * 2;
1072     fRoot->write(mem, &byteOffset);
1073     assert(byteOffset == top);
1074 
1075     size = udata_finish(mem, &errorCode);
1076     if(top != size) {
1077         fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
1078                 (int)size, (int)top);
1079         errorCode = U_INTERNAL_PROGRAM_ERROR;
1080     }
1081 }
1082 
1083 /* Opening Functions */
1084 
table_open(struct SRBRoot * bundle,const char * tag,const struct UString * comment,UErrorCode * status)1085 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1086     LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status);
1087     return U_SUCCESS(*status) ? res.orphan() : NULL;
1088 }
1089 
array_open(struct SRBRoot * bundle,const char * tag,const struct UString * comment,UErrorCode * status)1090 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1091     LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status);
1092     return U_SUCCESS(*status) ? res.orphan() : NULL;
1093 }
1094 
string_open(struct SRBRoot * bundle,const char * tag,const UChar * value,int32_t len,const struct UString * comment,UErrorCode * status)1095 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1096     LocalPointer<SResource> res(
1097             new StringResource(bundle, tag, value, len, comment, *status), *status);
1098     return U_SUCCESS(*status) ? res.orphan() : NULL;
1099 }
1100 
alias_open(struct SRBRoot * bundle,const char * tag,UChar * value,int32_t len,const struct UString * comment,UErrorCode * status)1101 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1102     LocalPointer<SResource> res(
1103             new AliasResource(bundle, tag, value, len, comment, *status), *status);
1104     return U_SUCCESS(*status) ? res.orphan() : NULL;
1105 }
1106 
intvector_open(struct SRBRoot * bundle,const char * tag,const struct UString * comment,UErrorCode * status)1107 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1108     LocalPointer<IntVectorResource> res(
1109             new IntVectorResource(bundle, tag, comment, *status), *status);
1110     return U_SUCCESS(*status) ? res.orphan() : NULL;
1111 }
1112 
int_open(struct SRBRoot * bundle,const char * tag,int32_t value,const struct UString * comment,UErrorCode * status)1113 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
1114     LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status);
1115     return U_SUCCESS(*status) ? res.orphan() : NULL;
1116 }
1117 
bin_open(struct SRBRoot * bundle,const char * tag,uint32_t length,uint8_t * data,const char * fileName,const struct UString * comment,UErrorCode * status)1118 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
1119     LocalPointer<SResource> res(
1120             new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
1121     return U_SUCCESS(*status) ? res.orphan() : NULL;
1122 }
1123 
SRBRoot(const UString * comment,UBool isPoolBundle,UErrorCode & errorCode)1124 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
1125         : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
1126           fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
1127           fKeys(NULL), fKeyMap(NULL),
1128           fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
1129           f16BitUnits(), f16BitStringsLength(0),
1130           fUsePoolBundle(&kNoPoolBundle),
1131           fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
1132           fWritePoolBundle(NULL) {
1133     if (U_FAILURE(errorCode)) {
1134         return;
1135     }
1136 
1137     if (gFormatVersion > 1) {
1138         // f16BitUnits must start with a zero for empty resources.
1139         // We might be able to omit it if there are no empty 16-bit resources.
1140         f16BitUnits.append((UChar)0);
1141     }
1142 
1143     fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
1144     if (isPoolBundle) {
1145         fRoot = new PseudoListResource(this, errorCode);
1146     } else {
1147         fRoot = new TableResource(this, NULL, comment, errorCode);
1148     }
1149     if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
1150         if (U_SUCCESS(errorCode)) {
1151             errorCode = U_MEMORY_ALLOCATION_ERROR;
1152         }
1153         return;
1154     }
1155 
1156     fKeysCapacity = KEY_SPACE_SIZE;
1157     /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
1158     if (gUsePoolBundle || isPoolBundle) {
1159         fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
1160     } else if (gFormatVersion >= 2) {
1161         fIndexLength = URES_INDEX_16BIT_TOP + 1;
1162     } else /* formatVersion 1 */ {
1163         fIndexLength = URES_INDEX_ATTRIBUTES + 1;
1164     }
1165     fKeysBottom = (1 /* root */ + fIndexLength) * 4;
1166     uprv_memset(fKeys, 0, fKeysBottom);
1167     fKeysTop = fKeysBottom;
1168 
1169     if (gFormatVersion == 1) {
1170         fStringsForm = STRINGS_UTF16_V1;
1171     } else {
1172         fStringsForm = STRINGS_UTF16_V2;
1173     }
1174 }
1175 
1176 /* Closing Functions */
1177 
/* Destroys a resource object with delete; a NULL pointer is a no-op. */
void res_close(struct SResource *res) {
    delete res;
}
1181 
~SRBRoot()1182 SRBRoot::~SRBRoot() {
1183     delete fRoot;
1184     uprv_free(fLocale);
1185     uprv_free(fKeys);
1186     uprv_free(fKeyMap);
1187 }
1188 
1189 /* Misc Functions */
1190 
setLocale(UChar * locale,UErrorCode & errorCode)1191 void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) {
1192     if(U_FAILURE(errorCode)) {
1193         return;
1194     }
1195 
1196     uprv_free(fLocale);
1197     fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
1198     if(fLocale == NULL) {
1199         errorCode = U_MEMORY_ALLOCATION_ERROR;
1200         return;
1201     }
1202 
1203     u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
1204 }
1205 
1206 const char *
getKeyString(int32_t key) const1207 SRBRoot::getKeyString(int32_t key) const {
1208     if (key < 0) {
1209         return fUsePoolBundle->fKeys + (key & 0x7fffffff);
1210     } else {
1211         return fKeys + key;
1212     }
1213 }
1214 
1215 const char *
getKeyString(const SRBRoot * bundle) const1216 SResource::getKeyString(const SRBRoot *bundle) const {
1217     if (fKey == -1) {
1218         return NULL;
1219     }
1220     return bundle->getKeyString(fKey);
1221 }
1222 
1223 const char *
getKeyBytes(int32_t * pLength) const1224 SRBRoot::getKeyBytes(int32_t *pLength) const {
1225     *pLength = fKeysTop - fKeysBottom;
1226     return fKeys + fKeysBottom;
1227 }
1228 
1229 int32_t
addKeyBytes(const char * keyBytes,int32_t length,UErrorCode & errorCode)1230 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
1231     int32_t keypos;
1232 
1233     if (U_FAILURE(errorCode)) {
1234         return -1;
1235     }
1236     if (length < 0 || (keyBytes == NULL && length != 0)) {
1237         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1238         return -1;
1239     }
1240     if (length == 0) {
1241         return fKeysTop;
1242     }
1243 
1244     keypos = fKeysTop;
1245     fKeysTop += length;
1246     if (fKeysTop >= fKeysCapacity) {
1247         /* overflow - resize the keys buffer */
1248         fKeysCapacity += KEY_SPACE_SIZE;
1249         fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity));
1250         if(fKeys == NULL) {
1251             errorCode = U_MEMORY_ALLOCATION_ERROR;
1252             return -1;
1253         }
1254     }
1255 
1256     uprv_memcpy(fKeys + keypos, keyBytes, length);
1257 
1258     return keypos;
1259 }
1260 
1261 int32_t
addTag(const char * tag,UErrorCode & errorCode)1262 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
1263     int32_t keypos;
1264 
1265     if (U_FAILURE(errorCode)) {
1266         return -1;
1267     }
1268 
1269     if (tag == NULL) {
1270         /* no error: the root table and array items have no keys */
1271         return -1;
1272     }
1273 
1274     keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode);
1275     if (U_SUCCESS(errorCode)) {
1276         ++fKeysCount;
1277     }
1278     return keypos;
1279 }
1280 
/*
 * Three-way comparison of two possibly-negative key offsets.
 * Returns -1, 0 or 1. Deliberately avoids lPos - rPos, which could overflow
 * for operands of opposite sign.
 */
static int32_t
compareInt32(int32_t lPos, int32_t rPos) {
    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lPos > rPos) - (lPos < rPos);
}
1295 
1296 static int32_t U_CALLCONV
compareKeySuffixes(const void * context,const void * l,const void * r)1297 compareKeySuffixes(const void *context, const void *l, const void *r) {
1298     const struct SRBRoot *bundle=(const struct SRBRoot *)context;
1299     int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
1300     int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
1301     const char *lStart = bundle->getKeyString(lPos);
1302     const char *lLimit = lStart;
1303     const char *rStart = bundle->getKeyString(rPos);
1304     const char *rLimit = rStart;
1305     int32_t diff;
1306     while (*lLimit != 0) { ++lLimit; }
1307     while (*rLimit != 0) { ++rLimit; }
1308     /* compare keys in reverse character order */
1309     while (lStart < lLimit && rStart < rLimit) {
1310         diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
1311         if (diff != 0) {
1312             return diff;
1313         }
1314     }
1315     /* sort equal suffixes by descending key length */
1316     diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
1317     if (diff != 0) {
1318         return diff;
1319     }
1320     /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
1321     return compareInt32(lPos, rPos);
1322 }
1323 
1324 static int32_t U_CALLCONV
compareKeyNewpos(const void *,const void * l,const void * r)1325 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
1326     return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos);
1327 }
1328 
1329 static int32_t U_CALLCONV
compareKeyOldpos(const void *,const void * l,const void * r)1330 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
1331     return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
1332 }
1333 
1334 void
compactKeys(UErrorCode & errorCode)1335 SRBRoot::compactKeys(UErrorCode &errorCode) {
1336     KeyMapEntry *map;
1337     char *keys;
1338     int32_t i;
1339     int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
1340     if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
1341         return;
1342     }
1343     map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
1344     if (map == NULL) {
1345         errorCode = U_MEMORY_ALLOCATION_ERROR;
1346         return;
1347     }
1348     keys = (char *)fUsePoolBundle->fKeys;
1349     for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
1350         map[i].oldpos =
1351             (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000;  /* negative oldpos */
1352         map[i].newpos = 0;
1353         while (*keys != 0) { ++keys; }  /* skip the key */
1354         ++keys;  /* skip the NUL */
1355     }
1356     keys = fKeys + fKeysBottom;
1357     for (; i < keysCount; ++i) {
1358         map[i].oldpos = (int32_t)(keys - fKeys);
1359         map[i].newpos = 0;
1360         while (*keys != 0) { ++keys; }  /* skip the key */
1361         ++keys;  /* skip the NUL */
1362     }
1363     /* Sort the keys so that each one is immediately followed by all of its suffixes. */
1364     uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1365                    compareKeySuffixes, this, FALSE, &errorCode);
1366     /*
1367      * Make suffixes point into earlier, longer strings that contain them
1368      * and mark the old, now unused suffix bytes as deleted.
1369      */
1370     if (U_SUCCESS(errorCode)) {
1371         keys = fKeys;
1372         for (i = 0; i < keysCount;) {
1373             /*
1374              * This key is not a suffix of the previous one;
1375              * keep this one and delete the following ones that are
1376              * suffixes of this one.
1377              */
1378             const char *key;
1379             const char *keyLimit;
1380             int32_t j = i + 1;
1381             map[i].newpos = map[i].oldpos;
1382             if (j < keysCount && map[j].oldpos < 0) {
1383                 /* Key string from the pool bundle, do not delete. */
1384                 i = j;
1385                 continue;
1386             }
1387             key = getKeyString(map[i].oldpos);
1388             for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
1389             for (; j < keysCount && map[j].oldpos >= 0; ++j) {
1390                 const char *k;
1391                 char *suffix;
1392                 const char *suffixLimit;
1393                 int32_t offset;
1394                 suffix = keys + map[j].oldpos;
1395                 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
1396                 offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix);
1397                 if (offset < 0) {
1398                     break;  /* suffix cannot be longer than the original */
1399                 }
1400                 /* Is it a suffix of the earlier, longer key? */
1401                 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
1402                 if (suffix == suffixLimit && *k == *suffixLimit) {
1403                     map[j].newpos = map[i].oldpos + offset;  /* yes, point to the earlier key */
1404                     /* mark the suffix as deleted */
1405                     while (*suffix != 0) { *suffix++ = 1; }
1406                     *suffix = 1;
1407                 } else {
1408                     break;  /* not a suffix, restart from here */
1409                 }
1410             }
1411             i = j;
1412         }
1413         /*
1414          * Re-sort by newpos, then modify the key characters array in-place
1415          * to squeeze out unused bytes, and readjust the newpos offsets.
1416          */
1417         uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1418                        compareKeyNewpos, NULL, FALSE, &errorCode);
1419         if (U_SUCCESS(errorCode)) {
1420             int32_t oldpos, newpos, limit;
1421             oldpos = newpos = fKeysBottom;
1422             limit = fKeysTop;
1423             /* skip key offsets that point into the pool bundle rather than this new bundle */
1424             for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
1425             if (i < keysCount) {
1426                 while (oldpos < limit) {
1427                     if (keys[oldpos] == 1) {
1428                         ++oldpos;  /* skip unused bytes */
1429                     } else {
1430                         /* adjust the new offsets for keys starting here */
1431                         while (i < keysCount && map[i].newpos == oldpos) {
1432                             map[i++].newpos = newpos;
1433                         }
1434                         /* move the key characters to their new position */
1435                         keys[newpos++] = keys[oldpos++];
1436                     }
1437                 }
1438                 assert(i == keysCount);
1439             }
1440             fKeysTop = newpos;
1441             /* Re-sort once more, by old offsets for binary searching. */
1442             uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1443                            compareKeyOldpos, NULL, FALSE, &errorCode);
1444             if (U_SUCCESS(errorCode)) {
1445                 /* key size reduction by limit - newpos */
1446                 fKeyMap = map;
1447                 map = NULL;
1448             }
1449         }
1450     }
1451     uprv_free(map);
1452 }
1453 
1454 static int32_t U_CALLCONV
compareStringSuffixes(const void *,const void * l,const void * r)1455 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
1456     const StringResource *left = *((const StringResource **)l);
1457     const StringResource *right = *((const StringResource **)r);
1458     const UChar *lStart = left->getBuffer();
1459     const UChar *lLimit = lStart + left->length();
1460     const UChar *rStart = right->getBuffer();
1461     const UChar *rLimit = rStart + right->length();
1462     int32_t diff;
1463     /* compare keys in reverse character order */
1464     while (lStart < lLimit && rStart < rLimit) {
1465         diff = (int32_t)*--lLimit - (int32_t)*--rLimit;
1466         if (diff != 0) {
1467             return diff;
1468         }
1469     }
1470     /* sort equal suffixes by descending string length */
1471     return right->length() - left->length();
1472 }
1473 
1474 static int32_t U_CALLCONV
compareStringLengths(const void *,const void * l,const void * r)1475 compareStringLengths(const void * /*context*/, const void *l, const void *r) {
1476     const StringResource *left = *((const StringResource **)l);
1477     const StringResource *right = *((const StringResource **)r);
1478     int32_t diff;
1479     /* Make "is suffix of another string" compare greater than a non-suffix. */
1480     diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
1481     if (diff != 0) {
1482         return diff;
1483     }
1484     /* sort by ascending string length */
1485     diff = left->length() - right->length();
1486     if (diff != 0) {
1487         return diff;
1488     }
1489     // sort by descending size reduction
1490     diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
1491     if (diff != 0) {
1492         return diff;
1493     }
1494     // sort lexically
1495     return left->fString.compare(right->fString);
1496 }
1497 
1498 void
writeUTF16v2(int32_t base,UnicodeString & dest)1499 StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
1500     int32_t len = length();
1501     fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
1502     fWritten = TRUE;
1503     switch(fNumCharsForLength) {
1504     case 0:
1505         break;
1506     case 1:
1507         dest.append((UChar)(0xdc00 + len));
1508         break;
1509     case 2:
1510         dest.append((UChar)(0xdfef + (len >> 16)));
1511         dest.append((UChar)len);
1512         break;
1513     case 3:
1514         dest.append((UChar)0xdfff);
1515         dest.append((UChar)(len >> 16));
1516         dest.append((UChar)len);
1517         break;
1518     default:
1519         break;  /* will not occur */
1520     }
1521     dest.append(fString);
1522     dest.append((UChar)0);
1523 }
1524 
1525 void
compactStringsV2(UHashtable * stringSet,UErrorCode & errorCode)1526 SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
1527     if (U_FAILURE(errorCode)) {
1528         return;
1529     }
1530     // Store the StringResource pointers in an array for
1531     // easy sorting and processing.
1532     // We enumerate a set of strings, so there are no duplicates.
1533     int32_t count = uhash_count(stringSet);
1534     LocalArray<StringResource *> array(new StringResource *[count], errorCode);
1535     if (U_FAILURE(errorCode)) {
1536         return;
1537     }
1538     for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
1539         array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
1540     }
1541     /* Sort the strings so that each one is immediately followed by all of its suffixes. */
1542     uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
1543                    compareStringSuffixes, NULL, FALSE, &errorCode);
1544     if (U_FAILURE(errorCode)) {
1545         return;
1546     }
1547     /*
1548      * Make suffixes point into earlier, longer strings that contain them.
1549      * Temporarily use fSame and fSuffixOffset for suffix strings to
1550      * refer to the remaining ones.
1551      */
1552     for (int32_t i = 0; i < count;) {
1553         /*
1554          * This string is not a suffix of the previous one;
1555          * write this one and subsume the following ones that are
1556          * suffixes of this one.
1557          */
1558         StringResource *res = array[i];
1559         res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
1560         // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit,
1561         // see StringResource::handlePreflightStrings().
1562         int32_t j;
1563         for (j = i + 1; j < count; ++j) {
1564             StringResource *suffixRes = array[j];
1565             /* Is it a suffix of the earlier, longer string? */
1566             if (res->fString.endsWith(suffixRes->fString)) {
1567                 assert(res->length() != suffixRes->length());  // Set strings are unique.
1568                 if (suffixRes->fWritten) {
1569                     // Pool string, skip.
1570                 } else if (suffixRes->fNumCharsForLength == 0) {
1571                     /* yes, point to the earlier string */
1572                     suffixRes->fSame = res;
1573                     suffixRes->fSuffixOffset = res->length() - suffixRes->length();
1574                     if (res->fWritten) {
1575                         // Suffix-share res which is a pool string.
1576                         // Compute the resource word and collect the maximum.
1577                         suffixRes->fRes =
1578                                 res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
1579                         int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
1580                         if (poolStringIndex >= fPoolStringIndexLimit) {
1581                             fPoolStringIndexLimit = poolStringIndex + 1;
1582                         }
1583                         suffixRes->fWritten = TRUE;
1584                     }
1585                     res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
1586                 } else {
1587                     /* write the suffix by itself if we need explicit length */
1588                 }
1589             } else {
1590                 break;  /* not a suffix, restart from here */
1591             }
1592         }
1593         i = j;
1594     }
1595     /*
1596      * Re-sort the strings by ascending length (except suffixes last)
1597      * to optimize for URES_TABLE16 and URES_ARRAY16:
1598      * Keep as many as possible within reach of 16-bit offsets.
1599      */
1600     uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
1601                    compareStringLengths, NULL, FALSE, &errorCode);
1602     if (U_FAILURE(errorCode)) {
1603         return;
1604     }
1605     if (fIsPoolBundle) {
1606         // Write strings that are sufficiently shared.
1607         // Avoid writing other strings.
1608         int32_t numStringsWritten = 0;
1609         int32_t numUnitsSaved = 0;
1610         int32_t numUnitsNotSaved = 0;
1611         for (int32_t i = 0; i < count; ++i) {
1612             StringResource *res = array[i];
1613             // Maximum pool string index when suffix-sharing the last character.
1614             int32_t maxStringIndex =
1615                     f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
1616             if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
1617                     maxStringIndex < RES_MAX_OFFSET) {
1618                 res->writeUTF16v2(0, f16BitUnits);
1619                 ++numStringsWritten;
1620                 numUnitsSaved += res->fNumUnitsSaved;
1621             } else {
1622                 numUnitsNotSaved += res->fNumUnitsSaved;
1623                 res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
1624                 res->fWritten = TRUE;
1625             }
1626         }
1627         if (f16BitUnits.isBogus()) {
1628             errorCode = U_MEMORY_ALLOCATION_ERROR;
1629         }
1630         if (getShowWarning()) {  // not quiet
1631             printf("number of shared strings: %d\n", (int)numStringsWritten);
1632             printf("16-bit units for strings: %6d = %6d bytes\n",
1633                    (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
1634             printf("16-bit units saved:       %6d = %6d bytes\n",
1635                    (int)numUnitsSaved, (int)numUnitsSaved * 2);
1636             printf("16-bit units not saved:   %6d = %6d bytes\n",
1637                    (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
1638         }
1639     } else {
1640         assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
1641         /* Write the non-suffix strings. */
1642         int32_t i;
1643         for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
1644             StringResource *res = array[i];
1645             if (!res->fWritten) {
1646                 int32_t localStringIndex = f16BitUnits.length();
1647                 if (localStringIndex >= fLocalStringIndexLimit) {
1648                     fLocalStringIndexLimit = localStringIndex + 1;
1649                 }
1650                 res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
1651             }
1652         }
1653         if (f16BitUnits.isBogus()) {
1654             errorCode = U_MEMORY_ALLOCATION_ERROR;
1655             return;
1656         }
1657         if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
1658             PseudoListResource *poolStrings =
1659                     static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
1660             for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
1661                 assert(!array[i]->fString.isEmpty());
1662                 StringResource *poolString =
1663                         new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
1664                 if (poolString == NULL) {
1665                     errorCode = U_MEMORY_ALLOCATION_ERROR;
1666                     break;
1667                 }
1668                 poolStrings->add(poolString);
1669             }
1670         }
1671         /* Write the suffix strings. Make each point to the real string. */
1672         for (; i < count; ++i) {
1673             StringResource *res = array[i];
1674             if (res->fWritten) {
1675                 continue;
1676             }
1677             StringResource *same = res->fSame;
1678             assert(res->length() != same->length());  // Set strings are unique.
1679             res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
1680             int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
1681             // Suffixes of pool strings have been set already.
1682             assert(localStringIndex >= 0);
1683             if (localStringIndex >= fLocalStringIndexLimit) {
1684                 fLocalStringIndexLimit = localStringIndex + 1;
1685             }
1686             res->fWritten = TRUE;
1687         }
1688     }
1689     // +1 to account for the initial zero in f16BitUnits
1690     assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
1691 }
1692