1 //
2 // Copyright 2006 The Android Open Source Project
3 //
4 // Build resource files from raw assets.
5 //
6 
7 #include "StringPool.h"
8 #include "ResourceTable.h"
9 
10 #include <utils/ByteOrder.h>
11 #include <utils/SortedVector.h>
12 #include "qsort_r_compat.h"
13 
14 #if HAVE_PRINTF_ZD
15 #  define ZD "%zd"
16 #  define ZD_TYPE ssize_t
17 #else
18 #  define ZD "%ld"
19 #  define ZD_TYPE long
20 #endif
21 
22 #define NOISY(x) //x
23 
24 #if __cplusplus >= 201103L
strcpy16_htod(char16_t * dst,const char16_t * src)25 void strcpy16_htod(char16_t* dst, const char16_t* src)
26 {
27     while (*src) {
28         char16_t s = htods(*src);
29         *dst++ = s;
30         src++;
31     }
32     *dst = 0;
33 }
34 #endif
35 
strcpy16_htod(uint16_t * dst,const char16_t * src)36 void strcpy16_htod(uint16_t* dst, const char16_t* src)
37 {
38     while (*src) {
39         uint16_t s = htods(static_cast<uint16_t>(*src));
40         *dst++ = s;
41         src++;
42     }
43     *dst = 0;
44 }
45 
printStringPool(const ResStringPool * pool)46 void printStringPool(const ResStringPool* pool)
47 {
48     if (pool->getError() == NO_INIT) {
49         printf("String pool is unitialized.\n");
50         return;
51     } else if (pool->getError() != NO_ERROR) {
52         printf("String pool is corrupt/invalid.\n");
53         return;
54     }
55 
56     SortedVector<const void*> uniqueStrings;
57     const size_t N = pool->size();
58     for (size_t i=0; i<N; i++) {
59         size_t len;
60         if (pool->isUTF8()) {
61             uniqueStrings.add(pool->string8At(i, &len));
62         } else {
63             uniqueStrings.add(pool->stringAt(i, &len));
64         }
65     }
66 
67     printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
68             ZD " styles using " ZD " bytes:\n",
69             (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
70             pool->isSorted() ? "sorted" : "non-sorted",
71             (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
72 
73     const size_t NS = pool->size();
74     for (size_t s=0; s<NS; s++) {
75         String8 str = pool->string8ObjectAt(s);
76         printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
77     }
78 }
79 
makeConfigsString() const80 String8 StringPool::entry::makeConfigsString() const {
81     String8 configStr(configTypeName);
82     if (configStr.size() > 0) configStr.append(" ");
83     if (configs.size() > 0) {
84         for (size_t j=0; j<configs.size(); j++) {
85             if (j > 0) configStr.append(", ");
86             configStr.append(configs[j].toString());
87         }
88     } else {
89         configStr = "(none)";
90     }
91     return configStr;
92 }
93 
compare(const entry & o) const94 int StringPool::entry::compare(const entry& o) const {
95     // Strings with styles go first, to reduce the size of the styles array.
96     // We don't care about the relative order of these strings.
97     if (hasStyles) {
98         return o.hasStyles ? 0 : -1;
99     }
100     if (o.hasStyles) {
101         return 1;
102     }
103 
104     // Sort unstyled strings by type, then by logical configuration.
105     int comp = configTypeName.compare(o.configTypeName);
106     if (comp != 0) {
107         return comp;
108     }
109     const size_t LHN = configs.size();
110     const size_t RHN = o.configs.size();
111     size_t i=0;
112     while (i < LHN && i < RHN) {
113         comp = configs[i].compareLogical(o.configs[i]);
114         if (comp != 0) {
115             return comp;
116         }
117         i++;
118     }
119     if (LHN < RHN) return -1;
120     else if (LHN > RHN) return 1;
121     return 0;
122 }
123 
StringPool(bool utf8)124 StringPool::StringPool(bool utf8) :
125         mUTF8(utf8), mValues(-1)
126 {
127 }
128 
add(const String16 & value,const Vector<entry_style_span> & spans,const String8 * configTypeName,const ResTable_config * config)129 ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
130         const String8* configTypeName, const ResTable_config* config)
131 {
132     ssize_t res = add(value, false, configTypeName, config);
133     if (res >= 0) {
134         addStyleSpans(res, spans);
135     }
136     return res;
137 }
138 
add(const String16 & value,bool mergeDuplicates,const String8 * configTypeName,const ResTable_config * config)139 ssize_t StringPool::add(const String16& value,
140         bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
141 {
142     ssize_t vidx = mValues.indexOfKey(value);
143     ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
144     ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
145     if (eidx < 0) {
146         eidx = mEntries.add(entry(value));
147         if (eidx < 0) {
148             fprintf(stderr, "Failure adding string %s\n", String8(value).string());
149             return eidx;
150         }
151     }
152 
153     if (configTypeName != NULL) {
154         entry& ent = mEntries.editItemAt(eidx);
155         NOISY(printf("*** adding config type name %s, was %s\n",
156                 configTypeName->string(), ent.configTypeName.string()));
157         if (ent.configTypeName.size() <= 0) {
158             ent.configTypeName = *configTypeName;
159         } else if (ent.configTypeName != *configTypeName) {
160             ent.configTypeName = " ";
161         }
162     }
163 
164     if (config != NULL) {
165         // Add this to the set of configs associated with the string.
166         entry& ent = mEntries.editItemAt(eidx);
167         size_t addPos;
168         for (addPos=0; addPos<ent.configs.size(); addPos++) {
169             int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
170             if (cmp >= 0) {
171                 if (cmp > 0) {
172                     NOISY(printf("*** inserting config: %s\n", config->toString().string()));
173                     ent.configs.insertAt(*config, addPos);
174                 }
175                 break;
176             }
177         }
178         if (addPos >= ent.configs.size()) {
179             NOISY(printf("*** adding config: %s\n", config->toString().string()));
180             ent.configs.add(*config);
181         }
182     }
183 
184     const bool first = vidx < 0;
185     const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
186         mEntryStyleArray[pos].spans.size() : 0;
187     if (first || styled || !mergeDuplicates) {
188         pos = mEntryArray.add(eidx);
189         if (first) {
190             vidx = mValues.add(value, pos);
191         }
192         entry& ent = mEntries.editItemAt(eidx);
193         ent.indices.add(pos);
194     }
195 
196     NOISY(printf("Adding string %s to pool: pos=%d eidx=%d vidx=%d\n",
197             String8(value).string(), pos, eidx, vidx));
198 
199     return pos;
200 }
201 
addStyleSpan(size_t idx,const String16 & name,uint32_t start,uint32_t end)202 status_t StringPool::addStyleSpan(size_t idx, const String16& name,
203                                   uint32_t start, uint32_t end)
204 {
205     entry_style_span span;
206     span.name = name;
207     span.span.firstChar = start;
208     span.span.lastChar = end;
209     return addStyleSpan(idx, span);
210 }
211 
addStyleSpans(size_t idx,const Vector<entry_style_span> & spans)212 status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
213 {
214     const size_t N=spans.size();
215     for (size_t i=0; i<N; i++) {
216         status_t err = addStyleSpan(idx, spans[i]);
217         if (err != NO_ERROR) {
218             return err;
219         }
220     }
221     return NO_ERROR;
222 }
223 
addStyleSpan(size_t idx,const entry_style_span & span)224 status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
225 {
226     // Place blank entries in the span array up to this index.
227     while (mEntryStyleArray.size() <= idx) {
228         mEntryStyleArray.add();
229     }
230 
231     entry_style& style = mEntryStyleArray.editItemAt(idx);
232     style.spans.add(span);
233     mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
234     return NO_ERROR;
235 }
236 
config_sort(void * state,const void * lhs,const void * rhs)237 int StringPool::config_sort(void* state, const void* lhs, const void* rhs)
238 {
239     StringPool* pool = (StringPool*)state;
240     const entry& lhe = pool->mEntries[pool->mEntryArray[*static_cast<const size_t*>(lhs)]];
241     const entry& rhe = pool->mEntries[pool->mEntryArray[*static_cast<const size_t*>(rhs)]];
242     return lhe.compare(rhe);
243 }
244 
sortByConfig()245 void StringPool::sortByConfig()
246 {
247     LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
248 
249     const size_t N = mEntryArray.size();
250 
251     // This is a vector that starts out with a 1:1 mapping to entries
252     // in the array, which we will sort to come up with the desired order.
253     // At that point it maps from the new position in the array to the
254     // original position the entry appeared.
255     Vector<size_t> newPosToOriginalPos;
256     newPosToOriginalPos.setCapacity(N);
257     for (size_t i=0; i < N; i++) {
258         newPosToOriginalPos.add(i);
259     }
260 
261     // Sort the array.
262     NOISY(printf("SORTING STRINGS BY CONFIGURATION...\n"));
263     // Vector::sort uses insertion sort, which is very slow for this data set.
264     // Use quicksort instead because we don't need a stable sort here.
265     qsort_r_compat(newPosToOriginalPos.editArray(), N, sizeof(size_t), this, config_sort);
266     //newPosToOriginalPos.sort(config_sort, this);
267     NOISY(printf("DONE SORTING STRINGS BY CONFIGURATION.\n"));
268 
269     // Create the reverse mapping from the original position in the array
270     // to the new position where it appears in the sorted array.  This is
271     // so that clients can re-map any positions they had previously stored.
272     mOriginalPosToNewPos = newPosToOriginalPos;
273     for (size_t i=0; i<N; i++) {
274         mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
275     }
276 
277 #if 0
278     SortedVector<entry> entries;
279 
280     for (size_t i=0; i<N; i++) {
281         printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
282                 mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
283         entries.add(mEntries[mEntryArray[i]]);
284     }
285 
286     for (size_t i=0; i<entries.size(); i++) {
287         printf("Sorted config #%d: %s\n", i,
288                 entries[i].makeConfigsString().string());
289     }
290 #endif
291 
292     // Now we rebuild the arrays.
293     Vector<entry> newEntries;
294     Vector<size_t> newEntryArray;
295     Vector<entry_style> newEntryStyleArray;
296     DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
297 
298     for (size_t i=0; i<N; i++) {
299         // We are filling in new offset 'i'; oldI is where we can find it
300         // in the original data structure.
301         size_t oldI = newPosToOriginalPos[i];
302         // This is the actual entry associated with the old offset.
303         const entry& oldEnt = mEntries[mEntryArray[oldI]];
304         // This is the same entry the last time we added it to the
305         // new entry array, if any.
306         ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
307         size_t newOffset;
308         if (newIndexOfOffset < 0) {
309             // This is the first time we have seen the entry, so add
310             // it.
311             newOffset = newEntries.add(oldEnt);
312             newEntries.editItemAt(newOffset).indices.clear();
313         } else {
314             // We have seen this entry before, use the existing one
315             // instead of adding it again.
316             newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
317         }
318         // Update the indices to include this new position.
319         newEntries.editItemAt(newOffset).indices.add(i);
320         // And add the offset of the entry to the new entry array.
321         newEntryArray.add(newOffset);
322         // Add any old style to the new style array.
323         if (mEntryStyleArray.size() > 0) {
324             if (oldI < mEntryStyleArray.size()) {
325                 newEntryStyleArray.add(mEntryStyleArray[oldI]);
326             } else {
327                 newEntryStyleArray.add(entry_style());
328             }
329         }
330     }
331 
332     // Now trim any entries at the end of the new style array that are
333     // not needed.
334     for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
335         const entry_style& style = newEntryStyleArray[i];
336         if (style.spans.size() > 0) {
337             // That's it.
338             break;
339         }
340         // This one is not needed; remove.
341         newEntryStyleArray.removeAt(i);
342     }
343 
344     // All done, install the new data structures and upate mValues with
345     // the new positions.
346     mEntries = newEntries;
347     mEntryArray = newEntryArray;
348     mEntryStyleArray = newEntryStyleArray;
349     mValues.clear();
350     for (size_t i=0; i<mEntries.size(); i++) {
351         const entry& ent = mEntries[i];
352         mValues.add(ent.value, ent.indices[0]);
353     }
354 
355 #if 0
356     printf("FINAL SORTED STRING CONFIGS:\n");
357     for (size_t i=0; i<mEntries.size(); i++) {
358         const entry& ent = mEntries[i];
359         printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
360                 String8(ent.value).string());
361     }
362 #endif
363 }
364 
createStringBlock()365 sp<AaptFile> StringPool::createStringBlock()
366 {
367     sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
368                                      String8());
369     status_t err = writeStringBlock(pool);
370     return err == NO_ERROR ? pool : NULL;
371 }
372 
373 #define ENCODE_LENGTH(str, chrsz, strSize) \
374 { \
375     size_t maxMask = 1 << ((chrsz*8)-1); \
376     size_t maxSize = maxMask-1; \
377     if (strSize > maxSize) { \
378         *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
379     } \
380     *str++ = strSize; \
381 }
382 
writeStringBlock(const sp<AaptFile> & pool)383 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
384 {
385     // Allow appending.  Sorry this is a little wacky.
386     if (pool->getSize() > 0) {
387         sp<AaptFile> block = createStringBlock();
388         if (block == NULL) {
389             return UNKNOWN_ERROR;
390         }
391         ssize_t res = pool->writeData(block->getData(), block->getSize());
392         return (res >= 0) ? (status_t)NO_ERROR : res;
393     }
394 
395     // First we need to add all style span names to the string pool.
396     // We do this now (instead of when the span is added) so that these
397     // will appear at the end of the pool, not disrupting the order
398     // our client placed their own strings in it.
399 
400     const size_t STYLES = mEntryStyleArray.size();
401     size_t i;
402 
403     for (i=0; i<STYLES; i++) {
404         entry_style& style = mEntryStyleArray.editItemAt(i);
405         const size_t N = style.spans.size();
406         for (size_t i=0; i<N; i++) {
407             entry_style_span& span = style.spans.editItemAt(i);
408             ssize_t idx = add(span.name, true);
409             if (idx < 0) {
410                 fprintf(stderr, "Error adding span for style tag '%s'\n",
411                         String8(span.name).string());
412                 return idx;
413             }
414             span.span.name.index = (uint32_t)idx;
415         }
416     }
417 
418     const size_t ENTRIES = mEntryArray.size();
419 
420     // Now build the pool of unique strings.
421 
422     const size_t STRINGS = mEntries.size();
423     const size_t preSize = sizeof(ResStringPool_header)
424                          + (sizeof(uint32_t)*ENTRIES)
425                          + (sizeof(uint32_t)*STYLES);
426     if (pool->editData(preSize) == NULL) {
427         fprintf(stderr, "ERROR: Out of memory for string pool\n");
428         return NO_MEMORY;
429     }
430 
431     const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(uint16_t);
432 
433     size_t strPos = 0;
434     for (i=0; i<STRINGS; i++) {
435         entry& ent = mEntries.editItemAt(i);
436         const size_t strSize = (ent.value.size());
437         const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
438             charSize*2 : charSize;
439 
440         String8 encStr;
441         if (mUTF8) {
442             encStr = String8(ent.value);
443         }
444 
445         const size_t encSize = mUTF8 ? encStr.size() : 0;
446         const size_t encLenSize = mUTF8 ?
447             (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
448                 charSize*2 : charSize) : 0;
449 
450         ent.offset = strPos;
451 
452         const size_t totalSize = lenSize + encLenSize +
453             ((mUTF8 ? encSize : strSize)+1)*charSize;
454 
455         void* dat = (void*)pool->editData(preSize + strPos + totalSize);
456         if (dat == NULL) {
457             fprintf(stderr, "ERROR: Out of memory for string pool\n");
458             return NO_MEMORY;
459         }
460         dat = (uint8_t*)dat + preSize + strPos;
461         if (mUTF8) {
462             uint8_t* strings = (uint8_t*)dat;
463 
464             ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
465 
466             ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
467 
468             strncpy((char*)strings, encStr, encSize+1);
469         } else {
470             uint16_t* strings = (uint16_t*)dat;
471 
472             ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
473 
474             strcpy16_htod(strings, ent.value);
475         }
476 
477         strPos += totalSize;
478     }
479 
480     // Pad ending string position up to a uint32_t boundary.
481 
482     if (strPos&0x3) {
483         size_t padPos = ((strPos+3)&~0x3);
484         uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
485         if (dat == NULL) {
486             fprintf(stderr, "ERROR: Out of memory padding string pool\n");
487             return NO_MEMORY;
488         }
489         memset(dat+preSize+strPos, 0, padPos-strPos);
490         strPos = padPos;
491     }
492 
493     // Build the pool of style spans.
494 
495     size_t styPos = strPos;
496     for (i=0; i<STYLES; i++) {
497         entry_style& ent = mEntryStyleArray.editItemAt(i);
498         const size_t N = ent.spans.size();
499         const size_t totalSize = (N*sizeof(ResStringPool_span))
500                                + sizeof(ResStringPool_ref);
501 
502         ent.offset = styPos-strPos;
503         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
504         if (dat == NULL) {
505             fprintf(stderr, "ERROR: Out of memory for string styles\n");
506             return NO_MEMORY;
507         }
508         ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
509         for (size_t i=0; i<N; i++) {
510             span->name.index = htodl(ent.spans[i].span.name.index);
511             span->firstChar = htodl(ent.spans[i].span.firstChar);
512             span->lastChar = htodl(ent.spans[i].span.lastChar);
513             span++;
514         }
515         span->name.index = htodl(ResStringPool_span::END);
516 
517         styPos += totalSize;
518     }
519 
520     if (STYLES > 0) {
521         // Add full terminator at the end (when reading we validate that
522         // the end of the pool is fully terminated to simplify error
523         // checking).
524         size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
525         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
526         if (dat == NULL) {
527             fprintf(stderr, "ERROR: Out of memory for string styles\n");
528             return NO_MEMORY;
529         }
530         uint32_t* p = (uint32_t*)(dat+preSize+styPos);
531         while (extra > 0) {
532             *p++ = htodl(ResStringPool_span::END);
533             extra -= sizeof(uint32_t);
534         }
535         styPos += extra;
536     }
537 
538     // Write header.
539 
540     ResStringPool_header* header =
541         (ResStringPool_header*)pool->padData(sizeof(uint32_t));
542     if (header == NULL) {
543         fprintf(stderr, "ERROR: Out of memory for string pool\n");
544         return NO_MEMORY;
545     }
546     memset(header, 0, sizeof(*header));
547     header->header.type = htods(RES_STRING_POOL_TYPE);
548     header->header.headerSize = htods(sizeof(*header));
549     header->header.size = htodl(pool->getSize());
550     header->stringCount = htodl(ENTRIES);
551     header->styleCount = htodl(STYLES);
552     if (mUTF8) {
553         header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
554     }
555     header->stringsStart = htodl(preSize);
556     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
557 
558     // Write string index array.
559 
560     uint32_t* index = (uint32_t*)(header+1);
561     for (i=0; i<ENTRIES; i++) {
562         entry& ent = mEntries.editItemAt(mEntryArray[i]);
563         *index++ = htodl(ent.offset);
564         NOISY(printf("Writing entry #%d: \"%s\" ent=%d off=%d\n", i,
565                 String8(ent.value).string(),
566                 mEntryArray[i], ent.offset));
567     }
568 
569     // Write style index array.
570 
571     for (i=0; i<STYLES; i++) {
572         *index++ = htodl(mEntryStyleArray[i].offset);
573     }
574 
575     return NO_ERROR;
576 }
577 
offsetForString(const String16 & val) const578 ssize_t StringPool::offsetForString(const String16& val) const
579 {
580     const Vector<size_t>* indices = offsetsForString(val);
581     ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
582     NOISY(printf("Offset for string %s: %d (%s)\n", String8(val).string(), res,
583             res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8()));
584     return res;
585 }
586 
offsetsForString(const String16 & val) const587 const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
588 {
589     ssize_t pos = mValues.valueFor(val);
590     if (pos < 0) {
591         return NULL;
592     }
593     return &mEntries[mEntryArray[pos]].indices;
594 }
595