1 //
2 // Copyright 2006 The Android Open Source Project
3 //
4 // Build resource files from raw assets.
5 //
6 #include "StringPool.h"
7 
8 #include <utils/ByteOrder.h>
9 #include <utils/SortedVector.h>
10 
11 #include <algorithm>
12 
13 #include "ResourceTable.h"
14 
15 // SSIZE: mingw does not have signed size_t == ssize_t.
16 #if !defined(_WIN32)
17 #  define SSIZE(x) x
18 #else
19 #  define SSIZE(x) (signed size_t)x
20 #endif
21 
22 // Set to true for noisy debug output.
23 static const bool kIsDebug = false;
24 
25 #if __cplusplus >= 201103L
strcpy16_htod(char16_t * dst,const char16_t * src)26 void strcpy16_htod(char16_t* dst, const char16_t* src)
27 {
28     while (*src) {
29         char16_t s = htods(*src);
30         *dst++ = s;
31         src++;
32     }
33     *dst = 0;
34 }
35 #endif
36 
strcpy16_htod(uint16_t * dst,const char16_t * src)37 void strcpy16_htod(uint16_t* dst, const char16_t* src)
38 {
39     while (*src) {
40         uint16_t s = htods(static_cast<uint16_t>(*src));
41         *dst++ = s;
42         src++;
43     }
44     *dst = 0;
45 }
46 
printStringPool(const ResStringPool * pool)47 void printStringPool(const ResStringPool* pool)
48 {
49     if (pool->getError() == NO_INIT) {
50         printf("String pool is unitialized.\n");
51         return;
52     } else if (pool->getError() != NO_ERROR) {
53         printf("String pool is corrupt/invalid.\n");
54         return;
55     }
56 
57     SortedVector<const void*> uniqueStrings;
58     const size_t N = pool->size();
59     for (size_t i=0; i<N; i++) {
60         size_t len;
61         if (pool->isUTF8()) {
62             uniqueStrings.add(pool->string8At(i, &len));
63         } else {
64             uniqueStrings.add(pool->stringAt(i, &len));
65         }
66     }
67 
68     printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
69             ZD " styles using " ZD " bytes:\n",
70             (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
71             pool->isSorted() ? "sorted" : "non-sorted",
72             (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
73 
74     const size_t NS = pool->size();
75     for (size_t s=0; s<NS; s++) {
76         String8 str = pool->string8ObjectAt(s);
77         printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
78     }
79 }
80 
makeConfigsString() const81 String8 StringPool::entry::makeConfigsString() const {
82     String8 configStr(configTypeName);
83     if (configStr.size() > 0) configStr.append(" ");
84     if (configs.size() > 0) {
85         for (size_t j=0; j<configs.size(); j++) {
86             if (j > 0) configStr.append(", ");
87             configStr.append(configs[j].toString());
88         }
89     } else {
90         configStr = "(none)";
91     }
92     return configStr;
93 }
94 
compare(const entry & o) const95 int StringPool::entry::compare(const entry& o) const {
96     // Strings with styles go first, to reduce the size of the styles array.
97     // We don't care about the relative order of these strings.
98     if (hasStyles) {
99         return o.hasStyles ? 0 : -1;
100     }
101     if (o.hasStyles) {
102         return 1;
103     }
104 
105     // Sort unstyled strings by type, then by logical configuration.
106     int comp = configTypeName.compare(o.configTypeName);
107     if (comp != 0) {
108         return comp;
109     }
110     const size_t LHN = configs.size();
111     const size_t RHN = o.configs.size();
112     size_t i=0;
113     while (i < LHN && i < RHN) {
114         comp = configs[i].compareLogical(o.configs[i]);
115         if (comp != 0) {
116             return comp;
117         }
118         i++;
119     }
120     if (LHN < RHN) return -1;
121     else if (LHN > RHN) return 1;
122     return 0;
123 }
124 
StringPool(bool utf8)125 StringPool::StringPool(bool utf8) :
126         mUTF8(utf8), mValues(-1)
127 {
128 }
129 
add(const String16 & value,const Vector<entry_style_span> & spans,const String8 * configTypeName,const ResTable_config * config)130 ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
131         const String8* configTypeName, const ResTable_config* config)
132 {
133     ssize_t res = add(value, false, configTypeName, config);
134     if (res >= 0) {
135         addStyleSpans(res, spans);
136     }
137     return res;
138 }
139 
add(const String16 & value,bool mergeDuplicates,const String8 * configTypeName,const ResTable_config * config)140 ssize_t StringPool::add(const String16& value,
141         bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
142 {
143     ssize_t vidx = mValues.indexOfKey(value);
144     ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
145     ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
146     if (eidx < 0) {
147         eidx = mEntries.add(entry(value));
148         if (eidx < 0) {
149             fprintf(stderr, "Failure adding string %s\n", String8(value).string());
150             return eidx;
151         }
152     }
153 
154     if (configTypeName != NULL) {
155         entry& ent = mEntries.editItemAt(eidx);
156         if (kIsDebug) {
157             printf("*** adding config type name %s, was %s\n",
158                     configTypeName->string(), ent.configTypeName.string());
159         }
160         if (ent.configTypeName.size() <= 0) {
161             ent.configTypeName = *configTypeName;
162         } else if (ent.configTypeName != *configTypeName) {
163             ent.configTypeName = " ";
164         }
165     }
166 
167     if (config != NULL) {
168         // Add this to the set of configs associated with the string.
169         entry& ent = mEntries.editItemAt(eidx);
170         size_t addPos;
171         for (addPos=0; addPos<ent.configs.size(); addPos++) {
172             int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
173             if (cmp >= 0) {
174                 if (cmp > 0) {
175                     if (kIsDebug) {
176                         printf("*** inserting config: %s\n", config->toString().string());
177                     }
178                     ent.configs.insertAt(*config, addPos);
179                 }
180                 break;
181             }
182         }
183         if (addPos >= ent.configs.size()) {
184             if (kIsDebug) {
185                 printf("*** adding config: %s\n", config->toString().string());
186             }
187             ent.configs.add(*config);
188         }
189     }
190 
191     const bool first = vidx < 0;
192     const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
193         mEntryStyleArray[pos].spans.size() : 0;
194     if (first || styled || !mergeDuplicates) {
195         pos = mEntryArray.add(eidx);
196         if (first) {
197             vidx = mValues.add(value, pos);
198         }
199         entry& ent = mEntries.editItemAt(eidx);
200         ent.indices.add(pos);
201     }
202 
203     if (kIsDebug) {
204         printf("Adding string %s to pool: pos=%zd eidx=%zd vidx=%zd\n",
205                 String8(value).string(), SSIZE(pos), SSIZE(eidx), SSIZE(vidx));
206     }
207 
208     return pos;
209 }
210 
addStyleSpan(size_t idx,const String16 & name,uint32_t start,uint32_t end)211 status_t StringPool::addStyleSpan(size_t idx, const String16& name,
212                                   uint32_t start, uint32_t end)
213 {
214     entry_style_span span;
215     span.name = name;
216     span.span.firstChar = start;
217     span.span.lastChar = end;
218     return addStyleSpan(idx, span);
219 }
220 
addStyleSpans(size_t idx,const Vector<entry_style_span> & spans)221 status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
222 {
223     const size_t N=spans.size();
224     for (size_t i=0; i<N; i++) {
225         status_t err = addStyleSpan(idx, spans[i]);
226         if (err != NO_ERROR) {
227             return err;
228         }
229     }
230     return NO_ERROR;
231 }
232 
addStyleSpan(size_t idx,const entry_style_span & span)233 status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
234 {
235     // Place blank entries in the span array up to this index.
236     while (mEntryStyleArray.size() <= idx) {
237         mEntryStyleArray.add();
238     }
239 
240     entry_style& style = mEntryStyleArray.editItemAt(idx);
241     style.spans.add(span);
242     mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
243     return NO_ERROR;
244 }
245 
ConfigSorter(const StringPool & pool)246 StringPool::ConfigSorter::ConfigSorter(const StringPool& pool) : pool(pool)
247 {
248 }
249 
operator ()(size_t l,size_t r)250 bool StringPool::ConfigSorter::operator()(size_t l, size_t r)
251 {
252     const StringPool::entry& lhe = pool.mEntries[pool.mEntryArray[l]];
253     const StringPool::entry& rhe = pool.mEntries[pool.mEntryArray[r]];
254     return lhe.compare(rhe) < 0;
255 }
256 
sortByConfig()257 void StringPool::sortByConfig()
258 {
259     LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
260 
261     const size_t N = mEntryArray.size();
262 
263     // This is a vector that starts out with a 1:1 mapping to entries
264     // in the array, which we will sort to come up with the desired order.
265     // At that point it maps from the new position in the array to the
266     // original position the entry appeared.
267     Vector<size_t> newPosToOriginalPos;
268     newPosToOriginalPos.setCapacity(N);
269     for (size_t i=0; i < N; i++) {
270         newPosToOriginalPos.add(i);
271     }
272 
273     // Sort the array.
274     if (kIsDebug) {
275         printf("SORTING STRINGS BY CONFIGURATION...\n");
276     }
277     ConfigSorter sorter(*this);
278     std::sort(newPosToOriginalPos.begin(), newPosToOriginalPos.end(), sorter);
279     if (kIsDebug) {
280         printf("DONE SORTING STRINGS BY CONFIGURATION.\n");
281     }
282 
283     // Create the reverse mapping from the original position in the array
284     // to the new position where it appears in the sorted array.  This is
285     // so that clients can re-map any positions they had previously stored.
286     mOriginalPosToNewPos = newPosToOriginalPos;
287     for (size_t i=0; i<N; i++) {
288         mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
289     }
290 
291 #if 0
292     SortedVector<entry> entries;
293 
294     for (size_t i=0; i<N; i++) {
295         printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
296                 mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
297         entries.add(mEntries[mEntryArray[i]]);
298     }
299 
300     for (size_t i=0; i<entries.size(); i++) {
301         printf("Sorted config #%d: %s\n", i,
302                 entries[i].makeConfigsString().string());
303     }
304 #endif
305 
306     // Now we rebuild the arrays.
307     Vector<entry> newEntries;
308     Vector<size_t> newEntryArray;
309     Vector<entry_style> newEntryStyleArray;
310     DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
311 
312     for (size_t i=0; i<N; i++) {
313         // We are filling in new offset 'i'; oldI is where we can find it
314         // in the original data structure.
315         size_t oldI = newPosToOriginalPos[i];
316         // This is the actual entry associated with the old offset.
317         const entry& oldEnt = mEntries[mEntryArray[oldI]];
318         // This is the same entry the last time we added it to the
319         // new entry array, if any.
320         ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
321         size_t newOffset;
322         if (newIndexOfOffset < 0) {
323             // This is the first time we have seen the entry, so add
324             // it.
325             newOffset = newEntries.add(oldEnt);
326             newEntries.editItemAt(newOffset).indices.clear();
327         } else {
328             // We have seen this entry before, use the existing one
329             // instead of adding it again.
330             newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
331         }
332         // Update the indices to include this new position.
333         newEntries.editItemAt(newOffset).indices.add(i);
334         // And add the offset of the entry to the new entry array.
335         newEntryArray.add(newOffset);
336         // Add any old style to the new style array.
337         if (mEntryStyleArray.size() > 0) {
338             if (oldI < mEntryStyleArray.size()) {
339                 newEntryStyleArray.add(mEntryStyleArray[oldI]);
340             } else {
341                 newEntryStyleArray.add(entry_style());
342             }
343         }
344     }
345 
346     // Now trim any entries at the end of the new style array that are
347     // not needed.
348     for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
349         const entry_style& style = newEntryStyleArray[i];
350         if (style.spans.size() > 0) {
351             // That's it.
352             break;
353         }
354         // This one is not needed; remove.
355         newEntryStyleArray.removeAt(i);
356     }
357 
358     // All done, install the new data structures and upate mValues with
359     // the new positions.
360     mEntries = newEntries;
361     mEntryArray = newEntryArray;
362     mEntryStyleArray = newEntryStyleArray;
363     mValues.clear();
364     for (size_t i=0; i<mEntries.size(); i++) {
365         const entry& ent = mEntries[i];
366         mValues.add(ent.value, ent.indices[0]);
367     }
368 
369 #if 0
370     printf("FINAL SORTED STRING CONFIGS:\n");
371     for (size_t i=0; i<mEntries.size(); i++) {
372         const entry& ent = mEntries[i];
373         printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
374                 String8(ent.value).string());
375     }
376 #endif
377 }
378 
createStringBlock()379 sp<AaptFile> StringPool::createStringBlock()
380 {
381     sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
382                                      String8());
383     status_t err = writeStringBlock(pool);
384     return err == NO_ERROR ? pool : NULL;
385 }
386 
387 #define ENCODE_LENGTH(str, chrsz, strSize) \
388 { \
389     size_t maxMask = 1 << (((chrsz)*8)-1); \
390     size_t maxSize = maxMask-1; \
391     if ((strSize) > maxSize) { \
392         *(str)++ = maxMask | (((strSize)>>((chrsz)*8))&maxSize); \
393     } \
394     *(str)++ = strSize; \
395 }
396 
writeStringBlock(const sp<AaptFile> & pool)397 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
398 {
399     // Allow appending.  Sorry this is a little wacky.
400     if (pool->getSize() > 0) {
401         sp<AaptFile> block = createStringBlock();
402         if (block == NULL) {
403             return UNKNOWN_ERROR;
404         }
405         ssize_t res = pool->writeData(block->getData(), block->getSize());
406         return (res >= 0) ? (status_t)NO_ERROR : res;
407     }
408 
409     // First we need to add all style span names to the string pool.
410     // We do this now (instead of when the span is added) so that these
411     // will appear at the end of the pool, not disrupting the order
412     // our client placed their own strings in it.
413 
414     const size_t STYLES = mEntryStyleArray.size();
415     size_t i;
416 
417     for (i=0; i<STYLES; i++) {
418         entry_style& style = mEntryStyleArray.editItemAt(i);
419         const size_t N = style.spans.size();
420         for (size_t i=0; i<N; i++) {
421             entry_style_span& span = style.spans.editItemAt(i);
422             ssize_t idx = add(span.name, true);
423             if (idx < 0) {
424                 fprintf(stderr, "Error adding span for style tag '%s'\n",
425                         String8(span.name).string());
426                 return idx;
427             }
428             span.span.name.index = (uint32_t)idx;
429         }
430     }
431 
432     const size_t ENTRIES = mEntryArray.size();
433 
434     // Now build the pool of unique strings.
435 
436     const size_t STRINGS = mEntries.size();
437     const size_t preSize = sizeof(ResStringPool_header)
438                          + (sizeof(uint32_t)*ENTRIES)
439                          + (sizeof(uint32_t)*STYLES);
440     if (pool->editData(preSize) == NULL) {
441         fprintf(stderr, "ERROR: Out of memory for string pool\n");
442         return NO_MEMORY;
443     }
444 
445     const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(uint16_t);
446 
447     size_t strPos = 0;
448     for (i=0; i<STRINGS; i++) {
449         entry& ent = mEntries.editItemAt(i);
450         const size_t strSize = (ent.value.size());
451         const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
452             charSize*2 : charSize;
453 
454         String8 encStr;
455         if (mUTF8) {
456             encStr = String8(ent.value);
457         }
458 
459         const size_t encSize = mUTF8 ? encStr.size() : 0;
460         const size_t encLenSize = mUTF8 ?
461             (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
462                 charSize*2 : charSize) : 0;
463 
464         ent.offset = strPos;
465 
466         const size_t totalSize = lenSize + encLenSize +
467             ((mUTF8 ? encSize : strSize)+1)*charSize;
468 
469         void* dat = (void*)pool->editData(preSize + strPos + totalSize);
470         if (dat == NULL) {
471             fprintf(stderr, "ERROR: Out of memory for string pool\n");
472             return NO_MEMORY;
473         }
474         dat = (uint8_t*)dat + preSize + strPos;
475         if (mUTF8) {
476             uint8_t* strings = (uint8_t*)dat;
477 
478             ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
479 
480             ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
481 
482             strncpy((char*)strings, encStr, encSize+1);
483         } else {
484             char16_t* strings = (char16_t*)dat;
485 
486             ENCODE_LENGTH(strings, sizeof(char16_t), strSize)
487 
488             strcpy16_htod(strings, ent.value);
489         }
490 
491         strPos += totalSize;
492     }
493 
494     // Pad ending string position up to a uint32_t boundary.
495 
496     if (strPos&0x3) {
497         size_t padPos = ((strPos+3)&~0x3);
498         uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
499         if (dat == NULL) {
500             fprintf(stderr, "ERROR: Out of memory padding string pool\n");
501             return NO_MEMORY;
502         }
503         memset(dat+preSize+strPos, 0, padPos-strPos);
504         strPos = padPos;
505     }
506 
507     // Build the pool of style spans.
508 
509     size_t styPos = strPos;
510     for (i=0; i<STYLES; i++) {
511         entry_style& ent = mEntryStyleArray.editItemAt(i);
512         const size_t N = ent.spans.size();
513         const size_t totalSize = (N*sizeof(ResStringPool_span))
514                                + sizeof(ResStringPool_ref);
515 
516         ent.offset = styPos-strPos;
517         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
518         if (dat == NULL) {
519             fprintf(stderr, "ERROR: Out of memory for string styles\n");
520             return NO_MEMORY;
521         }
522         ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
523         for (size_t i=0; i<N; i++) {
524             span->name.index = htodl(ent.spans[i].span.name.index);
525             span->firstChar = htodl(ent.spans[i].span.firstChar);
526             span->lastChar = htodl(ent.spans[i].span.lastChar);
527             span++;
528         }
529         span->name.index = htodl(ResStringPool_span::END);
530 
531         styPos += totalSize;
532     }
533 
534     if (STYLES > 0) {
535         // Add full terminator at the end (when reading we validate that
536         // the end of the pool is fully terminated to simplify error
537         // checking).
538         size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
539         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
540         if (dat == NULL) {
541             fprintf(stderr, "ERROR: Out of memory for string styles\n");
542             return NO_MEMORY;
543         }
544         uint32_t* p = (uint32_t*)(dat+preSize+styPos);
545         while (extra > 0) {
546             *p++ = htodl(ResStringPool_span::END);
547             extra -= sizeof(uint32_t);
548         }
549         styPos += extra;
550     }
551 
552     // Write header.
553 
554     ResStringPool_header* header =
555         (ResStringPool_header*)pool->padData(sizeof(uint32_t));
556     if (header == NULL) {
557         fprintf(stderr, "ERROR: Out of memory for string pool\n");
558         return NO_MEMORY;
559     }
560     memset(header, 0, sizeof(*header));
561     header->header.type = htods(RES_STRING_POOL_TYPE);
562     header->header.headerSize = htods(sizeof(*header));
563     header->header.size = htodl(pool->getSize());
564     header->stringCount = htodl(ENTRIES);
565     header->styleCount = htodl(STYLES);
566     if (mUTF8) {
567         header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
568     }
569     header->stringsStart = htodl(preSize);
570     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
571 
572     // Write string index array.
573 
574     uint32_t* index = (uint32_t*)(header+1);
575     for (i=0; i<ENTRIES; i++) {
576         entry& ent = mEntries.editItemAt(mEntryArray[i]);
577         *index++ = htodl(ent.offset);
578         if (kIsDebug) {
579             printf("Writing entry #%zu: \"%s\" ent=%zu off=%zu\n",
580                     i,
581                     String8(ent.value).string(),
582                     mEntryArray[i],
583                     ent.offset);
584         }
585     }
586 
587     // Write style index array.
588 
589     for (i=0; i<STYLES; i++) {
590         *index++ = htodl(mEntryStyleArray[i].offset);
591     }
592 
593     return NO_ERROR;
594 }
595 
offsetForString(const String16 & val) const596 ssize_t StringPool::offsetForString(const String16& val) const
597 {
598     const Vector<size_t>* indices = offsetsForString(val);
599     ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
600     if (kIsDebug) {
601         printf("Offset for string %s: %zd (%s)\n", String8(val).string(), SSIZE(res),
602                 res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8());
603     }
604     return res;
605 }
606 
offsetsForString(const String16 & val) const607 const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
608 {
609     ssize_t pos = mValues.valueFor(val);
610     if (pos < 0) {
611         return NULL;
612     }
613     return &mEntries[mEntryArray[pos]].indices;
614 }
615