1 //
2 // Copyright 2006 The Android Open Source Project
3 //
4 // Build resource files from raw assets.
5 //
6 #include "StringPool.h"
7 
8 #include <utils/ByteOrder.h>
9 #include <utils/SortedVector.h>
10 
11 #include <algorithm>
12 
13 #include "ResourceTable.h"
14 
15 // SSIZE: mingw does not have signed size_t == ssize_t.
16 #if !defined(_WIN32)
17 #  define ZD "%zd"
18 #  define ZD_TYPE ssize_t
19 #  define SSIZE(x) x
20 #else
21 #  define ZD "%ld"
22 #  define ZD_TYPE long
23 #  define SSIZE(x) (signed size_t)x
24 #endif
25 
26 // Set to true for noisy debug output.
27 static const bool kIsDebug = false;
28 
29 #if __cplusplus >= 201103L
strcpy16_htod(char16_t * dst,const char16_t * src)30 void strcpy16_htod(char16_t* dst, const char16_t* src)
31 {
32     while (*src) {
33         char16_t s = htods(*src);
34         *dst++ = s;
35         src++;
36     }
37     *dst = 0;
38 }
39 #endif
40 
strcpy16_htod(uint16_t * dst,const char16_t * src)41 void strcpy16_htod(uint16_t* dst, const char16_t* src)
42 {
43     while (*src) {
44         uint16_t s = htods(static_cast<uint16_t>(*src));
45         *dst++ = s;
46         src++;
47     }
48     *dst = 0;
49 }
50 
printStringPool(const ResStringPool * pool)51 void printStringPool(const ResStringPool* pool)
52 {
53     if (pool->getError() == NO_INIT) {
54         printf("String pool is unitialized.\n");
55         return;
56     } else if (pool->getError() != NO_ERROR) {
57         printf("String pool is corrupt/invalid.\n");
58         return;
59     }
60 
61     SortedVector<const void*> uniqueStrings;
62     const size_t N = pool->size();
63     for (size_t i=0; i<N; i++) {
64         size_t len;
65         if (pool->isUTF8()) {
66             uniqueStrings.add(pool->string8At(i, &len));
67         } else {
68             uniqueStrings.add(pool->stringAt(i, &len));
69         }
70     }
71 
72     printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
73             ZD " styles using " ZD " bytes:\n",
74             (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
75             pool->isSorted() ? "sorted" : "non-sorted",
76             (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
77 
78     const size_t NS = pool->size();
79     for (size_t s=0; s<NS; s++) {
80         String8 str = pool->string8ObjectAt(s);
81         printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
82     }
83 }
84 
makeConfigsString() const85 String8 StringPool::entry::makeConfigsString() const {
86     String8 configStr(configTypeName);
87     if (configStr.size() > 0) configStr.append(" ");
88     if (configs.size() > 0) {
89         for (size_t j=0; j<configs.size(); j++) {
90             if (j > 0) configStr.append(", ");
91             configStr.append(configs[j].toString());
92         }
93     } else {
94         configStr = "(none)";
95     }
96     return configStr;
97 }
98 
compare(const entry & o) const99 int StringPool::entry::compare(const entry& o) const {
100     // Strings with styles go first, to reduce the size of the styles array.
101     // We don't care about the relative order of these strings.
102     if (hasStyles) {
103         return o.hasStyles ? 0 : -1;
104     }
105     if (o.hasStyles) {
106         return 1;
107     }
108 
109     // Sort unstyled strings by type, then by logical configuration.
110     int comp = configTypeName.compare(o.configTypeName);
111     if (comp != 0) {
112         return comp;
113     }
114     const size_t LHN = configs.size();
115     const size_t RHN = o.configs.size();
116     size_t i=0;
117     while (i < LHN && i < RHN) {
118         comp = configs[i].compareLogical(o.configs[i]);
119         if (comp != 0) {
120             return comp;
121         }
122         i++;
123     }
124     if (LHN < RHN) return -1;
125     else if (LHN > RHN) return 1;
126     return 0;
127 }
128 
StringPool(bool utf8)129 StringPool::StringPool(bool utf8) :
130         mUTF8(utf8), mValues(-1)
131 {
132 }
133 
add(const String16 & value,const Vector<entry_style_span> & spans,const String8 * configTypeName,const ResTable_config * config)134 ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
135         const String8* configTypeName, const ResTable_config* config)
136 {
137     ssize_t res = add(value, false, configTypeName, config);
138     if (res >= 0) {
139         addStyleSpans(res, spans);
140     }
141     return res;
142 }
143 
add(const String16 & value,bool mergeDuplicates,const String8 * configTypeName,const ResTable_config * config)144 ssize_t StringPool::add(const String16& value,
145         bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
146 {
147     ssize_t vidx = mValues.indexOfKey(value);
148     ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
149     ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
150     if (eidx < 0) {
151         eidx = mEntries.add(entry(value));
152         if (eidx < 0) {
153             fprintf(stderr, "Failure adding string %s\n", String8(value).string());
154             return eidx;
155         }
156     }
157 
158     if (configTypeName != NULL) {
159         entry& ent = mEntries.editItemAt(eidx);
160         if (kIsDebug) {
161             printf("*** adding config type name %s, was %s\n",
162                     configTypeName->string(), ent.configTypeName.string());
163         }
164         if (ent.configTypeName.size() <= 0) {
165             ent.configTypeName = *configTypeName;
166         } else if (ent.configTypeName != *configTypeName) {
167             ent.configTypeName = " ";
168         }
169     }
170 
171     if (config != NULL) {
172         // Add this to the set of configs associated with the string.
173         entry& ent = mEntries.editItemAt(eidx);
174         size_t addPos;
175         for (addPos=0; addPos<ent.configs.size(); addPos++) {
176             int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
177             if (cmp >= 0) {
178                 if (cmp > 0) {
179                     if (kIsDebug) {
180                         printf("*** inserting config: %s\n", config->toString().string());
181                     }
182                     ent.configs.insertAt(*config, addPos);
183                 }
184                 break;
185             }
186         }
187         if (addPos >= ent.configs.size()) {
188             if (kIsDebug) {
189                 printf("*** adding config: %s\n", config->toString().string());
190             }
191             ent.configs.add(*config);
192         }
193     }
194 
195     const bool first = vidx < 0;
196     const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
197         mEntryStyleArray[pos].spans.size() : 0;
198     if (first || styled || !mergeDuplicates) {
199         pos = mEntryArray.add(eidx);
200         if (first) {
201             vidx = mValues.add(value, pos);
202         }
203         entry& ent = mEntries.editItemAt(eidx);
204         ent.indices.add(pos);
205     }
206 
207     if (kIsDebug) {
208         printf("Adding string %s to pool: pos=%zd eidx=%zd vidx=%zd\n",
209                 String8(value).string(), SSIZE(pos), SSIZE(eidx), SSIZE(vidx));
210     }
211 
212     return pos;
213 }
214 
addStyleSpan(size_t idx,const String16 & name,uint32_t start,uint32_t end)215 status_t StringPool::addStyleSpan(size_t idx, const String16& name,
216                                   uint32_t start, uint32_t end)
217 {
218     entry_style_span span;
219     span.name = name;
220     span.span.firstChar = start;
221     span.span.lastChar = end;
222     return addStyleSpan(idx, span);
223 }
224 
addStyleSpans(size_t idx,const Vector<entry_style_span> & spans)225 status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
226 {
227     const size_t N=spans.size();
228     for (size_t i=0; i<N; i++) {
229         status_t err = addStyleSpan(idx, spans[i]);
230         if (err != NO_ERROR) {
231             return err;
232         }
233     }
234     return NO_ERROR;
235 }
236 
addStyleSpan(size_t idx,const entry_style_span & span)237 status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
238 {
239     // Place blank entries in the span array up to this index.
240     while (mEntryStyleArray.size() <= idx) {
241         mEntryStyleArray.add();
242     }
243 
244     entry_style& style = mEntryStyleArray.editItemAt(idx);
245     style.spans.add(span);
246     mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
247     return NO_ERROR;
248 }
249 
ConfigSorter(const StringPool & pool)250 StringPool::ConfigSorter::ConfigSorter(const StringPool& pool) : pool(pool)
251 {
252 }
253 
operator ()(size_t l,size_t r)254 bool StringPool::ConfigSorter::operator()(size_t l, size_t r)
255 {
256     const StringPool::entry& lhe = pool.mEntries[pool.mEntryArray[l]];
257     const StringPool::entry& rhe = pool.mEntries[pool.mEntryArray[r]];
258     return lhe.compare(rhe) < 0;
259 }
260 
sortByConfig()261 void StringPool::sortByConfig()
262 {
263     LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
264 
265     const size_t N = mEntryArray.size();
266 
267     // This is a vector that starts out with a 1:1 mapping to entries
268     // in the array, which we will sort to come up with the desired order.
269     // At that point it maps from the new position in the array to the
270     // original position the entry appeared.
271     Vector<size_t> newPosToOriginalPos;
272     newPosToOriginalPos.setCapacity(N);
273     for (size_t i=0; i < N; i++) {
274         newPosToOriginalPos.add(i);
275     }
276 
277     // Sort the array.
278     if (kIsDebug) {
279         printf("SORTING STRINGS BY CONFIGURATION...\n");
280     }
281     ConfigSorter sorter(*this);
282     std::sort(newPosToOriginalPos.begin(), newPosToOriginalPos.end(), sorter);
283     if (kIsDebug) {
284         printf("DONE SORTING STRINGS BY CONFIGURATION.\n");
285     }
286 
287     // Create the reverse mapping from the original position in the array
288     // to the new position where it appears in the sorted array.  This is
289     // so that clients can re-map any positions they had previously stored.
290     mOriginalPosToNewPos = newPosToOriginalPos;
291     for (size_t i=0; i<N; i++) {
292         mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
293     }
294 
295 #if 0
296     SortedVector<entry> entries;
297 
298     for (size_t i=0; i<N; i++) {
299         printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
300                 mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
301         entries.add(mEntries[mEntryArray[i]]);
302     }
303 
304     for (size_t i=0; i<entries.size(); i++) {
305         printf("Sorted config #%d: %s\n", i,
306                 entries[i].makeConfigsString().string());
307     }
308 #endif
309 
310     // Now we rebuild the arrays.
311     Vector<entry> newEntries;
312     Vector<size_t> newEntryArray;
313     Vector<entry_style> newEntryStyleArray;
314     DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
315 
316     for (size_t i=0; i<N; i++) {
317         // We are filling in new offset 'i'; oldI is where we can find it
318         // in the original data structure.
319         size_t oldI = newPosToOriginalPos[i];
320         // This is the actual entry associated with the old offset.
321         const entry& oldEnt = mEntries[mEntryArray[oldI]];
322         // This is the same entry the last time we added it to the
323         // new entry array, if any.
324         ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
325         size_t newOffset;
326         if (newIndexOfOffset < 0) {
327             // This is the first time we have seen the entry, so add
328             // it.
329             newOffset = newEntries.add(oldEnt);
330             newEntries.editItemAt(newOffset).indices.clear();
331         } else {
332             // We have seen this entry before, use the existing one
333             // instead of adding it again.
334             newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
335         }
336         // Update the indices to include this new position.
337         newEntries.editItemAt(newOffset).indices.add(i);
338         // And add the offset of the entry to the new entry array.
339         newEntryArray.add(newOffset);
340         // Add any old style to the new style array.
341         if (mEntryStyleArray.size() > 0) {
342             if (oldI < mEntryStyleArray.size()) {
343                 newEntryStyleArray.add(mEntryStyleArray[oldI]);
344             } else {
345                 newEntryStyleArray.add(entry_style());
346             }
347         }
348     }
349 
350     // Now trim any entries at the end of the new style array that are
351     // not needed.
352     for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
353         const entry_style& style = newEntryStyleArray[i];
354         if (style.spans.size() > 0) {
355             // That's it.
356             break;
357         }
358         // This one is not needed; remove.
359         newEntryStyleArray.removeAt(i);
360     }
361 
362     // All done, install the new data structures and upate mValues with
363     // the new positions.
364     mEntries = newEntries;
365     mEntryArray = newEntryArray;
366     mEntryStyleArray = newEntryStyleArray;
367     mValues.clear();
368     for (size_t i=0; i<mEntries.size(); i++) {
369         const entry& ent = mEntries[i];
370         mValues.add(ent.value, ent.indices[0]);
371     }
372 
373 #if 0
374     printf("FINAL SORTED STRING CONFIGS:\n");
375     for (size_t i=0; i<mEntries.size(); i++) {
376         const entry& ent = mEntries[i];
377         printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
378                 String8(ent.value).string());
379     }
380 #endif
381 }
382 
createStringBlock()383 sp<AaptFile> StringPool::createStringBlock()
384 {
385     sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
386                                      String8());
387     status_t err = writeStringBlock(pool);
388     return err == NO_ERROR ? pool : NULL;
389 }
390 
391 #define ENCODE_LENGTH(str, chrsz, strSize) \
392 { \
393     size_t maxMask = 1 << ((chrsz*8)-1); \
394     size_t maxSize = maxMask-1; \
395     if (strSize > maxSize) { \
396         *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
397     } \
398     *str++ = strSize; \
399 }
400 
writeStringBlock(const sp<AaptFile> & pool)401 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
402 {
403     // Allow appending.  Sorry this is a little wacky.
404     if (pool->getSize() > 0) {
405         sp<AaptFile> block = createStringBlock();
406         if (block == NULL) {
407             return UNKNOWN_ERROR;
408         }
409         ssize_t res = pool->writeData(block->getData(), block->getSize());
410         return (res >= 0) ? (status_t)NO_ERROR : res;
411     }
412 
413     // First we need to add all style span names to the string pool.
414     // We do this now (instead of when the span is added) so that these
415     // will appear at the end of the pool, not disrupting the order
416     // our client placed their own strings in it.
417 
418     const size_t STYLES = mEntryStyleArray.size();
419     size_t i;
420 
421     for (i=0; i<STYLES; i++) {
422         entry_style& style = mEntryStyleArray.editItemAt(i);
423         const size_t N = style.spans.size();
424         for (size_t i=0; i<N; i++) {
425             entry_style_span& span = style.spans.editItemAt(i);
426             ssize_t idx = add(span.name, true);
427             if (idx < 0) {
428                 fprintf(stderr, "Error adding span for style tag '%s'\n",
429                         String8(span.name).string());
430                 return idx;
431             }
432             span.span.name.index = (uint32_t)idx;
433         }
434     }
435 
436     const size_t ENTRIES = mEntryArray.size();
437 
438     // Now build the pool of unique strings.
439 
440     const size_t STRINGS = mEntries.size();
441     const size_t preSize = sizeof(ResStringPool_header)
442                          + (sizeof(uint32_t)*ENTRIES)
443                          + (sizeof(uint32_t)*STYLES);
444     if (pool->editData(preSize) == NULL) {
445         fprintf(stderr, "ERROR: Out of memory for string pool\n");
446         return NO_MEMORY;
447     }
448 
449     const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(uint16_t);
450 
451     size_t strPos = 0;
452     for (i=0; i<STRINGS; i++) {
453         entry& ent = mEntries.editItemAt(i);
454         const size_t strSize = (ent.value.size());
455         const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
456             charSize*2 : charSize;
457 
458         String8 encStr;
459         if (mUTF8) {
460             encStr = String8(ent.value);
461         }
462 
463         const size_t encSize = mUTF8 ? encStr.size() : 0;
464         const size_t encLenSize = mUTF8 ?
465             (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
466                 charSize*2 : charSize) : 0;
467 
468         ent.offset = strPos;
469 
470         const size_t totalSize = lenSize + encLenSize +
471             ((mUTF8 ? encSize : strSize)+1)*charSize;
472 
473         void* dat = (void*)pool->editData(preSize + strPos + totalSize);
474         if (dat == NULL) {
475             fprintf(stderr, "ERROR: Out of memory for string pool\n");
476             return NO_MEMORY;
477         }
478         dat = (uint8_t*)dat + preSize + strPos;
479         if (mUTF8) {
480             uint8_t* strings = (uint8_t*)dat;
481 
482             ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
483 
484             ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
485 
486             strncpy((char*)strings, encStr, encSize+1);
487         } else {
488             char16_t* strings = (char16_t*)dat;
489 
490             ENCODE_LENGTH(strings, sizeof(char16_t), strSize)
491 
492             strcpy16_htod(strings, ent.value);
493         }
494 
495         strPos += totalSize;
496     }
497 
498     // Pad ending string position up to a uint32_t boundary.
499 
500     if (strPos&0x3) {
501         size_t padPos = ((strPos+3)&~0x3);
502         uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
503         if (dat == NULL) {
504             fprintf(stderr, "ERROR: Out of memory padding string pool\n");
505             return NO_MEMORY;
506         }
507         memset(dat+preSize+strPos, 0, padPos-strPos);
508         strPos = padPos;
509     }
510 
511     // Build the pool of style spans.
512 
513     size_t styPos = strPos;
514     for (i=0; i<STYLES; i++) {
515         entry_style& ent = mEntryStyleArray.editItemAt(i);
516         const size_t N = ent.spans.size();
517         const size_t totalSize = (N*sizeof(ResStringPool_span))
518                                + sizeof(ResStringPool_ref);
519 
520         ent.offset = styPos-strPos;
521         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
522         if (dat == NULL) {
523             fprintf(stderr, "ERROR: Out of memory for string styles\n");
524             return NO_MEMORY;
525         }
526         ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
527         for (size_t i=0; i<N; i++) {
528             span->name.index = htodl(ent.spans[i].span.name.index);
529             span->firstChar = htodl(ent.spans[i].span.firstChar);
530             span->lastChar = htodl(ent.spans[i].span.lastChar);
531             span++;
532         }
533         span->name.index = htodl(ResStringPool_span::END);
534 
535         styPos += totalSize;
536     }
537 
538     if (STYLES > 0) {
539         // Add full terminator at the end (when reading we validate that
540         // the end of the pool is fully terminated to simplify error
541         // checking).
542         size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
543         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
544         if (dat == NULL) {
545             fprintf(stderr, "ERROR: Out of memory for string styles\n");
546             return NO_MEMORY;
547         }
548         uint32_t* p = (uint32_t*)(dat+preSize+styPos);
549         while (extra > 0) {
550             *p++ = htodl(ResStringPool_span::END);
551             extra -= sizeof(uint32_t);
552         }
553         styPos += extra;
554     }
555 
556     // Write header.
557 
558     ResStringPool_header* header =
559         (ResStringPool_header*)pool->padData(sizeof(uint32_t));
560     if (header == NULL) {
561         fprintf(stderr, "ERROR: Out of memory for string pool\n");
562         return NO_MEMORY;
563     }
564     memset(header, 0, sizeof(*header));
565     header->header.type = htods(RES_STRING_POOL_TYPE);
566     header->header.headerSize = htods(sizeof(*header));
567     header->header.size = htodl(pool->getSize());
568     header->stringCount = htodl(ENTRIES);
569     header->styleCount = htodl(STYLES);
570     if (mUTF8) {
571         header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
572     }
573     header->stringsStart = htodl(preSize);
574     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
575 
576     // Write string index array.
577 
578     uint32_t* index = (uint32_t*)(header+1);
579     for (i=0; i<ENTRIES; i++) {
580         entry& ent = mEntries.editItemAt(mEntryArray[i]);
581         *index++ = htodl(ent.offset);
582         if (kIsDebug) {
583             printf("Writing entry #%zu: \"%s\" ent=%zu off=%zu\n",
584                     i,
585                     String8(ent.value).string(),
586                     mEntryArray[i],
587                     ent.offset);
588         }
589     }
590 
591     // Write style index array.
592 
593     for (i=0; i<STYLES; i++) {
594         *index++ = htodl(mEntryStyleArray[i].offset);
595     }
596 
597     return NO_ERROR;
598 }
599 
offsetForString(const String16 & val) const600 ssize_t StringPool::offsetForString(const String16& val) const
601 {
602     const Vector<size_t>* indices = offsetsForString(val);
603     ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
604     if (kIsDebug) {
605         printf("Offset for string %s: %zd (%s)\n", String8(val).string(), SSIZE(res),
606                 res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8());
607     }
608     return res;
609 }
610 
offsetsForString(const String16 & val) const611 const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
612 {
613     ssize_t pos = mValues.valueFor(val);
614     if (pos < 0) {
615         return NULL;
616     }
617     return &mEntries[mEntryArray[pos]].indices;
618 }
619