1 //
2 // Copyright 2006 The Android Open Source Project
3 //
4 // Build resource files from raw assets.
5 //
6 #include "StringPool.h"
7 
8 #include <utils/ByteOrder.h>
9 #include <utils/SortedVector.h>
10 
11 #include <algorithm>
12 
13 #include "ResourceTable.h"
14 
// Compile-time switch for the verbose tracing printf()s in this file.
// Flip to true for noisy debug output.
static constexpr bool kIsDebug = false;
17 
18 #if __cplusplus >= 201103L
strcpy16_htod(char16_t * dst,const char16_t * src)19 void strcpy16_htod(char16_t* dst, const char16_t* src)
20 {
21     while (*src) {
22         char16_t s = htods(*src);
23         *dst++ = s;
24         src++;
25     }
26     *dst = 0;
27 }
28 #endif
29 
strcpy16_htod(uint16_t * dst,const char16_t * src)30 void strcpy16_htod(uint16_t* dst, const char16_t* src)
31 {
32     while (*src) {
33         uint16_t s = htods(static_cast<uint16_t>(*src));
34         *dst++ = s;
35         src++;
36     }
37     *dst = 0;
38 }
39 
printStringPool(const ResStringPool * pool)40 void printStringPool(const ResStringPool* pool)
41 {
42     if (pool->getError() == NO_INIT) {
43         printf("String pool is unitialized.\n");
44         return;
45     } else if (pool->getError() != NO_ERROR) {
46         printf("String pool is corrupt/invalid.\n");
47         return;
48     }
49 
50     SortedVector<const void*> uniqueStrings;
51     const size_t N = pool->size();
52     for (size_t i=0; i<N; i++) {
53         size_t len;
54         if (pool->isUTF8()) {
55             uniqueStrings.add(UnpackOptionalString(pool->string8At(i), &len));
56         } else {
57             uniqueStrings.add(UnpackOptionalString(pool->stringAt(i), &len));
58         }
59     }
60 
61     printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
62             ZD " styles using " ZD " bytes:\n",
63             (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
64             pool->isSorted() ? "sorted" : "non-sorted",
65             (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
66 
67     const size_t NS = pool->size();
68     for (size_t s=0; s<NS; s++) {
69         auto str = pool->string8ObjectAt(s);
70         printf("String #" ZD ": %s\n", (ZD_TYPE) s, (str.has_value() ? str->c_str() : ""));
71     }
72 }
73 
makeConfigsString() const74 String8 StringPool::entry::makeConfigsString() const {
75     String8 configStr(configTypeName);
76     if (configStr.size() > 0) configStr.append(" ");
77     if (configs.size() > 0) {
78         for (size_t j=0; j<configs.size(); j++) {
79             if (j > 0) configStr.append(", ");
80             configStr.append(configs[j].toString());
81         }
82     } else {
83         configStr = "(none)";
84     }
85     return configStr;
86 }
87 
compare(const entry & o) const88 int StringPool::entry::compare(const entry& o) const {
89     // Strings with styles go first, to reduce the size of the styles array.
90     // We don't care about the relative order of these strings.
91     if (hasStyles) {
92         return o.hasStyles ? 0 : -1;
93     }
94     if (o.hasStyles) {
95         return 1;
96     }
97 
98     // Sort unstyled strings by type, then by logical configuration.
99     int comp = configTypeName.compare(o.configTypeName);
100     if (comp != 0) {
101         return comp;
102     }
103     const size_t LHN = configs.size();
104     const size_t RHN = o.configs.size();
105     size_t i=0;
106     while (i < LHN && i < RHN) {
107         comp = configs[i].compareLogical(o.configs[i]);
108         if (comp != 0) {
109             return comp;
110         }
111         i++;
112     }
113     if (LHN < RHN) return -1;
114     else if (LHN > RHN) return 1;
115     return 0;
116 }
117 
StringPool(bool utf8)118 StringPool::StringPool(bool utf8) :
119         mUTF8(utf8), mValues(-1)
120 {
121 }
122 
add(const String16 & value,const Vector<entry_style_span> & spans,const String8 * configTypeName,const ResTable_config * config)123 ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
124         const String8* configTypeName, const ResTable_config* config)
125 {
126     ssize_t res = add(value, false, configTypeName, config);
127     if (res >= 0) {
128         addStyleSpans(res, spans);
129     }
130     return res;
131 }
132 
add(const String16 & value,bool mergeDuplicates,const String8 * configTypeName,const ResTable_config * config)133 ssize_t StringPool::add(const String16& value,
134         bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
135 {
136     ssize_t vidx = mValues.indexOfKey(value);
137     ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
138     ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
139     if (eidx < 0) {
140         eidx = mEntries.add(entry(value));
141         if (eidx < 0) {
142             fprintf(stderr, "Failure adding string %s\n", String8(value).c_str());
143             return eidx;
144         }
145     }
146 
147     if (configTypeName != NULL) {
148         entry& ent = mEntries.editItemAt(eidx);
149         if (kIsDebug) {
150             printf("*** adding config type name %s, was %s\n",
151                     configTypeName->c_str(), ent.configTypeName.c_str());
152         }
153         if (ent.configTypeName.size() <= 0) {
154             ent.configTypeName = *configTypeName;
155         } else if (ent.configTypeName != *configTypeName) {
156             ent.configTypeName = " ";
157         }
158     }
159 
160     if (config != NULL) {
161         // Add this to the set of configs associated with the string.
162         entry& ent = mEntries.editItemAt(eidx);
163         size_t addPos;
164         for (addPos=0; addPos<ent.configs.size(); addPos++) {
165             int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
166             if (cmp >= 0) {
167                 if (cmp > 0) {
168                     if (kIsDebug) {
169                         printf("*** inserting config: %s\n", config->toString().c_str());
170                     }
171                     ent.configs.insertAt(*config, addPos);
172                 }
173                 break;
174             }
175         }
176         if (addPos >= ent.configs.size()) {
177             if (kIsDebug) {
178                 printf("*** adding config: %s\n", config->toString().c_str());
179             }
180             ent.configs.add(*config);
181         }
182     }
183 
184     const bool first = vidx < 0;
185     const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
186         mEntryStyleArray[pos].spans.size() : 0;
187     if (first || styled || !mergeDuplicates) {
188         pos = mEntryArray.add(eidx);
189         if (first) {
190             vidx = mValues.add(value, pos);
191         }
192         entry& ent = mEntries.editItemAt(eidx);
193         ent.indices.add(pos);
194     }
195 
196     if (kIsDebug) {
197         printf("Adding string %s to pool: pos=%zd eidx=%zd vidx=%zd\n",
198                 String8(value).c_str(), pos, eidx, vidx);
199     }
200 
201     return pos;
202 }
203 
addStyleSpan(size_t idx,const String16 & name,uint32_t start,uint32_t end)204 status_t StringPool::addStyleSpan(size_t idx, const String16& name,
205                                   uint32_t start, uint32_t end)
206 {
207     entry_style_span span;
208     span.name = name;
209     span.span.firstChar = start;
210     span.span.lastChar = end;
211     return addStyleSpan(idx, span);
212 }
213 
addStyleSpans(size_t idx,const Vector<entry_style_span> & spans)214 status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
215 {
216     const size_t N=spans.size();
217     for (size_t i=0; i<N; i++) {
218         status_t err = addStyleSpan(idx, spans[i]);
219         if (err != NO_ERROR) {
220             return err;
221         }
222     }
223     return NO_ERROR;
224 }
225 
addStyleSpan(size_t idx,const entry_style_span & span)226 status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
227 {
228     // Place blank entries in the span array up to this index.
229     while (mEntryStyleArray.size() <= idx) {
230         mEntryStyleArray.add();
231     }
232 
233     entry_style& style = mEntryStyleArray.editItemAt(idx);
234     style.spans.add(span);
235     mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
236     return NO_ERROR;
237 }
238 
// Comparator bound to `pool`; only a reference is kept, so the pool must
// outlive the sorter.
StringPool::ConfigSorter::ConfigSorter(const StringPool& pool)
    : pool(pool)
{
}
242 
operator ()(size_t l,size_t r)243 bool StringPool::ConfigSorter::operator()(size_t l, size_t r)
244 {
245     const StringPool::entry& lhe = pool.mEntries[pool.mEntryArray[l]];
246     const StringPool::entry& rhe = pool.mEntries[pool.mEntryArray[r]];
247     return lhe.compare(rhe) < 0;
248 }
249 
sortByConfig()250 void StringPool::sortByConfig()
251 {
252     LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
253 
254     const size_t N = mEntryArray.size();
255 
256     // This is a vector that starts out with a 1:1 mapping to entries
257     // in the array, which we will sort to come up with the desired order.
258     // At that point it maps from the new position in the array to the
259     // original position the entry appeared.
260     Vector<size_t> newPosToOriginalPos;
261     newPosToOriginalPos.setCapacity(N);
262     for (size_t i=0; i < N; i++) {
263         newPosToOriginalPos.add(i);
264     }
265 
266     // Sort the array.
267     if (kIsDebug) {
268         printf("SORTING STRINGS BY CONFIGURATION...\n");
269     }
270     ConfigSorter sorter(*this);
271     std::sort(newPosToOriginalPos.begin(), newPosToOriginalPos.end(), sorter);
272     if (kIsDebug) {
273         printf("DONE SORTING STRINGS BY CONFIGURATION.\n");
274     }
275 
276     // Create the reverse mapping from the original position in the array
277     // to the new position where it appears in the sorted array.  This is
278     // so that clients can re-map any positions they had previously stored.
279     mOriginalPosToNewPos = newPosToOriginalPos;
280     for (size_t i=0; i<N; i++) {
281         mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
282     }
283 
284 #if 0
285     SortedVector<entry> entries;
286 
287     for (size_t i=0; i<N; i++) {
288         printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
289                 mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().c_str());
290         entries.add(mEntries[mEntryArray[i]]);
291     }
292 
293     for (size_t i=0; i<entries.size(); i++) {
294         printf("Sorted config #%d: %s\n", i,
295                 entries[i].makeConfigsString().c_str());
296     }
297 #endif
298 
299     // Now we rebuild the arrays.
300     Vector<entry> newEntries;
301     Vector<size_t> newEntryArray;
302     Vector<entry_style> newEntryStyleArray;
303     DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
304 
305     for (size_t i=0; i<N; i++) {
306         // We are filling in new offset 'i'; oldI is where we can find it
307         // in the original data structure.
308         size_t oldI = newPosToOriginalPos[i];
309         // This is the actual entry associated with the old offset.
310         const entry& oldEnt = mEntries[mEntryArray[oldI]];
311         // This is the same entry the last time we added it to the
312         // new entry array, if any.
313         ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
314         size_t newOffset;
315         if (newIndexOfOffset < 0) {
316             // This is the first time we have seen the entry, so add
317             // it.
318             newOffset = newEntries.add(oldEnt);
319             newEntries.editItemAt(newOffset).indices.clear();
320         } else {
321             // We have seen this entry before, use the existing one
322             // instead of adding it again.
323             newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
324         }
325         // Update the indices to include this new position.
326         newEntries.editItemAt(newOffset).indices.add(i);
327         // And add the offset of the entry to the new entry array.
328         newEntryArray.add(newOffset);
329         // Add any old style to the new style array.
330         if (mEntryStyleArray.size() > 0) {
331             if (oldI < mEntryStyleArray.size()) {
332                 newEntryStyleArray.add(mEntryStyleArray[oldI]);
333             } else {
334                 newEntryStyleArray.add(entry_style());
335             }
336         }
337     }
338 
339     // Now trim any entries at the end of the new style array that are
340     // not needed.
341     for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
342         const entry_style& style = newEntryStyleArray[i];
343         if (style.spans.size() > 0) {
344             // That's it.
345             break;
346         }
347         // This one is not needed; remove.
348         newEntryStyleArray.removeAt(i);
349     }
350 
351     // All done, install the new data structures and upate mValues with
352     // the new positions.
353     mEntries = newEntries;
354     mEntryArray = newEntryArray;
355     mEntryStyleArray = newEntryStyleArray;
356     mValues.clear();
357     for (size_t i=0; i<mEntries.size(); i++) {
358         const entry& ent = mEntries[i];
359         mValues.add(ent.value, ent.indices[0]);
360     }
361 
362 #if 0
363     printf("FINAL SORTED STRING CONFIGS:\n");
364     for (size_t i=0; i<mEntries.size(); i++) {
365         const entry& ent = mEntries[i];
366         printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().c_str(),
367                 String8(ent.value).c_str());
368     }
369 #endif
370 }
371 
createStringBlock()372 sp<AaptFile> StringPool::createStringBlock()
373 {
374     sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
375                                      String8());
376     status_t err = writeStringBlock(pool);
377     return err == NO_ERROR ? pool : NULL;
378 }
379 
// Writes the length prefix `strSize` at `str` and advances `str` past it.
// `chrsz` is the size in bytes of one element of `str`.  A length that
// fits in all but the top bit of one element is stored in a single
// element; otherwise a first element with the top bit set and the next
// higher bits of the length is written, followed by an element holding the
// low bits (the store truncates to the element width).
// Note: expands to a bare brace block (call sites do not add a trailing
// semicolon) and evaluates `strSize` more than once.
#define ENCODE_LENGTH(str, chrsz, strSize) \
{ \
    size_t lenHighBit_ = 1 << (((chrsz)*8)-1); \
    size_t lenLowMask_ = lenHighBit_-1; \
    if (lenLowMask_ < (strSize)) { \
        *(str)++ = lenHighBit_ | (lenLowMask_&((strSize)>>((chrsz)*8))); \
    } \
    *(str)++ = (strSize); \
}
389 
writeStringBlock(const sp<AaptFile> & pool)390 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
391 {
392     // Allow appending.  Sorry this is a little wacky.
393     if (pool->getSize() > 0) {
394         sp<AaptFile> block = createStringBlock();
395         if (block == NULL) {
396             return UNKNOWN_ERROR;
397         }
398         ssize_t res = pool->writeData(block->getData(), block->getSize());
399         return (res >= 0) ? (status_t)NO_ERROR : res;
400     }
401 
402     // First we need to add all style span names to the string pool.
403     // We do this now (instead of when the span is added) so that these
404     // will appear at the end of the pool, not disrupting the order
405     // our client placed their own strings in it.
406 
407     const size_t STYLES = mEntryStyleArray.size();
408     size_t i;
409 
410     for (i=0; i<STYLES; i++) {
411         entry_style& style = mEntryStyleArray.editItemAt(i);
412         const size_t N = style.spans.size();
413         for (size_t i=0; i<N; i++) {
414             entry_style_span& span = style.spans.editItemAt(i);
415             ssize_t idx = add(span.name, true);
416             if (idx < 0) {
417                 fprintf(stderr, "Error adding span for style tag '%s'\n",
418                         String8(span.name).c_str());
419                 return idx;
420             }
421             span.span.name.index = (uint32_t)idx;
422         }
423     }
424 
425     const size_t ENTRIES = mEntryArray.size();
426 
427     // Now build the pool of unique strings.
428 
429     const size_t STRINGS = mEntries.size();
430     const size_t preSize = sizeof(ResStringPool_header)
431                          + (sizeof(uint32_t)*ENTRIES)
432                          + (sizeof(uint32_t)*STYLES);
433     if (pool->editData(preSize) == NULL) {
434         fprintf(stderr, "ERROR: Out of memory for string pool\n");
435         return NO_MEMORY;
436     }
437 
438     const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(uint16_t);
439 
440     size_t strPos = 0;
441     for (i=0; i<STRINGS; i++) {
442         entry& ent = mEntries.editItemAt(i);
443         const size_t strSize = (ent.value.size());
444         const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
445             charSize*2 : charSize;
446 
447         String8 encStr;
448         if (mUTF8) {
449             encStr = String8(ent.value);
450         }
451 
452         const size_t encSize = mUTF8 ? encStr.size() : 0;
453         const size_t encLenSize = mUTF8 ?
454             (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
455                 charSize*2 : charSize) : 0;
456 
457         ent.offset = strPos;
458 
459         const size_t totalSize = lenSize + encLenSize +
460             ((mUTF8 ? encSize : strSize)+1)*charSize;
461 
462         void* dat = (void*)pool->editData(preSize + strPos + totalSize);
463         if (dat == NULL) {
464             fprintf(stderr, "ERROR: Out of memory for string pool\n");
465             return NO_MEMORY;
466         }
467         dat = (uint8_t*)dat + preSize + strPos;
468         if (mUTF8) {
469             uint8_t* strings = (uint8_t*)dat;
470 
471             ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
472 
473             ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
474 
475             strncpy((char*)strings, encStr.c_str(), encSize + 1);
476         } else {
477             char16_t* strings = (char16_t*)dat;
478 
479             ENCODE_LENGTH(strings, sizeof(char16_t), strSize)
480 
481             strcpy16_htod(strings, ent.value.c_str());
482         }
483 
484         strPos += totalSize;
485     }
486 
487     // Pad ending string position up to a uint32_t boundary.
488 
489     if (strPos&0x3) {
490         size_t padPos = ((strPos+3)&~0x3);
491         uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
492         if (dat == NULL) {
493             fprintf(stderr, "ERROR: Out of memory padding string pool\n");
494             return NO_MEMORY;
495         }
496         memset(dat+preSize+strPos, 0, padPos-strPos);
497         strPos = padPos;
498     }
499 
500     // Build the pool of style spans.
501 
502     size_t styPos = strPos;
503     for (i=0; i<STYLES; i++) {
504         entry_style& ent = mEntryStyleArray.editItemAt(i);
505         const size_t N = ent.spans.size();
506         const size_t totalSize = (N*sizeof(ResStringPool_span))
507                                + sizeof(ResStringPool_ref);
508 
509         ent.offset = styPos-strPos;
510         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
511         if (dat == NULL) {
512             fprintf(stderr, "ERROR: Out of memory for string styles\n");
513             return NO_MEMORY;
514         }
515         ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
516         for (size_t i=0; i<N; i++) {
517             span->name.index = htodl(ent.spans[i].span.name.index);
518             span->firstChar = htodl(ent.spans[i].span.firstChar);
519             span->lastChar = htodl(ent.spans[i].span.lastChar);
520             span++;
521         }
522         span->name.index = htodl(ResStringPool_span::END);
523 
524         styPos += totalSize;
525     }
526 
527     if (STYLES > 0) {
528         // Add full terminator at the end (when reading we validate that
529         // the end of the pool is fully terminated to simplify error
530         // checking).
531         size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
532         uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
533         if (dat == NULL) {
534             fprintf(stderr, "ERROR: Out of memory for string styles\n");
535             return NO_MEMORY;
536         }
537         uint32_t* p = (uint32_t*)(dat+preSize+styPos);
538         while (extra > 0) {
539             *p++ = htodl(ResStringPool_span::END);
540             extra -= sizeof(uint32_t);
541         }
542         styPos += extra;
543     }
544 
545     // Write header.
546 
547     ResStringPool_header* header =
548         (ResStringPool_header*)pool->padData(sizeof(uint32_t));
549     if (header == NULL) {
550         fprintf(stderr, "ERROR: Out of memory for string pool\n");
551         return NO_MEMORY;
552     }
553     memset(header, 0, sizeof(*header));
554     header->header.type = htods(RES_STRING_POOL_TYPE);
555     header->header.headerSize = htods(sizeof(*header));
556     header->header.size = htodl(pool->getSize());
557     header->stringCount = htodl(ENTRIES);
558     header->styleCount = htodl(STYLES);
559     if (mUTF8) {
560         header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
561     }
562     header->stringsStart = htodl(preSize);
563     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
564 
565     // Write string index array.
566 
567     uint32_t* index = (uint32_t*)(header+1);
568     for (i=0; i<ENTRIES; i++) {
569         entry& ent = mEntries.editItemAt(mEntryArray[i]);
570         *index++ = htodl(ent.offset);
571         if (kIsDebug) {
572             printf("Writing entry #%zu: \"%s\" ent=%zu off=%zu\n",
573                     i,
574                     String8(ent.value).c_str(),
575                     mEntryArray[i],
576                     ent.offset);
577         }
578     }
579 
580     // Write style index array.
581 
582     for (i=0; i<STYLES; i++) {
583         *index++ = htodl(mEntryStyleArray[i].offset);
584     }
585 
586     return NO_ERROR;
587 }
588 
offsetForString(const String16 & val) const589 ssize_t StringPool::offsetForString(const String16& val) const
590 {
591     const Vector<size_t>* indices = offsetsForString(val);
592     ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
593     if (kIsDebug) {
594         printf("Offset for string %s: %zd (%s)\n", String8(val).c_str(), res,
595                res >= 0 ? String8(mEntries[mEntryArray[res]].value).c_str() : "");
596     }
597     return res;
598 }
599 
offsetsForString(const String16 & val) const600 const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
601 {
602     ssize_t pos = mValues.valueFor(val);
603     if (pos < 0) {
604         return NULL;
605     }
606     return &mEntries[mEntryArray[pos]].indices;
607 }
608