1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Access the contents of a .dex file.
19  */
20 
21 #include "DexFile.h"
22 #include "DexOptData.h"
23 #include "DexProto.h"
24 #include "DexCatch.h"
25 #include "Leb128.h"
26 #include "sha1.h"
27 #include "ZipArchive.h"
28 
29 #include <zlib.h>
30 
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <errno.h>
36 
37 
38 /*
39  * Verifying checksums is good, but it slows things down and causes us to
40  * touch every page.  In the "optimized" world, it doesn't work at all,
41  * because we rewrite the contents.
42  */
43 static const bool kVerifySignature = false;
44 
45 /* (documented in header) */
dexGetPrimitiveTypeDescriptorChar(PrimitiveType type)46 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
47     const char* string = dexGetPrimitiveTypeDescriptor(type);
48 
49     return (string == NULL) ? '\0' : string[0];
50 }
51 
52 /* (documented in header) */
dexGetPrimitiveTypeDescriptor(PrimitiveType type)53 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
54     switch (type) {
55         case PRIM_VOID:    return "V";
56         case PRIM_BOOLEAN: return "Z";
57         case PRIM_BYTE:    return "B";
58         case PRIM_SHORT:   return "S";
59         case PRIM_CHAR:    return "C";
60         case PRIM_INT:     return "I";
61         case PRIM_LONG:    return "J";
62         case PRIM_FLOAT:   return "F";
63         case PRIM_DOUBLE:  return "D";
64         default:           return NULL;
65     }
66 
67     return NULL;
68 }
69 
70 /* (documented in header) */
dexGetBoxedTypeDescriptor(PrimitiveType type)71 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
72     switch (type) {
73         case PRIM_VOID:    return NULL;
74         case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
75         case PRIM_BYTE:    return "Ljava/lang/Byte;";
76         case PRIM_SHORT:   return "Ljava/lang/Short;";
77         case PRIM_CHAR:    return "Ljava/lang/Character;";
78         case PRIM_INT:     return "Ljava/lang/Integer;";
79         case PRIM_LONG:    return "Ljava/lang/Long;";
80         case PRIM_FLOAT:   return "Ljava/lang/Float;";
81         case PRIM_DOUBLE:  return "Ljava/lang/Double;";
82         default:           return NULL;
83     }
84 }
85 
86 /* (documented in header) */
dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar)87 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
88     switch (descriptorChar) {
89         case 'V': return PRIM_VOID;
90         case 'Z': return PRIM_BOOLEAN;
91         case 'B': return PRIM_BYTE;
92         case 'S': return PRIM_SHORT;
93         case 'C': return PRIM_CHAR;
94         case 'I': return PRIM_INT;
95         case 'J': return PRIM_LONG;
96         case 'F': return PRIM_FLOAT;
97         case 'D': return PRIM_DOUBLE;
98         default:  return PRIM_NOT;
99     }
100 }
101 
102 /* Return the UTF-8 encoded string with the specified string_id index,
103  * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)104 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
105         u4* utf16Size) {
106     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
107     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
108 
109     *utf16Size = readUnsignedLeb128(&ptr);
110     return (const char*) ptr;
111 }
112 
113 /*
114  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
115  * least kSHA1DigestOutputLen bytes.
116  */
117 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
118 
119 /*
120  * Compute a SHA-1 digest on a range of bytes.
121  */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])122 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
123     unsigned char digest[])
124 {
125     SHA1_CTX context;
126     SHA1Init(&context);
127     SHA1Update(&context, data, length);
128     SHA1Final(digest, &context);
129 }
130 
131 /*
132  * Format the SHA-1 digest into the buffer, which must be able to hold at
133  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
134  */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)135 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
136 {
137     static const char hexDigit[] = "0123456789abcdef";
138     char* cp;
139     int i;
140 
141     cp = tmpBuf;
142     for (i = 0; i < kSHA1DigestLen; i++) {
143         *cp++ = hexDigit[digest[i] >> 4];
144         *cp++ = hexDigit[digest[i] & 0x0f];
145     }
146     *cp++ = '\0';
147 
148     assert(cp == tmpBuf + kSHA1DigestOutputLen);
149 
150     return tmpBuf;
151 }
152 
153 /*
154  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
155  *
156  * This may or may not be compatible with UTF-8 hash functions used inside
157  * the Dalvik VM.
158  *
159  * The basic "multiply by 31 and add" approach does better on class names
160  * than most other things tried (e.g. adler32).
161  */
classDescriptorHash(const char * str)162 static u4 classDescriptorHash(const char* str)
163 {
164     u4 hash = 1;
165 
166     while (*str != '\0')
167         hash = hash * 31 + *str++;
168 
169     return hash;
170 }
171 
172 /*
173  * Add an entry to the class lookup table.  We hash the string and probe
174  * until we find an open slot.
175  */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)176 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
177     int stringOff, int classDefOff, int* pNumProbes)
178 {
179     const char* classDescriptor =
180         (const char*) (pDexFile->baseAddr + stringOff);
181     u4 hash = classDescriptorHash(classDescriptor);
182     int mask = pLookup->numEntries-1;
183     int idx = hash & mask;
184 
185     /*
186      * Find the first empty slot.  We oversized the table, so this is
187      * guaranteed to finish.
188      */
189     int probes = 0;
190     while (pLookup->table[idx].classDescriptorOffset != 0) {
191         idx = (idx + 1) & mask;
192         probes++;
193     }
194     //if (probes > 1)
195     //    ALOGW("classLookupAdd: probes=%d", probes);
196 
197     pLookup->table[idx].classDescriptorHash = hash;
198     pLookup->table[idx].classDescriptorOffset = stringOff;
199     pLookup->table[idx].classDefOffset = classDefOff;
200     *pNumProbes = probes;
201 }
202 
203 /*
204  * Create the class lookup hash table.
205  *
206  * Returns newly-allocated storage.
207  */
dexCreateClassLookup(DexFile * pDexFile)208 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
209 {
210     DexClassLookup* pLookup;
211     int allocSize;
212     int i, numEntries;
213     int numProbes, totalProbes, maxProbes;
214 
215     numProbes = totalProbes = maxProbes = 0;
216 
217     assert(pDexFile != NULL);
218 
219     /*
220      * Using a factor of 3 results in far less probing than a factor of 2,
221      * but almost doubles the flash storage requirements for the bootstrap
222      * DEX files.  The overall impact on class loading performance seems
223      * to be minor.  We could probably get some performance improvement by
224      * using a secondary hash.
225      */
226     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
227     allocSize = offsetof(DexClassLookup, table)
228                     + numEntries * sizeof(pLookup->table[0]);
229 
230     pLookup = (DexClassLookup*) calloc(1, allocSize);
231     if (pLookup == NULL)
232         return NULL;
233     pLookup->size = allocSize;
234     pLookup->numEntries = numEntries;
235 
236     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
237         const DexClassDef* pClassDef;
238         const char* pString;
239 
240         pClassDef = dexGetClassDef(pDexFile, i);
241         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
242 
243         classLookupAdd(pDexFile, pLookup,
244             (u1*)pString - pDexFile->baseAddr,
245             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
246 
247         if (numProbes > maxProbes)
248             maxProbes = numProbes;
249         totalProbes += numProbes;
250     }
251 
252     ALOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
253          " total=%d max=%d",
254         pDexFile->pHeader->classDefsSize, numEntries,
255         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
256         allocSize, totalProbes, maxProbes);
257 
258     return pLookup;
259 }
260 
261 
262 /*
263  * Set up the basic raw data pointers of a DexFile. This function isn't
264  * meant for general use.
265  */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)266 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
267     DexHeader *pHeader = (DexHeader*) data;
268 
269     pDexFile->baseAddr = data;
270     pDexFile->pHeader = pHeader;
271     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
272     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
273     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
274     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
275     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
276     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
277     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
278 }
279 
280 /*
281  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
282  * called after the byte-ordering and structure alignment has been fixed up.
283  *
284  * On success, return a newly-allocated DexFile.
285  */
dexFileParse(const u1 * data,size_t length,int flags)286 DexFile* dexFileParse(const u1* data, size_t length, int flags)
287 {
288     DexFile* pDexFile = NULL;
289     const DexHeader* pHeader;
290     const u1* magic;
291     int result = -1;
292 
293     if (length < sizeof(DexHeader)) {
294         ALOGE("too short to be a valid .dex");
295         goto bail;      /* bad file format */
296     }
297 
298     pDexFile = (DexFile*) malloc(sizeof(DexFile));
299     if (pDexFile == NULL)
300         goto bail;      /* alloc failure */
301     memset(pDexFile, 0, sizeof(DexFile));
302 
303     /*
304      * Peel off the optimized header.
305      */
306     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
307         magic = data;
308         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
309             ALOGE("bad opt version (0x%02x %02x %02x %02x)",
310                  magic[4], magic[5], magic[6], magic[7]);
311             goto bail;
312         }
313 
314         pDexFile->pOptHeader = (const DexOptHeader*) data;
315         ALOGV("Good opt header, DEX offset is %d, flags=0x%02x",
316             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
317 
318         /* parse the optimized dex file tables */
319         if (!dexParseOptData(data, length, pDexFile))
320             goto bail;
321 
322         /* ignore the opt header and appended data from here on out */
323         data += pDexFile->pOptHeader->dexOffset;
324         length -= pDexFile->pOptHeader->dexOffset;
325         if (pDexFile->pOptHeader->dexLength > length) {
326             ALOGE("File truncated? stored len=%d, rem len=%d",
327                 pDexFile->pOptHeader->dexLength, (int) length);
328             goto bail;
329         }
330         length = pDexFile->pOptHeader->dexLength;
331     }
332 
333     dexFileSetupBasicPointers(pDexFile, data);
334     pHeader = pDexFile->pHeader;
335 
336     if (!dexHasValidMagic(pHeader)) {
337         goto bail;
338     }
339 
340     /*
341      * Verify the checksum(s).  This is reasonably quick, but does require
342      * touching every byte in the DEX file.  The base checksum changes after
343      * byte-swapping and DEX optimization.
344      */
345     if (flags & kDexParseVerifyChecksum) {
346         u4 adler = dexComputeChecksum(pHeader);
347         if (adler != pHeader->checksum) {
348             ALOGE("ERROR: bad checksum (%08x vs %08x)",
349                 adler, pHeader->checksum);
350             if (!(flags & kDexParseContinueOnError))
351                 goto bail;
352         } else {
353             ALOGV("+++ adler32 checksum (%08x) verified", adler);
354         }
355 
356         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
357         if (pOptHeader != NULL) {
358             adler = dexComputeOptChecksum(pOptHeader);
359             if (adler != pOptHeader->checksum) {
360                 ALOGE("ERROR: bad opt checksum (%08x vs %08x)",
361                     adler, pOptHeader->checksum);
362                 if (!(flags & kDexParseContinueOnError))
363                     goto bail;
364             } else {
365                 ALOGV("+++ adler32 opt checksum (%08x) verified", adler);
366             }
367         }
368     }
369 
370     /*
371      * Verify the SHA-1 digest.  (Normally we don't want to do this --
372      * the digest is used to uniquely identify the original DEX file, and
373      * can't be computed for verification after the DEX is byte-swapped
374      * and optimized.)
375      */
376     if (kVerifySignature) {
377         unsigned char sha1Digest[kSHA1DigestLen];
378         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
379                             kSHA1DigestLen;
380 
381         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
382         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
383             char tmpBuf1[kSHA1DigestOutputLen];
384             char tmpBuf2[kSHA1DigestOutputLen];
385             ALOGE("ERROR: bad SHA1 digest (%s vs %s)",
386                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
387                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
388             if (!(flags & kDexParseContinueOnError))
389                 goto bail;
390         } else {
391             ALOGV("+++ sha1 digest verified");
392         }
393     }
394 
395     if (pHeader->fileSize != length) {
396         ALOGE("ERROR: stored file size (%d) != expected (%d)",
397             (int) pHeader->fileSize, (int) length);
398         if (!(flags & kDexParseContinueOnError))
399             goto bail;
400     }
401 
402     if (pHeader->classDefsSize == 0) {
403         ALOGE("ERROR: DEX file has no classes in it, failing");
404         goto bail;
405     }
406 
407     /*
408      * Success!
409      */
410     result = 0;
411 
412 bail:
413     if (result != 0 && pDexFile != NULL) {
414         dexFileFree(pDexFile);
415         pDexFile = NULL;
416     }
417     return pDexFile;
418 }
419 
420 /*
421  * Free up the DexFile and any associated data structures.
422  *
423  * Note we may be called with a partially-initialized DexFile.
424  */
dexFileFree(DexFile * pDexFile)425 void dexFileFree(DexFile* pDexFile)
426 {
427     if (pDexFile == NULL)
428         return;
429 
430     free(pDexFile);
431 }
432 
433 /*
434  * Look up a class definition entry by descriptor.
435  *
436  * "descriptor" should look like "Landroid/debug/Stuff;".
437  */
dexFindClass(const DexFile * pDexFile,const char * descriptor)438 const DexClassDef* dexFindClass(const DexFile* pDexFile,
439     const char* descriptor)
440 {
441     const DexClassLookup* pLookup = pDexFile->pClassLookup;
442     u4 hash;
443     int idx, mask;
444 
445     hash = classDescriptorHash(descriptor);
446     mask = pLookup->numEntries - 1;
447     idx = hash & mask;
448 
449     /*
450      * Search until we find a matching entry or an empty slot.
451      */
452     while (true) {
453         int offset;
454 
455         offset = pLookup->table[idx].classDescriptorOffset;
456         if (offset == 0)
457             return NULL;
458 
459         if (pLookup->table[idx].classDescriptorHash == hash) {
460             const char* str;
461 
462             str = (const char*) (pDexFile->baseAddr + offset);
463             if (strcmp(str, descriptor) == 0) {
464                 return (const DexClassDef*)
465                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
466             }
467         }
468 
469         idx = (idx + 1) & mask;
470     }
471 }
472 
473 
474 /*
475  * Compute the DEX file checksum for a memory-mapped DEX file.
476  */
dexComputeChecksum(const DexHeader * pHeader)477 u4 dexComputeChecksum(const DexHeader* pHeader)
478 {
479     const u1* start = (const u1*) pHeader;
480 
481     uLong adler = adler32(0L, Z_NULL, 0);
482     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
483 
484     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
485 }
486 
487 /*
488  * Compute the size, in bytes, of a DexCode.
489  */
dexGetDexCodeSize(const DexCode * pCode)490 size_t dexGetDexCodeSize(const DexCode* pCode)
491 {
492     /*
493      * The catch handler data is the last entry.  It has a variable number
494      * of variable-size pieces, so we need to create an iterator.
495      */
496     u4 handlersSize;
497     u4 offset;
498     u4 ui;
499 
500     if (pCode->triesSize != 0) {
501         handlersSize = dexGetHandlersSize(pCode);
502         offset = dexGetFirstHandlerOffset(pCode);
503     } else {
504         handlersSize = 0;
505         offset = 0;
506     }
507 
508     for (ui = 0; ui < handlersSize; ui++) {
509         DexCatchIterator iterator;
510         dexCatchIteratorInit(&iterator, pCode, offset);
511         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
512     }
513 
514     const u1* handlerData = dexGetCatchHandlerData(pCode);
515 
516     //ALOGD("+++ pCode=%p handlerData=%p last offset=%d",
517     //    pCode, handlerData, offset);
518 
519     /* return the size of the catch handler + everything before it */
520     return (handlerData - (u1*) pCode) + offset;
521 }
522 
523 /*
524  * Round up to the next highest power of 2.
525  *
526  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
527  */
dexRoundUpPower2(u4 val)528 u4 dexRoundUpPower2(u4 val)
529 {
530     val--;
531     val |= val >> 1;
532     val |= val >> 2;
533     val |= val >> 4;
534     val |= val >> 8;
535     val |= val >> 16;
536     val++;
537 
538     return val;
539 }
540