1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Access the contents of a .dex file.
19  */
20 
21 #include "DexFile.h"
22 #include "DexOptData.h"
23 #include "DexProto.h"
24 #include "DexCatch.h"
25 #include "Leb128.h"
26 #include "sha1.h"
27 #include "ZipArchive.h"
28 
29 #include <zlib.h>
30 
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <errno.h>
36 
37 
38 /*
39  * Verifying checksums is good, but it slows things down and causes us to
40  * touch every page.  In the "optimized" world, it doesn't work at all,
41  * because we rewrite the contents.
42  */
43 static const bool kVerifyChecksum = false;
44 static const bool kVerifySignature = false;
45 
46 /* (documented in header) */
dexGetPrimitiveTypeDescriptorChar(PrimitiveType type)47 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
48     const char* string = dexGetPrimitiveTypeDescriptor(type);
49 
50     return (string == NULL) ? '\0' : string[0];
51 }
52 
53 /* (documented in header) */
dexGetPrimitiveTypeDescriptor(PrimitiveType type)54 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
55     switch (type) {
56         case PRIM_VOID:    return "V";
57         case PRIM_BOOLEAN: return "Z";
58         case PRIM_BYTE:    return "B";
59         case PRIM_SHORT:   return "S";
60         case PRIM_CHAR:    return "C";
61         case PRIM_INT:     return "I";
62         case PRIM_LONG:    return "J";
63         case PRIM_FLOAT:   return "F";
64         case PRIM_DOUBLE:  return "D";
65         default:           return NULL;
66     }
67 
68     return NULL;
69 }
70 
71 /* (documented in header) */
dexGetBoxedTypeDescriptor(PrimitiveType type)72 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
73     switch (type) {
74         case PRIM_VOID:    return NULL;
75         case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
76         case PRIM_BYTE:    return "Ljava/lang/Byte;";
77         case PRIM_SHORT:   return "Ljava/lang/Short;";
78         case PRIM_CHAR:    return "Ljava/lang/Character;";
79         case PRIM_INT:     return "Ljava/lang/Integer;";
80         case PRIM_LONG:    return "Ljava/lang/Long;";
81         case PRIM_FLOAT:   return "Ljava/lang/Float;";
82         case PRIM_DOUBLE:  return "Ljava/lang/Double;";
83         default:           return NULL;
84     }
85 }
86 
87 /* (documented in header) */
dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar)88 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
89     switch (descriptorChar) {
90         case 'V': return PRIM_VOID;
91         case 'Z': return PRIM_BOOLEAN;
92         case 'B': return PRIM_BYTE;
93         case 'S': return PRIM_SHORT;
94         case 'C': return PRIM_CHAR;
95         case 'I': return PRIM_INT;
96         case 'J': return PRIM_LONG;
97         case 'F': return PRIM_FLOAT;
98         case 'D': return PRIM_DOUBLE;
99         default:  return PRIM_NOT;
100     }
101 }
102 
103 /* Return the UTF-8 encoded string with the specified string_id index,
104  * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)105 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
106         u4* utf16Size) {
107     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
108     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
109 
110     *utf16Size = readUnsignedLeb128(&ptr);
111     return (const char*) ptr;
112 }
113 
114 /*
115  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
116  * least kSHA1DigestOutputLen bytes.
117  */
118 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
119 
120 /*
121  * Compute a SHA-1 digest on a range of bytes.
122  */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])123 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
124     unsigned char digest[])
125 {
126     SHA1_CTX context;
127     SHA1Init(&context);
128     SHA1Update(&context, data, length);
129     SHA1Final(digest, &context);
130 }
131 
132 /*
133  * Format the SHA-1 digest into the buffer, which must be able to hold at
134  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
135  */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)136 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
137 {
138     static const char hexDigit[] = "0123456789abcdef";
139     char* cp;
140     int i;
141 
142     cp = tmpBuf;
143     for (i = 0; i < kSHA1DigestLen; i++) {
144         *cp++ = hexDigit[digest[i] >> 4];
145         *cp++ = hexDigit[digest[i] & 0x0f];
146     }
147     *cp++ = '\0';
148 
149     assert(cp == tmpBuf + kSHA1DigestOutputLen);
150 
151     return tmpBuf;
152 }
153 
154 /*
155  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
156  *
157  * This may or may not be compatible with UTF-8 hash functions used inside
158  * the Dalvik VM.
159  *
160  * The basic "multiply by 31 and add" approach does better on class names
161  * than most other things tried (e.g. adler32).
162  */
classDescriptorHash(const char * str)163 static u4 classDescriptorHash(const char* str)
164 {
165     u4 hash = 1;
166 
167     while (*str != '\0')
168         hash = hash * 31 + *str++;
169 
170     return hash;
171 }
172 
173 /*
174  * Add an entry to the class lookup table.  We hash the string and probe
175  * until we find an open slot.
176  */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)177 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
178     int stringOff, int classDefOff, int* pNumProbes)
179 {
180     const char* classDescriptor =
181         (const char*) (pDexFile->baseAddr + stringOff);
182     const DexClassDef* pClassDef =
183         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
184     u4 hash = classDescriptorHash(classDescriptor);
185     int mask = pLookup->numEntries-1;
186     int idx = hash & mask;
187 
188     /*
189      * Find the first empty slot.  We oversized the table, so this is
190      * guaranteed to finish.
191      */
192     int probes = 0;
193     while (pLookup->table[idx].classDescriptorOffset != 0) {
194         idx = (idx + 1) & mask;
195         probes++;
196     }
197     //if (probes > 1)
198     //    ALOGW("classLookupAdd: probes=%d", probes);
199 
200     pLookup->table[idx].classDescriptorHash = hash;
201     pLookup->table[idx].classDescriptorOffset = stringOff;
202     pLookup->table[idx].classDefOffset = classDefOff;
203     *pNumProbes = probes;
204 }
205 
206 /*
207  * Create the class lookup hash table.
208  *
209  * Returns newly-allocated storage.
210  */
dexCreateClassLookup(DexFile * pDexFile)211 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
212 {
213     DexClassLookup* pLookup;
214     int allocSize;
215     int i, numEntries;
216     int numProbes, totalProbes, maxProbes;
217 
218     numProbes = totalProbes = maxProbes = 0;
219 
220     assert(pDexFile != NULL);
221 
222     /*
223      * Using a factor of 3 results in far less probing than a factor of 2,
224      * but almost doubles the flash storage requirements for the bootstrap
225      * DEX files.  The overall impact on class loading performance seems
226      * to be minor.  We could probably get some performance improvement by
227      * using a secondary hash.
228      */
229     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
230     allocSize = offsetof(DexClassLookup, table)
231                     + numEntries * sizeof(pLookup->table[0]);
232 
233     pLookup = (DexClassLookup*) calloc(1, allocSize);
234     if (pLookup == NULL)
235         return NULL;
236     pLookup->size = allocSize;
237     pLookup->numEntries = numEntries;
238 
239     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
240         const DexClassDef* pClassDef;
241         const char* pString;
242 
243         pClassDef = dexGetClassDef(pDexFile, i);
244         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
245 
246         classLookupAdd(pDexFile, pLookup,
247             (u1*)pString - pDexFile->baseAddr,
248             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
249 
250         if (numProbes > maxProbes)
251             maxProbes = numProbes;
252         totalProbes += numProbes;
253     }
254 
255     ALOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
256          " total=%d max=%d",
257         pDexFile->pHeader->classDefsSize, numEntries,
258         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
259         allocSize, totalProbes, maxProbes);
260 
261     return pLookup;
262 }
263 
264 
265 /*
266  * Set up the basic raw data pointers of a DexFile. This function isn't
267  * meant for general use.
268  */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)269 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
270     DexHeader *pHeader = (DexHeader*) data;
271 
272     pDexFile->baseAddr = data;
273     pDexFile->pHeader = pHeader;
274     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
275     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
276     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
277     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
278     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
279     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
280     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
281 }
282 
283 /*
284  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
285  * called after the byte-ordering and structure alignment has been fixed up.
286  *
287  * On success, return a newly-allocated DexFile.
288  */
dexFileParse(const u1 * data,size_t length,int flags)289 DexFile* dexFileParse(const u1* data, size_t length, int flags)
290 {
291     DexFile* pDexFile = NULL;
292     const DexHeader* pHeader;
293     const u1* magic;
294     int result = -1;
295 
296     if (length < sizeof(DexHeader)) {
297         ALOGE("too short to be a valid .dex");
298         goto bail;      /* bad file format */
299     }
300 
301     pDexFile = (DexFile*) malloc(sizeof(DexFile));
302     if (pDexFile == NULL)
303         goto bail;      /* alloc failure */
304     memset(pDexFile, 0, sizeof(DexFile));
305 
306     /*
307      * Peel off the optimized header.
308      */
309     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
310         magic = data;
311         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
312             ALOGE("bad opt version (0x%02x %02x %02x %02x)",
313                  magic[4], magic[5], magic[6], magic[7]);
314             goto bail;
315         }
316 
317         pDexFile->pOptHeader = (const DexOptHeader*) data;
318         ALOGV("Good opt header, DEX offset is %d, flags=0x%02x",
319             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
320 
321         /* parse the optimized dex file tables */
322         if (!dexParseOptData(data, length, pDexFile))
323             goto bail;
324 
325         /* ignore the opt header and appended data from here on out */
326         data += pDexFile->pOptHeader->dexOffset;
327         length -= pDexFile->pOptHeader->dexOffset;
328         if (pDexFile->pOptHeader->dexLength > length) {
329             ALOGE("File truncated? stored len=%d, rem len=%d",
330                 pDexFile->pOptHeader->dexLength, (int) length);
331             goto bail;
332         }
333         length = pDexFile->pOptHeader->dexLength;
334     }
335 
336     dexFileSetupBasicPointers(pDexFile, data);
337     pHeader = pDexFile->pHeader;
338 
339     if (!dexHasValidMagic(pHeader)) {
340         goto bail;
341     }
342 
343     /*
344      * Verify the checksum(s).  This is reasonably quick, but does require
345      * touching every byte in the DEX file.  The base checksum changes after
346      * byte-swapping and DEX optimization.
347      */
348     if (flags & kDexParseVerifyChecksum) {
349         u4 adler = dexComputeChecksum(pHeader);
350         if (adler != pHeader->checksum) {
351             ALOGE("ERROR: bad checksum (%08x vs %08x)",
352                 adler, pHeader->checksum);
353             if (!(flags & kDexParseContinueOnError))
354                 goto bail;
355         } else {
356             ALOGV("+++ adler32 checksum (%08x) verified", adler);
357         }
358 
359         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
360         if (pOptHeader != NULL) {
361             adler = dexComputeOptChecksum(pOptHeader);
362             if (adler != pOptHeader->checksum) {
363                 ALOGE("ERROR: bad opt checksum (%08x vs %08x)",
364                     adler, pOptHeader->checksum);
365                 if (!(flags & kDexParseContinueOnError))
366                     goto bail;
367             } else {
368                 ALOGV("+++ adler32 opt checksum (%08x) verified", adler);
369             }
370         }
371     }
372 
373     /*
374      * Verify the SHA-1 digest.  (Normally we don't want to do this --
375      * the digest is used to uniquely identify the original DEX file, and
376      * can't be computed for verification after the DEX is byte-swapped
377      * and optimized.)
378      */
379     if (kVerifySignature) {
380         unsigned char sha1Digest[kSHA1DigestLen];
381         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
382                             kSHA1DigestLen;
383 
384         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
385         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
386             char tmpBuf1[kSHA1DigestOutputLen];
387             char tmpBuf2[kSHA1DigestOutputLen];
388             ALOGE("ERROR: bad SHA1 digest (%s vs %s)",
389                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
390                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
391             if (!(flags & kDexParseContinueOnError))
392                 goto bail;
393         } else {
394             ALOGV("+++ sha1 digest verified");
395         }
396     }
397 
398     if (pHeader->fileSize != length) {
399         ALOGE("ERROR: stored file size (%d) != expected (%d)",
400             (int) pHeader->fileSize, (int) length);
401         if (!(flags & kDexParseContinueOnError))
402             goto bail;
403     }
404 
405     if (pHeader->classDefsSize == 0) {
406         ALOGE("ERROR: DEX file has no classes in it, failing");
407         goto bail;
408     }
409 
410     /*
411      * Success!
412      */
413     result = 0;
414 
415 bail:
416     if (result != 0 && pDexFile != NULL) {
417         dexFileFree(pDexFile);
418         pDexFile = NULL;
419     }
420     return pDexFile;
421 }
422 
423 /*
424  * Free up the DexFile and any associated data structures.
425  *
426  * Note we may be called with a partially-initialized DexFile.
427  */
dexFileFree(DexFile * pDexFile)428 void dexFileFree(DexFile* pDexFile)
429 {
430     if (pDexFile == NULL)
431         return;
432 
433     free(pDexFile);
434 }
435 
436 /*
437  * Look up a class definition entry by descriptor.
438  *
439  * "descriptor" should look like "Landroid/debug/Stuff;".
440  */
dexFindClass(const DexFile * pDexFile,const char * descriptor)441 const DexClassDef* dexFindClass(const DexFile* pDexFile,
442     const char* descriptor)
443 {
444     const DexClassLookup* pLookup = pDexFile->pClassLookup;
445     u4 hash;
446     int idx, mask;
447 
448     hash = classDescriptorHash(descriptor);
449     mask = pLookup->numEntries - 1;
450     idx = hash & mask;
451 
452     /*
453      * Search until we find a matching entry or an empty slot.
454      */
455     while (true) {
456         int offset;
457 
458         offset = pLookup->table[idx].classDescriptorOffset;
459         if (offset == 0)
460             return NULL;
461 
462         if (pLookup->table[idx].classDescriptorHash == hash) {
463             const char* str;
464 
465             str = (const char*) (pDexFile->baseAddr + offset);
466             if (strcmp(str, descriptor) == 0) {
467                 return (const DexClassDef*)
468                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
469             }
470         }
471 
472         idx = (idx + 1) & mask;
473     }
474 }
475 
476 
477 /*
478  * Compute the DEX file checksum for a memory-mapped DEX file.
479  */
dexComputeChecksum(const DexHeader * pHeader)480 u4 dexComputeChecksum(const DexHeader* pHeader)
481 {
482     const u1* start = (const u1*) pHeader;
483 
484     uLong adler = adler32(0L, Z_NULL, 0);
485     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
486 
487     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
488 }
489 
490 /*
491  * Compute the size, in bytes, of a DexCode.
492  */
dexGetDexCodeSize(const DexCode * pCode)493 size_t dexGetDexCodeSize(const DexCode* pCode)
494 {
495     /*
496      * The catch handler data is the last entry.  It has a variable number
497      * of variable-size pieces, so we need to create an iterator.
498      */
499     u4 handlersSize;
500     u4 offset;
501     u4 ui;
502 
503     if (pCode->triesSize != 0) {
504         handlersSize = dexGetHandlersSize(pCode);
505         offset = dexGetFirstHandlerOffset(pCode);
506     } else {
507         handlersSize = 0;
508         offset = 0;
509     }
510 
511     for (ui = 0; ui < handlersSize; ui++) {
512         DexCatchIterator iterator;
513         dexCatchIteratorInit(&iterator, pCode, offset);
514         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
515     }
516 
517     const u1* handlerData = dexGetCatchHandlerData(pCode);
518 
519     //ALOGD("+++ pCode=%p handlerData=%p last offset=%d",
520     //    pCode, handlerData, offset);
521 
522     /* return the size of the catch handler + everything before it */
523     return (handlerData - (u1*) pCode) + offset;
524 }
525 
526 /*
527  * Round up to the next highest power of 2.
528  *
529  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
530  */
dexRoundUpPower2(u4 val)531 u4 dexRoundUpPower2(u4 val)
532 {
533     val--;
534     val |= val >> 1;
535     val |= val >> 2;
536     val |= val >> 4;
537     val |= val >> 8;
538     val |= val >> 16;
539     val++;
540 
541     return val;
542 }
543