1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.dexdeps;
18 
19 import java.io.IOException;
20 import java.io.RandomAccessFile;
21 import java.nio.charset.StandardCharsets;
22 import java.util.Arrays;
23 
24 /**
25  * Data extracted from a DEX file.
26  */
27 public class DexData {
28     private RandomAccessFile mDexFile;
29     private HeaderItem mHeaderItem;
30     private String[] mStrings;              // strings from string_data_*
31     private TypeIdItem[] mTypeIds;
32     private ProtoIdItem[] mProtoIds;
33     private FieldIdItem[] mFieldIds;
34     private MethodIdItem[] mMethodIds;
35     private ClassDefItem[] mClassDefs;
36 
37     private byte tmpBuf[] = new byte[4];
38     private boolean isBigEndian = false;
39 
40     /**
41      * Constructs a new DexData for this file.
42      */
DexData(RandomAccessFile raf)43     public DexData(RandomAccessFile raf) {
44         mDexFile = raf;
45     }
46 
47     /**
48      * Loads the contents of the DEX file into our data structures.
49      *
50      * @throws IOException if we encounter a problem while reading
51      * @throws DexDataException if the DEX contents look bad
52      */
load()53     public void load() throws IOException {
54         parseHeaderItem();
55 
56         loadStrings();
57         loadTypeIds();
58         loadProtoIds();
59         loadFieldIds();
60         loadMethodIds();
61         loadClassDefs();
62 
63         markInternalClasses();
64     }
65 
66     /**
67      * Verifies the given magic number.
68      */
verifyMagic(byte[] magic)69     private static boolean verifyMagic(byte[] magic) {
70         return Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v035) ||
71             Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v037);
72     }
73 
74     /**
75      * Parses the interesting bits out of the header.
76      */
parseHeaderItem()77     void parseHeaderItem() throws IOException {
78         mHeaderItem = new HeaderItem();
79 
80         seek(0);
81 
82         byte[] magic = new byte[8];
83         readBytes(magic);
84         if (!verifyMagic(magic)) {
85             System.err.println("Magic number is wrong -- are you sure " +
86                 "this is a DEX file?");
87             throw new DexDataException();
88         }
89 
90         /*
91          * Read the endian tag, so we properly swap things as we read
92          * them from here on.
93          */
94         seek(8+4+20+4+4);
95         mHeaderItem.endianTag = readInt();
96         if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) {
97             /* do nothing */
98         } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){
99             /* file is big-endian (!), reverse future reads */
100             isBigEndian = true;
101         } else {
102             System.err.println("Endian constant has unexpected value " +
103                 Integer.toHexString(mHeaderItem.endianTag));
104             throw new DexDataException();
105         }
106 
107         seek(8+4+20);  // magic, checksum, signature
108         mHeaderItem.fileSize = readInt();
109         mHeaderItem.headerSize = readInt();
110         /*mHeaderItem.endianTag =*/ readInt();
111         /*mHeaderItem.linkSize =*/ readInt();
112         /*mHeaderItem.linkOff =*/ readInt();
113         /*mHeaderItem.mapOff =*/ readInt();
114         mHeaderItem.stringIdsSize = readInt();
115         mHeaderItem.stringIdsOff = readInt();
116         mHeaderItem.typeIdsSize = readInt();
117         mHeaderItem.typeIdsOff = readInt();
118         mHeaderItem.protoIdsSize = readInt();
119         mHeaderItem.protoIdsOff = readInt();
120         mHeaderItem.fieldIdsSize = readInt();
121         mHeaderItem.fieldIdsOff = readInt();
122         mHeaderItem.methodIdsSize = readInt();
123         mHeaderItem.methodIdsOff = readInt();
124         mHeaderItem.classDefsSize = readInt();
125         mHeaderItem.classDefsOff = readInt();
126         /*mHeaderItem.dataSize =*/ readInt();
127         /*mHeaderItem.dataOff =*/ readInt();
128     }
129 
130     /**
131      * Loads the string table out of the DEX.
132      *
133      * First we read all of the string_id_items, then we read all of the
134      * string_data_item.  Doing it this way should allow us to avoid
135      * seeking around in the file.
136      */
loadStrings()137     void loadStrings() throws IOException {
138         int count = mHeaderItem.stringIdsSize;
139         int stringOffsets[] = new int[count];
140 
141         //System.out.println("reading " + count + " strings");
142 
143         seek(mHeaderItem.stringIdsOff);
144         for (int i = 0; i < count; i++) {
145             stringOffsets[i] = readInt();
146         }
147 
148         mStrings = new String[count];
149 
150         seek(stringOffsets[0]);
151         for (int i = 0; i < count; i++) {
152             seek(stringOffsets[i]);         // should be a no-op
153             mStrings[i] = readString();
154             //System.out.println("STR: " + i + ": " + mStrings[i]);
155         }
156     }
157 
158     /**
159      * Loads the type ID list.
160      */
loadTypeIds()161     void loadTypeIds() throws IOException {
162         int count = mHeaderItem.typeIdsSize;
163         mTypeIds = new TypeIdItem[count];
164 
165         //System.out.println("reading " + count + " typeIds");
166         seek(mHeaderItem.typeIdsOff);
167         for (int i = 0; i < count; i++) {
168             mTypeIds[i] = new TypeIdItem();
169             mTypeIds[i].descriptorIdx = readInt();
170 
171             //System.out.println(i + ": " + mTypeIds[i].descriptorIdx +
172             //    " " + mStrings[mTypeIds[i].descriptorIdx]);
173         }
174     }
175 
176     /**
177      * Loads the proto ID list.
178      */
loadProtoIds()179     void loadProtoIds() throws IOException {
180         int count = mHeaderItem.protoIdsSize;
181         mProtoIds = new ProtoIdItem[count];
182 
183         //System.out.println("reading " + count + " protoIds");
184         seek(mHeaderItem.protoIdsOff);
185 
186         /*
187          * Read the proto ID items.
188          */
189         for (int i = 0; i < count; i++) {
190             mProtoIds[i] = new ProtoIdItem();
191             mProtoIds[i].shortyIdx = readInt();
192             mProtoIds[i].returnTypeIdx = readInt();
193             mProtoIds[i].parametersOff = readInt();
194 
195             //System.out.println(i + ": " + mProtoIds[i].shortyIdx +
196             //    " " + mStrings[mProtoIds[i].shortyIdx]);
197         }
198 
199         /*
200          * Go back through and read the type lists.
201          */
202         for (int i = 0; i < count; i++) {
203             ProtoIdItem protoId = mProtoIds[i];
204 
205             int offset = protoId.parametersOff;
206 
207             if (offset == 0) {
208                 protoId.types = new int[0];
209                 continue;
210             } else {
211                 seek(offset);
212                 int size = readInt();       // #of entries in list
213                 protoId.types = new int[size];
214 
215                 for (int j = 0; j < size; j++) {
216                     protoId.types[j] = readShort() & 0xffff;
217                 }
218             }
219         }
220     }
221 
222     /**
223      * Loads the field ID list.
224      */
loadFieldIds()225     void loadFieldIds() throws IOException {
226         int count = mHeaderItem.fieldIdsSize;
227         mFieldIds = new FieldIdItem[count];
228 
229         //System.out.println("reading " + count + " fieldIds");
230         seek(mHeaderItem.fieldIdsOff);
231         for (int i = 0; i < count; i++) {
232             mFieldIds[i] = new FieldIdItem();
233             mFieldIds[i].classIdx = readShort() & 0xffff;
234             mFieldIds[i].typeIdx = readShort() & 0xffff;
235             mFieldIds[i].nameIdx = readInt();
236 
237             //System.out.println(i + ": " + mFieldIds[i].nameIdx +
238             //    " " + mStrings[mFieldIds[i].nameIdx]);
239         }
240     }
241 
242     /**
243      * Loads the method ID list.
244      */
loadMethodIds()245     void loadMethodIds() throws IOException {
246         int count = mHeaderItem.methodIdsSize;
247         mMethodIds = new MethodIdItem[count];
248 
249         //System.out.println("reading " + count + " methodIds");
250         seek(mHeaderItem.methodIdsOff);
251         for (int i = 0; i < count; i++) {
252             mMethodIds[i] = new MethodIdItem();
253             mMethodIds[i].classIdx = readShort() & 0xffff;
254             mMethodIds[i].protoIdx = readShort() & 0xffff;
255             mMethodIds[i].nameIdx = readInt();
256 
257             //System.out.println(i + ": " + mMethodIds[i].nameIdx +
258             //    " " + mStrings[mMethodIds[i].nameIdx]);
259         }
260     }
261 
262     /**
263      * Loads the class defs list.
264      */
loadClassDefs()265     void loadClassDefs() throws IOException {
266         int count = mHeaderItem.classDefsSize;
267         mClassDefs = new ClassDefItem[count];
268 
269         //System.out.println("reading " + count + " classDefs");
270         seek(mHeaderItem.classDefsOff);
271         for (int i = 0; i < count; i++) {
272             mClassDefs[i] = new ClassDefItem();
273             mClassDefs[i].classIdx = readInt();
274 
275             /* access_flags = */ readInt();
276             /* superclass_idx = */ readInt();
277             /* interfaces_off = */ readInt();
278             /* source_file_idx = */ readInt();
279             /* annotations_off = */ readInt();
280             /* class_data_off = */ readInt();
281             /* static_values_off = */ readInt();
282 
283             //System.out.println(i + ": " + mClassDefs[i].classIdx + " " +
284             //    mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]);
285         }
286     }
287 
288     /**
289      * Sets the "internal" flag on type IDs which are defined in the
290      * DEX file or within the VM (e.g. primitive classes and arrays).
291      */
markInternalClasses()292     void markInternalClasses() {
293         for (int i = mClassDefs.length -1; i >= 0; i--) {
294             mTypeIds[mClassDefs[i].classIdx].internal = true;
295         }
296 
297         for (int i = 0; i < mTypeIds.length; i++) {
298             String className = mStrings[mTypeIds[i].descriptorIdx];
299 
300             if (className.length() == 1) {
301                 // primitive class
302                 mTypeIds[i].internal = true;
303             } else if (className.charAt(0) == '[') {
304                 mTypeIds[i].internal = true;
305             }
306 
307             //System.out.println(i + " " +
308             //    (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " +
309             //    mStrings[mTypeIds[i].descriptorIdx]);
310         }
311     }
312 
313 
314     /*
315      * =======================================================================
316      *      Queries
317      * =======================================================================
318      */
319 
320     /**
321      * Returns the class name, given an index into the type_ids table.
322      */
classNameFromTypeIndex(int idx)323     private String classNameFromTypeIndex(int idx) {
324         return mStrings[mTypeIds[idx].descriptorIdx];
325     }
326 
327     /**
328      * Returns an array of method argument type strings, given an index
329      * into the proto_ids table.
330      */
argArrayFromProtoIndex(int idx)331     private String[] argArrayFromProtoIndex(int idx) {
332         ProtoIdItem protoId = mProtoIds[idx];
333         String[] result = new String[protoId.types.length];
334 
335         for (int i = 0; i < protoId.types.length; i++) {
336             result[i] = mStrings[mTypeIds[protoId.types[i]].descriptorIdx];
337         }
338 
339         return result;
340     }
341 
342     /**
343      * Returns a string representing the method's return type, given an
344      * index into the proto_ids table.
345      */
returnTypeFromProtoIndex(int idx)346     private String returnTypeFromProtoIndex(int idx) {
347         ProtoIdItem protoId = mProtoIds[idx];
348         return mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx];
349     }
350 
351     /**
352      * Returns an array with all of the class references that don't
353      * correspond to classes in the DEX file.  Each class reference has
354      * a list of the referenced fields and methods associated with
355      * that class.
356      */
getExternalReferences()357     public ClassRef[] getExternalReferences() {
358         // create a sparse array of ClassRef that parallels mTypeIds
359         ClassRef[] sparseRefs = new ClassRef[mTypeIds.length];
360 
361         // create entries for all externally-referenced classes
362         int count = 0;
363         for (int i = 0; i < mTypeIds.length; i++) {
364             if (!mTypeIds[i].internal) {
365                 sparseRefs[i] =
366                     new ClassRef(mStrings[mTypeIds[i].descriptorIdx]);
367                 count++;
368             }
369         }
370 
371         // add fields and methods to the appropriate class entry
372         addExternalFieldReferences(sparseRefs);
373         addExternalMethodReferences(sparseRefs);
374 
375         // crunch out the sparseness
376         ClassRef[] classRefs = new ClassRef[count];
377         int idx = 0;
378         for (int i = 0; i < mTypeIds.length; i++) {
379             if (sparseRefs[i] != null)
380                 classRefs[idx++] = sparseRefs[i];
381         }
382 
383         assert idx == count;
384 
385         return classRefs;
386     }
387 
388     /**
389      * Runs through the list of field references, inserting external
390      * references into the appropriate ClassRef.
391      */
addExternalFieldReferences(ClassRef[] sparseRefs)392     private void addExternalFieldReferences(ClassRef[] sparseRefs) {
393         for (int i = 0; i < mFieldIds.length; i++) {
394             if (!mTypeIds[mFieldIds[i].classIdx].internal) {
395                 FieldIdItem fieldId = mFieldIds[i];
396                 FieldRef newFieldRef = new FieldRef(
397                         classNameFromTypeIndex(fieldId.classIdx),
398                         classNameFromTypeIndex(fieldId.typeIdx),
399                         mStrings[fieldId.nameIdx]);
400                 sparseRefs[mFieldIds[i].classIdx].addField(newFieldRef);
401             }
402         }
403     }
404 
405     /**
406      * Runs through the list of method references, inserting external
407      * references into the appropriate ClassRef.
408      */
addExternalMethodReferences(ClassRef[] sparseRefs)409     private void addExternalMethodReferences(ClassRef[] sparseRefs) {
410         for (int i = 0; i < mMethodIds.length; i++) {
411             if (!mTypeIds[mMethodIds[i].classIdx].internal) {
412                 MethodIdItem methodId = mMethodIds[i];
413                 MethodRef newMethodRef = new MethodRef(
414                         classNameFromTypeIndex(methodId.classIdx),
415                         argArrayFromProtoIndex(methodId.protoIdx),
416                         returnTypeFromProtoIndex(methodId.protoIdx),
417                         mStrings[methodId.nameIdx]);
418                 sparseRefs[mMethodIds[i].classIdx].addMethod(newMethodRef);
419             }
420         }
421     }
422 
423 
424     /*
425      * =======================================================================
426      *      Basic I/O functions
427      * =======================================================================
428      */
429 
430     /**
431      * Seeks the DEX file to the specified absolute position.
432      */
seek(int position)433     void seek(int position) throws IOException {
434         mDexFile.seek(position);
435     }
436 
437     /**
438      * Fills the buffer by reading bytes from the DEX file.
439      */
readBytes(byte[] buffer)440     void readBytes(byte[] buffer) throws IOException {
441         mDexFile.readFully(buffer);
442     }
443 
444     /**
445      * Reads a single signed byte value.
446      */
readByte()447     byte readByte() throws IOException {
448         mDexFile.readFully(tmpBuf, 0, 1);
449         return tmpBuf[0];
450     }
451 
452     /**
453      * Reads a signed 16-bit integer, byte-swapping if necessary.
454      */
readShort()455     short readShort() throws IOException {
456         mDexFile.readFully(tmpBuf, 0, 2);
457         if (isBigEndian) {
458             return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8));
459         } else {
460             return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8));
461         }
462     }
463 
464     /**
465      * Reads a signed 32-bit integer, byte-swapping if necessary.
466      */
readInt()467     int readInt() throws IOException {
468         mDexFile.readFully(tmpBuf, 0, 4);
469 
470         if (isBigEndian) {
471             return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) |
472                    ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24);
473         } else {
474             return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) |
475                    ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24);
476         }
477     }
478 
479     /**
480      * Reads a variable-length unsigned LEB128 value.  Does not attempt to
481      * verify that the value is valid.
482      *
483      * @throws EOFException if we run off the end of the file
484      */
readUnsignedLeb128()485     int readUnsignedLeb128() throws IOException {
486         int result = 0;
487         byte val;
488 
489         do {
490             val = readByte();
491             result = (result << 7) | (val & 0x7f);
492         } while (val < 0);
493 
494         return result;
495     }
496 
497     /**
498      * Reads a UTF-8 string.
499      *
500      * We don't know how long the UTF-8 string is, so we have to read one
501      * byte at a time.  We could make an educated guess based on the
502      * utf16_size and seek back if we get it wrong, but seeking backward
503      * may cause the underlying implementation to reload I/O buffers.
504      */
readString()505     String readString() throws IOException {
506         int utf16len = readUnsignedLeb128();
507         byte inBuf[] = new byte[utf16len * 3];      // worst case
508         int idx;
509 
510         for (idx = 0; idx < inBuf.length; idx++) {
511             byte val = readByte();
512             if (val == 0)
513                 break;
514             inBuf[idx] = val;
515         }
516 
517         return new String(inBuf, 0, idx, "UTF-8");
518     }
519 
520 
521     /*
522      * =======================================================================
523      *      Internal "structure" declarations
524      * =======================================================================
525      */
526 
527     /**
528      * Holds the contents of a header_item.
529      */
530     static class HeaderItem {
531         public int fileSize;
532         public int headerSize;
533         public int endianTag;
534         public int stringIdsSize, stringIdsOff;
535         public int typeIdsSize, typeIdsOff;
536         public int protoIdsSize, protoIdsOff;
537         public int fieldIdsSize, fieldIdsOff;
538         public int methodIdsSize, methodIdsOff;
539         public int classDefsSize, classDefsOff;
540 
541         /* expected magic values */
542         public static final byte[] DEX_FILE_MAGIC_v035 =
543             "dex\n035\0".getBytes(StandardCharsets.US_ASCII);
544 
545         // Dex version 036 skipped because of an old dalvik bug on some versions
546         // of android where dex files with that version number would erroneously
547         // be accepted and run. See: art/runtime/dex_file.cc
548 
549         // V037 was introduced in API LEVEL 24
550         public static final byte[] DEX_FILE_MAGIC_v037 =
551             "dex\n037\0".getBytes(StandardCharsets.US_ASCII);
552         public static final int ENDIAN_CONSTANT = 0x12345678;
553         public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412;
554     }
555 
556     /**
557      * Holds the contents of a type_id_item.
558      *
559      * This is chiefly a list of indices into the string table.  We need
560      * some additional bits of data, such as whether or not the type ID
561      * represents a class defined in this DEX, so we use an object for
562      * each instead of a simple integer.  (Could use a parallel array, but
563      * since this is a desktop app it's not essential.)
564      */
565     static class TypeIdItem {
566         public int descriptorIdx;       // index into string_ids
567 
568         public boolean internal;        // defined within this DEX file?
569     }
570 
571     /**
572      * Holds the contents of a proto_id_item.
573      */
574     static class ProtoIdItem {
575         public int shortyIdx;           // index into string_ids
576         public int returnTypeIdx;       // index into type_ids
577         public int parametersOff;       // file offset to a type_list
578 
579         public int types[];             // contents of type list
580     }
581 
582     /**
583      * Holds the contents of a field_id_item.
584      */
585     static class FieldIdItem {
586         public int classIdx;            // index into type_ids (defining class)
587         public int typeIdx;             // index into type_ids (field type)
588         public int nameIdx;             // index into string_ids
589     }
590 
591     /**
592      * Holds the contents of a method_id_item.
593      */
594     static class MethodIdItem {
595         public int classIdx;            // index into type_ids
596         public int protoIdx;            // index into proto_ids
597         public int nameIdx;             // index into string_ids
598     }
599 
600     /**
601      * Holds the contents of a class_def_item.
602      *
603      * We don't really need a class for this, but there's some stuff in
604      * the class_def_item that we might want later.
605      */
606     static class ClassDefItem {
607         public int classIdx;            // index into type_ids
608     }
609 }
610