1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.dexdeps;
18 
19 import java.io.IOException;
20 import java.io.RandomAccessFile;
21 import java.nio.charset.StandardCharsets;
22 import java.util.Arrays;
23 
24 /**
25  * Data extracted from a DEX file.
26  */
27 public class DexData {
28     private RandomAccessFile mDexFile;
29     private HeaderItem mHeaderItem;
30     private String[] mStrings;              // strings from string_data_*
31     private TypeIdItem[] mTypeIds;
32     private ProtoIdItem[] mProtoIds;
33     private FieldIdItem[] mFieldIds;
34     private MethodIdItem[] mMethodIds;
35     private ClassDefItem[] mClassDefs;
36 
37     private byte tmpBuf[] = new byte[4];
38     private boolean isBigEndian = false;
39 
40     /**
41      * Constructs a new DexData for this file.
42      */
DexData(RandomAccessFile raf)43     public DexData(RandomAccessFile raf) {
44         mDexFile = raf;
45     }
46 
47     /**
48      * Loads the contents of the DEX file into our data structures.
49      *
50      * @throws IOException if we encounter a problem while reading
51      * @throws DexDataException if the DEX contents look bad
52      */
load()53     public void load() throws IOException {
54         parseHeaderItem();
55 
56         loadStrings();
57         loadTypeIds();
58         loadProtoIds();
59         loadFieldIds();
60         loadMethodIds();
61         loadClassDefs();
62 
63         markInternalClasses();
64     }
65 
66     /**
67      * Verifies the given magic number.
68      */
verifyMagic(byte[] magic)69     private static boolean verifyMagic(byte[] magic) {
70         return Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v035) ||
71             Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v037) ||
72             Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v038) ||
73             Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v039);
74     }
75 
76     /**
77      * Parses the interesting bits out of the header.
78      */
parseHeaderItem()79     void parseHeaderItem() throws IOException {
80         mHeaderItem = new HeaderItem();
81 
82         seek(0);
83 
84         byte[] magic = new byte[8];
85         readBytes(magic);
86         if (!verifyMagic(magic)) {
87             System.err.println("Magic number is wrong -- are you sure " +
88                 "this is a DEX file?");
89             throw new DexDataException();
90         }
91 
92         /*
93          * Read the endian tag, so we properly swap things as we read
94          * them from here on.
95          */
96         seek(8+4+20+4+4);
97         mHeaderItem.endianTag = readInt();
98         if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) {
99             /* do nothing */
100         } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){
101             /* file is big-endian (!), reverse future reads */
102             isBigEndian = true;
103         } else {
104             System.err.println("Endian constant has unexpected value " +
105                 Integer.toHexString(mHeaderItem.endianTag));
106             throw new DexDataException();
107         }
108 
109         seek(8+4+20);  // magic, checksum, signature
110         mHeaderItem.fileSize = readInt();
111         mHeaderItem.headerSize = readInt();
112         /*mHeaderItem.endianTag =*/ readInt();
113         /*mHeaderItem.linkSize =*/ readInt();
114         /*mHeaderItem.linkOff =*/ readInt();
115         /*mHeaderItem.mapOff =*/ readInt();
116         mHeaderItem.stringIdsSize = readInt();
117         mHeaderItem.stringIdsOff = readInt();
118         mHeaderItem.typeIdsSize = readInt();
119         mHeaderItem.typeIdsOff = readInt();
120         mHeaderItem.protoIdsSize = readInt();
121         mHeaderItem.protoIdsOff = readInt();
122         mHeaderItem.fieldIdsSize = readInt();
123         mHeaderItem.fieldIdsOff = readInt();
124         mHeaderItem.methodIdsSize = readInt();
125         mHeaderItem.methodIdsOff = readInt();
126         mHeaderItem.classDefsSize = readInt();
127         mHeaderItem.classDefsOff = readInt();
128         /*mHeaderItem.dataSize =*/ readInt();
129         /*mHeaderItem.dataOff =*/ readInt();
130     }
131 
132     /**
133      * Loads the string table out of the DEX.
134      *
135      * First we read all of the string_id_items, then we read all of the
136      * string_data_item.  Doing it this way should allow us to avoid
137      * seeking around in the file.
138      */
loadStrings()139     void loadStrings() throws IOException {
140         int count = mHeaderItem.stringIdsSize;
141         int stringOffsets[] = new int[count];
142 
143         //System.out.println("reading " + count + " strings");
144 
145         seek(mHeaderItem.stringIdsOff);
146         for (int i = 0; i < count; i++) {
147             stringOffsets[i] = readInt();
148         }
149 
150         mStrings = new String[count];
151 
152         seek(stringOffsets[0]);
153         for (int i = 0; i < count; i++) {
154             seek(stringOffsets[i]);         // should be a no-op
155             mStrings[i] = readString();
156             //System.out.println("STR: " + i + ": " + mStrings[i]);
157         }
158     }
159 
160     /**
161      * Loads the type ID list.
162      */
loadTypeIds()163     void loadTypeIds() throws IOException {
164         int count = mHeaderItem.typeIdsSize;
165         mTypeIds = new TypeIdItem[count];
166 
167         //System.out.println("reading " + count + " typeIds");
168         seek(mHeaderItem.typeIdsOff);
169         for (int i = 0; i < count; i++) {
170             mTypeIds[i] = new TypeIdItem();
171             mTypeIds[i].descriptorIdx = readInt();
172 
173             //System.out.println(i + ": " + mTypeIds[i].descriptorIdx +
174             //    " " + mStrings[mTypeIds[i].descriptorIdx]);
175         }
176     }
177 
178     /**
179      * Loads the proto ID list.
180      */
loadProtoIds()181     void loadProtoIds() throws IOException {
182         int count = mHeaderItem.protoIdsSize;
183         mProtoIds = new ProtoIdItem[count];
184 
185         //System.out.println("reading " + count + " protoIds");
186         seek(mHeaderItem.protoIdsOff);
187 
188         /*
189          * Read the proto ID items.
190          */
191         for (int i = 0; i < count; i++) {
192             mProtoIds[i] = new ProtoIdItem();
193             mProtoIds[i].shortyIdx = readInt();
194             mProtoIds[i].returnTypeIdx = readInt();
195             mProtoIds[i].parametersOff = readInt();
196 
197             //System.out.println(i + ": " + mProtoIds[i].shortyIdx +
198             //    " " + mStrings[mProtoIds[i].shortyIdx]);
199         }
200 
201         /*
202          * Go back through and read the type lists.
203          */
204         for (int i = 0; i < count; i++) {
205             ProtoIdItem protoId = mProtoIds[i];
206 
207             int offset = protoId.parametersOff;
208 
209             if (offset == 0) {
210                 protoId.types = new int[0];
211                 continue;
212             } else {
213                 seek(offset);
214                 int size = readInt();       // #of entries in list
215                 protoId.types = new int[size];
216 
217                 for (int j = 0; j < size; j++) {
218                     protoId.types[j] = readShort() & 0xffff;
219                 }
220             }
221         }
222     }
223 
224     /**
225      * Loads the field ID list.
226      */
loadFieldIds()227     void loadFieldIds() throws IOException {
228         int count = mHeaderItem.fieldIdsSize;
229         mFieldIds = new FieldIdItem[count];
230 
231         //System.out.println("reading " + count + " fieldIds");
232         seek(mHeaderItem.fieldIdsOff);
233         for (int i = 0; i < count; i++) {
234             mFieldIds[i] = new FieldIdItem();
235             mFieldIds[i].classIdx = readShort() & 0xffff;
236             mFieldIds[i].typeIdx = readShort() & 0xffff;
237             mFieldIds[i].nameIdx = readInt();
238 
239             //System.out.println(i + ": " + mFieldIds[i].nameIdx +
240             //    " " + mStrings[mFieldIds[i].nameIdx]);
241         }
242     }
243 
244     /**
245      * Loads the method ID list.
246      */
loadMethodIds()247     void loadMethodIds() throws IOException {
248         int count = mHeaderItem.methodIdsSize;
249         mMethodIds = new MethodIdItem[count];
250 
251         //System.out.println("reading " + count + " methodIds");
252         seek(mHeaderItem.methodIdsOff);
253         for (int i = 0; i < count; i++) {
254             mMethodIds[i] = new MethodIdItem();
255             mMethodIds[i].classIdx = readShort() & 0xffff;
256             mMethodIds[i].protoIdx = readShort() & 0xffff;
257             mMethodIds[i].nameIdx = readInt();
258 
259             //System.out.println(i + ": " + mMethodIds[i].nameIdx +
260             //    " " + mStrings[mMethodIds[i].nameIdx]);
261         }
262     }
263 
264     /**
265      * Loads the class defs list.
266      */
loadClassDefs()267     void loadClassDefs() throws IOException {
268         int count = mHeaderItem.classDefsSize;
269         mClassDefs = new ClassDefItem[count];
270 
271         //System.out.println("reading " + count + " classDefs");
272         seek(mHeaderItem.classDefsOff);
273         for (int i = 0; i < count; i++) {
274             mClassDefs[i] = new ClassDefItem();
275             mClassDefs[i].classIdx = readInt();
276 
277             /* access_flags = */ readInt();
278             /* superclass_idx = */ readInt();
279             /* interfaces_off = */ readInt();
280             /* source_file_idx = */ readInt();
281             /* annotations_off = */ readInt();
282             /* class_data_off = */ readInt();
283             /* static_values_off = */ readInt();
284 
285             //System.out.println(i + ": " + mClassDefs[i].classIdx + " " +
286             //    mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]);
287         }
288     }
289 
290     /**
291      * Sets the "internal" flag on type IDs which are defined in the
292      * DEX file or within the VM (e.g. primitive classes and arrays).
293      */
markInternalClasses()294     void markInternalClasses() {
295         for (int i = mClassDefs.length -1; i >= 0; i--) {
296             mTypeIds[mClassDefs[i].classIdx].internal = true;
297         }
298 
299         for (int i = 0; i < mTypeIds.length; i++) {
300             String className = mStrings[mTypeIds[i].descriptorIdx];
301 
302             if (className.length() == 1) {
303                 // primitive class
304                 mTypeIds[i].internal = true;
305             } else if (className.charAt(0) == '[') {
306                 mTypeIds[i].internal = true;
307             }
308 
309             //System.out.println(i + " " +
310             //    (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " +
311             //    mStrings[mTypeIds[i].descriptorIdx]);
312         }
313     }
314 
315 
316     /*
317      * =======================================================================
318      *      Queries
319      * =======================================================================
320      */
321 
322     /**
323      * Returns the class name, given an index into the type_ids table.
324      */
classNameFromTypeIndex(int idx)325     private String classNameFromTypeIndex(int idx) {
326         return mStrings[mTypeIds[idx].descriptorIdx];
327     }
328 
329     /**
330      * Returns an array of method argument type strings, given an index
331      * into the proto_ids table.
332      */
argArrayFromProtoIndex(int idx)333     private String[] argArrayFromProtoIndex(int idx) {
334         ProtoIdItem protoId = mProtoIds[idx];
335         String[] result = new String[protoId.types.length];
336 
337         for (int i = 0; i < protoId.types.length; i++) {
338             result[i] = mStrings[mTypeIds[protoId.types[i]].descriptorIdx];
339         }
340 
341         return result;
342     }
343 
344     /**
345      * Returns a string representing the method's return type, given an
346      * index into the proto_ids table.
347      */
returnTypeFromProtoIndex(int idx)348     private String returnTypeFromProtoIndex(int idx) {
349         ProtoIdItem protoId = mProtoIds[idx];
350         return mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx];
351     }
352 
353     /**
354      * Returns an array with all of the class references that don't
355      * correspond to classes in the DEX file.  Each class reference has
356      * a list of the referenced fields and methods associated with
357      * that class.
358      */
getExternalReferences()359     public ClassRef[] getExternalReferences() {
360         // create a sparse array of ClassRef that parallels mTypeIds
361         ClassRef[] sparseRefs = new ClassRef[mTypeIds.length];
362 
363         // create entries for all externally-referenced classes
364         int count = 0;
365         for (int i = 0; i < mTypeIds.length; i++) {
366             if (!mTypeIds[i].internal) {
367                 sparseRefs[i] =
368                     new ClassRef(mStrings[mTypeIds[i].descriptorIdx]);
369                 count++;
370             }
371         }
372 
373         // add fields and methods to the appropriate class entry
374         addExternalFieldReferences(sparseRefs);
375         addExternalMethodReferences(sparseRefs);
376 
377         // crunch out the sparseness
378         ClassRef[] classRefs = new ClassRef[count];
379         int idx = 0;
380         for (int i = 0; i < mTypeIds.length; i++) {
381             if (sparseRefs[i] != null)
382                 classRefs[idx++] = sparseRefs[i];
383         }
384 
385         assert idx == count;
386 
387         return classRefs;
388     }
389 
390     /**
391      * Runs through the list of field references, inserting external
392      * references into the appropriate ClassRef.
393      */
addExternalFieldReferences(ClassRef[] sparseRefs)394     private void addExternalFieldReferences(ClassRef[] sparseRefs) {
395         for (int i = 0; i < mFieldIds.length; i++) {
396             if (!mTypeIds[mFieldIds[i].classIdx].internal) {
397                 FieldIdItem fieldId = mFieldIds[i];
398                 FieldRef newFieldRef = new FieldRef(
399                         classNameFromTypeIndex(fieldId.classIdx),
400                         classNameFromTypeIndex(fieldId.typeIdx),
401                         mStrings[fieldId.nameIdx]);
402                 sparseRefs[mFieldIds[i].classIdx].addField(newFieldRef);
403             }
404         }
405     }
406 
407     /**
408      * Runs through the list of method references, inserting external
409      * references into the appropriate ClassRef.
410      */
addExternalMethodReferences(ClassRef[] sparseRefs)411     private void addExternalMethodReferences(ClassRef[] sparseRefs) {
412         for (int i = 0; i < mMethodIds.length; i++) {
413             if (!mTypeIds[mMethodIds[i].classIdx].internal) {
414                 MethodIdItem methodId = mMethodIds[i];
415                 MethodRef newMethodRef = new MethodRef(
416                         classNameFromTypeIndex(methodId.classIdx),
417                         argArrayFromProtoIndex(methodId.protoIdx),
418                         returnTypeFromProtoIndex(methodId.protoIdx),
419                         mStrings[methodId.nameIdx]);
420                 sparseRefs[mMethodIds[i].classIdx].addMethod(newMethodRef);
421             }
422         }
423     }
424 
425 
426     /*
427      * =======================================================================
428      *      Basic I/O functions
429      * =======================================================================
430      */
431 
432     /**
433      * Seeks the DEX file to the specified absolute position.
434      */
seek(int position)435     void seek(int position) throws IOException {
436         mDexFile.seek(position);
437     }
438 
439     /**
440      * Fills the buffer by reading bytes from the DEX file.
441      */
readBytes(byte[] buffer)442     void readBytes(byte[] buffer) throws IOException {
443         mDexFile.readFully(buffer);
444     }
445 
446     /**
447      * Reads a single signed byte value.
448      */
readByte()449     byte readByte() throws IOException {
450         mDexFile.readFully(tmpBuf, 0, 1);
451         return tmpBuf[0];
452     }
453 
454     /**
455      * Reads a signed 16-bit integer, byte-swapping if necessary.
456      */
readShort()457     short readShort() throws IOException {
458         mDexFile.readFully(tmpBuf, 0, 2);
459         if (isBigEndian) {
460             return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8));
461         } else {
462             return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8));
463         }
464     }
465 
466     /**
467      * Reads a signed 32-bit integer, byte-swapping if necessary.
468      */
readInt()469     int readInt() throws IOException {
470         mDexFile.readFully(tmpBuf, 0, 4);
471 
472         if (isBigEndian) {
473             return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) |
474                    ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24);
475         } else {
476             return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) |
477                    ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24);
478         }
479     }
480 
481     /**
482      * Reads a variable-length unsigned LEB128 value.  Does not attempt to
483      * verify that the value is valid.
484      *
485      * @throws EOFException if we run off the end of the file
486      */
readUnsignedLeb128()487     int readUnsignedLeb128() throws IOException {
488         int result = 0;
489         byte val;
490 
491         do {
492             val = readByte();
493             result = (result << 7) | (val & 0x7f);
494         } while (val < 0);
495 
496         return result;
497     }
498 
499     /**
500      * Reads a UTF-8 string.
501      *
502      * We don't know how long the UTF-8 string is, so we have to read one
503      * byte at a time.  We could make an educated guess based on the
504      * utf16_size and seek back if we get it wrong, but seeking backward
505      * may cause the underlying implementation to reload I/O buffers.
506      */
readString()507     String readString() throws IOException {
508         int utf16len = readUnsignedLeb128();
509         byte inBuf[] = new byte[utf16len * 3];      // worst case
510         int idx;
511 
512         for (idx = 0; idx < inBuf.length; idx++) {
513             byte val = readByte();
514             if (val == 0)
515                 break;
516             inBuf[idx] = val;
517         }
518 
519         return new String(inBuf, 0, idx, "UTF-8");
520     }
521 
522 
523     /*
524      * =======================================================================
525      *      Internal "structure" declarations
526      * =======================================================================
527      */
528 
529     /**
530      * Holds the contents of a header_item.
531      */
532     static class HeaderItem {
533         public int fileSize;
534         public int headerSize;
535         public int endianTag;
536         public int stringIdsSize, stringIdsOff;
537         public int typeIdsSize, typeIdsOff;
538         public int protoIdsSize, protoIdsOff;
539         public int fieldIdsSize, fieldIdsOff;
540         public int methodIdsSize, methodIdsOff;
541         public int classDefsSize, classDefsOff;
542 
543         /* expected magic values */
544         public static final byte[] DEX_FILE_MAGIC_v035 =
545             "dex\n035\0".getBytes(StandardCharsets.US_ASCII);
546 
547         // Dex version 036 skipped because of an old dalvik bug on some versions
548         // of android where dex files with that version number would erroneously
549         // be accepted and run. See: art/runtime/dex_file.cc
550 
551         // V037 was introduced in API LEVEL 24
552         public static final byte[] DEX_FILE_MAGIC_v037 =
553             "dex\n037\0".getBytes(StandardCharsets.US_ASCII);
554 
555         // V038 was introduced in API LEVEL 26
556         public static final byte[] DEX_FILE_MAGIC_v038 =
557             "dex\n038\0".getBytes(StandardCharsets.US_ASCII);
558 
559         // V039 was introduced in API LEVEL 28
560         public static final byte[] DEX_FILE_MAGIC_v039 =
561             "dex\n039\0".getBytes(StandardCharsets.US_ASCII);
562 
563         public static final int ENDIAN_CONSTANT = 0x12345678;
564         public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412;
565     }
566 
567     /**
568      * Holds the contents of a type_id_item.
569      *
570      * This is chiefly a list of indices into the string table.  We need
571      * some additional bits of data, such as whether or not the type ID
572      * represents a class defined in this DEX, so we use an object for
573      * each instead of a simple integer.  (Could use a parallel array, but
574      * since this is a desktop app it's not essential.)
575      */
576     static class TypeIdItem {
577         public int descriptorIdx;       // index into string_ids
578 
579         public boolean internal;        // defined within this DEX file?
580     }
581 
582     /**
583      * Holds the contents of a proto_id_item.
584      */
585     static class ProtoIdItem {
586         public int shortyIdx;           // index into string_ids
587         public int returnTypeIdx;       // index into type_ids
588         public int parametersOff;       // file offset to a type_list
589 
590         public int types[];             // contents of type list
591     }
592 
593     /**
594      * Holds the contents of a field_id_item.
595      */
596     static class FieldIdItem {
597         public int classIdx;            // index into type_ids (defining class)
598         public int typeIdx;             // index into type_ids (field type)
599         public int nameIdx;             // index into string_ids
600     }
601 
602     /**
603      * Holds the contents of a method_id_item.
604      */
605     static class MethodIdItem {
606         public int classIdx;            // index into type_ids
607         public int protoIdx;            // index into proto_ids
608         public int nameIdx;             // index into string_ids
609     }
610 
611     /**
612      * Holds the contents of a class_def_item.
613      *
614      * We don't really need a class for this, but there's some stuff in
615      * the class_def_item that we might want later.
616      */
617     static class ClassDefItem {
618         public int classIdx;            // index into type_ids
619     }
620 }
621