1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.dexdeps;
18 
19 import java.io.IOException;
20 import java.io.RandomAccessFile;
21 import java.nio.ByteBuffer;
22 import java.nio.ByteOrder;
23 import java.nio.charset.StandardCharsets;
24 import java.util.Arrays;
25 
26 /** Data extracted from a DEX file. */
27 public class DexData {
28     private RandomAccessFile mDexFile;
29     private HeaderItem mHeaderItem;
30     private String[] mStrings; // strings from string_data_*
31     private TypeIdItem[] mTypeIds;
32     private ProtoIdItem[] mProtoIds;
33     private FieldIdItem[] mFieldIds;
34     private MethodIdItem[] mMethodIds;
35     private ClassDefItem[] mClassDefs;
36 
37     private byte tmpBuf[] = new byte[4];
38     private ByteOrder mByteOrder = ByteOrder.LITTLE_ENDIAN;
39 
40     /** Constructs a new DexData for this file. */
DexData(RandomAccessFile raf)41     public DexData(RandomAccessFile raf) {
42         mDexFile = raf;
43     }
44 
45     /**
46      * Loads the contents of the DEX file into our data structures.
47      *
48      * @throws IOException if we encounter a problem while reading
49      * @throws DexDataException if the DEX contents look bad
50      */
load()51     public void load() throws IOException {
52         parseHeaderItem();
53 
54         loadStrings();
55         loadTypeIds();
56         loadProtoIds();
57         loadFieldIds();
58         loadMethodIds();
59         loadClassDefs();
60 
61         markInternalClasses();
62     }
63 
64     /** Verifies the given magic number. */
verifyMagic(byte[] magic)65     private static boolean verifyMagic(byte[] magic) {
66         return Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v035)
67                 || Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v037)
68                 || Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v038)
69                 || Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v039)
70                 || Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v041);
71     }
72 
73     /** Parses the interesting bits out of the header. */
parseHeaderItem()74     void parseHeaderItem() throws IOException {
75         mHeaderItem = new HeaderItem();
76 
77         seek(0);
78 
79         byte[] magic = new byte[8];
80         readBytes(magic);
81         if (!verifyMagic(magic)) {
82             System.err.println("Magic number is wrong -- are you sure " + "this is a DEX file?");
83             throw new DexDataException();
84         }
85 
86         /*
87          * Read the endian tag, so we properly swap things as we read
88          * them from here on.
89          */
90         seek(8 + 4 + 20 + 4 + 4);
91         mHeaderItem.endianTag = readInt();
92         if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) {
93             /* do nothing */
94         } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT) {
95             /* file is big-endian (!), reverse future reads */
96             mByteOrder = ByteOrder.BIG_ENDIAN;
97         } else {
98             System.err.println(
99                     "Endian constant has unexpected value "
100                             + Integer.toHexString(mHeaderItem.endianTag));
101             throw new DexDataException();
102         }
103 
104         seek(8 + 4 + 20); // magic, checksum, signature
105         ByteBuffer buffer = readByteBuffer(Integer.BYTES * 20);
106         mHeaderItem.fileSize = buffer.getInt();
107         mHeaderItem.headerSize = buffer.getInt();
108         /*mHeaderItem.endianTag =*/ buffer.getInt();
109         /*mHeaderItem.linkSize =*/ buffer.getInt();
110         /*mHeaderItem.linkOff =*/ buffer.getInt();
111         /*mHeaderItem.mapOff =*/ buffer.getInt();
112         mHeaderItem.stringIdsSize = buffer.getInt();
113         mHeaderItem.stringIdsOff = buffer.getInt();
114         mHeaderItem.typeIdsSize = buffer.getInt();
115         mHeaderItem.typeIdsOff = buffer.getInt();
116         mHeaderItem.protoIdsSize = buffer.getInt();
117         mHeaderItem.protoIdsOff = buffer.getInt();
118         mHeaderItem.fieldIdsSize = buffer.getInt();
119         mHeaderItem.fieldIdsOff = buffer.getInt();
120         mHeaderItem.methodIdsSize = buffer.getInt();
121         mHeaderItem.methodIdsOff = buffer.getInt();
122         mHeaderItem.classDefsSize = buffer.getInt();
123         mHeaderItem.classDefsOff = buffer.getInt();
124         /*mHeaderItem.dataSize =*/ buffer.getInt();
125         /*mHeaderItem.dataOff =*/ buffer.getInt();
126     }
127 
128     /**
129      * Loads the string table out of the DEX.
130      *
131      * <p>First we read all of the string_id_items, then we read all of the string_data_item. Doing
132      * it this way should allow us to avoid seeking around in the file.
133      */
loadStrings()134     void loadStrings() throws IOException {
135         int count = mHeaderItem.stringIdsSize;
136         int stringOffsets[] = new int[count];
137 
138         // System.out.println("reading " + count + " strings");
139 
140         seek(mHeaderItem.stringIdsOff);
141         readByteBuffer(Integer.BYTES * count).asIntBuffer().get(stringOffsets);
142 
143         mStrings = new String[count];
144 
145         seek(stringOffsets[0]);
146         for (int i = 0; i < count; i++) {
147             seek(stringOffsets[i]); // should be a no-op
148             mStrings[i] = readString();
149             // System.out.println("STR: " + i + ": " + mStrings[i]);
150         }
151     }
152 
153     /** Loads the type ID list. */
loadTypeIds()154     void loadTypeIds() throws IOException {
155         int count = mHeaderItem.typeIdsSize;
156         mTypeIds = new TypeIdItem[count];
157 
158         // System.out.println("reading " + count + " typeIds");
159         seek(mHeaderItem.typeIdsOff);
160         ByteBuffer buffer = readByteBuffer(Integer.BYTES * count);
161         for (int i = 0; i < count; i++) {
162             mTypeIds[i] = new TypeIdItem();
163             mTypeIds[i].descriptorIdx = buffer.getInt();
164 
165             // System.out.println(i + ": " + mTypeIds[i].descriptorIdx +
166             //    " " + mStrings[mTypeIds[i].descriptorIdx]);
167         }
168     }
169 
170     /** Loads the proto ID list. */
loadProtoIds()171     void loadProtoIds() throws IOException {
172         int count = mHeaderItem.protoIdsSize;
173         mProtoIds = new ProtoIdItem[count];
174 
175         // System.out.println("reading " + count + " protoIds");
176         seek(mHeaderItem.protoIdsOff);
177         ByteBuffer buffer = readByteBuffer(Integer.BYTES * 3 * count);
178 
179         /*
180          * Read the proto ID items.
181          */
182         for (int i = 0; i < count; i++) {
183             mProtoIds[i] = new ProtoIdItem();
184             mProtoIds[i].shortyIdx = buffer.getInt();
185             mProtoIds[i].returnTypeIdx = buffer.getInt();
186             mProtoIds[i].parametersOff = buffer.getInt();
187 
188             // System.out.println(i + ": " + mProtoIds[i].shortyIdx +
189             //    " " + mStrings[mProtoIds[i].shortyIdx]);
190         }
191 
192         /*
193          * Go back through and read the type lists.
194          */
195         for (int i = 0; i < count; i++) {
196             ProtoIdItem protoId = mProtoIds[i];
197 
198             int offset = protoId.parametersOff;
199 
200             if (offset == 0) {
201                 protoId.types = new int[0];
202                 continue;
203             } else {
204                 seek(offset);
205                 int size = readInt(); // #of entries in list
206                 buffer = readByteBuffer(Short.BYTES * size);
207                 protoId.types = new int[size];
208 
209                 for (int j = 0; j < size; j++) {
210                     protoId.types[j] = buffer.getShort() & 0xffff;
211                 }
212             }
213         }
214     }
215 
216     /** Loads the field ID list. */
loadFieldIds()217     void loadFieldIds() throws IOException {
218         int count = mHeaderItem.fieldIdsSize;
219         mFieldIds = new FieldIdItem[count];
220 
221         // System.out.println("reading " + count + " fieldIds");
222         seek(mHeaderItem.fieldIdsOff);
223         ByteBuffer buffer = readByteBuffer((Integer.BYTES + Short.BYTES * 2) * count);
224         for (int i = 0; i < count; i++) {
225             mFieldIds[i] = new FieldIdItem();
226             mFieldIds[i].classIdx = buffer.getShort() & 0xffff;
227             mFieldIds[i].typeIdx = buffer.getShort() & 0xffff;
228             mFieldIds[i].nameIdx = buffer.getInt();
229 
230             // System.out.println(i + ": " + mFieldIds[i].nameIdx +
231             //    " " + mStrings[mFieldIds[i].nameIdx]);
232         }
233     }
234 
235     /** Loads the method ID list. */
loadMethodIds()236     void loadMethodIds() throws IOException {
237         int count = mHeaderItem.methodIdsSize;
238         mMethodIds = new MethodIdItem[count];
239 
240         // System.out.println("reading " + count + " methodIds");
241         seek(mHeaderItem.methodIdsOff);
242         ByteBuffer buffer = readByteBuffer((Integer.BYTES + Short.BYTES * 2) * count);
243         for (int i = 0; i < count; i++) {
244             mMethodIds[i] = new MethodIdItem();
245             mMethodIds[i].classIdx = buffer.getShort() & 0xffff;
246             mMethodIds[i].protoIdx = buffer.getShort() & 0xffff;
247             mMethodIds[i].nameIdx = buffer.getInt();
248 
249             // System.out.println(i + ": " + mMethodIds[i].nameIdx +
250             //    " " + mStrings[mMethodIds[i].nameIdx]);
251         }
252     }
253 
254     /** Loads the class defs list. */
loadClassDefs()255     void loadClassDefs() throws IOException {
256         int count = mHeaderItem.classDefsSize;
257         mClassDefs = new ClassDefItem[count];
258 
259         // System.out.println("reading " + count + " classDefs");
260         seek(mHeaderItem.classDefsOff);
261         ByteBuffer buffer = readByteBuffer(Integer.BYTES * 8 * count);
262         for (int i = 0; i < count; i++) {
263             mClassDefs[i] = new ClassDefItem();
264             mClassDefs[i].classIdx = buffer.getInt();
265 
266             /* access_flags= */ buffer.getInt();
267             /* superclass_idx= */ buffer.getInt();
268             /* interfaces_off= */ buffer.getInt();
269             /* source_file_idx= */ buffer.getInt();
270             /* annotations_off= */ buffer.getInt();
271             /* class_data_off= */ buffer.getInt();
272             /* static_values_off= */ buffer.getInt();
273 
274             // System.out.println(i + ": " + mClassDefs[i].classIdx + " " +
275             //    mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]);
276         }
277     }
278 
279     /**
280      * Sets the "internal" flag on type IDs which are defined in the DEX file or within the VM (e.g.
281      * primitive classes and arrays).
282      */
markInternalClasses()283     void markInternalClasses() {
284         for (int i = mClassDefs.length - 1; i >= 0; i--) {
285             mTypeIds[mClassDefs[i].classIdx].internal = true;
286         }
287 
288         for (int i = 0; i < mTypeIds.length; i++) {
289             String className = mStrings[mTypeIds[i].descriptorIdx];
290 
291             if (className.length() == 1) {
292                 // primitive class
293                 mTypeIds[i].internal = true;
294             } else if (className.charAt(0) == '[') {
295                 mTypeIds[i].internal = true;
296             }
297 
298             // System.out.println(i + " " +
299             //    (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " +
300             //    mStrings[mTypeIds[i].descriptorIdx]);
301         }
302     }
303 
304     /*
305      * =======================================================================
306      *      Queries
307      * =======================================================================
308      */
309 
310     /** Returns the class name, given an index into the type_ids table. */
classNameFromTypeIndex(int idx)311     private String classNameFromTypeIndex(int idx) {
312         return mStrings[mTypeIds[idx].descriptorIdx];
313     }
314 
315     /**
316      * Returns an array of method argument type strings, given an index into the proto_ids table.
317      */
argArrayFromProtoIndex(int idx)318     private String[] argArrayFromProtoIndex(int idx) {
319         ProtoIdItem protoId = mProtoIds[idx];
320         String[] result = new String[protoId.types.length];
321 
322         for (int i = 0; i < protoId.types.length; i++) {
323             result[i] = mStrings[mTypeIds[protoId.types[i]].descriptorIdx];
324         }
325 
326         return result;
327     }
328 
329     /**
330      * Returns a string representing the method's return type, given an index into the proto_ids
331      * table.
332      */
returnTypeFromProtoIndex(int idx)333     private String returnTypeFromProtoIndex(int idx) {
334         ProtoIdItem protoId = mProtoIds[idx];
335         return mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx];
336     }
337 
338     /**
339      * Returns an array with all of the class references that don't correspond to classes in the DEX
340      * file. Each class reference has a list of the referenced fields and methods associated with
341      * that class.
342      */
getExternalReferences()343     public ClassRef[] getExternalReferences() {
344         // create a sparse array of ClassRef that parallels mTypeIds
345         ClassRef[] sparseRefs = new ClassRef[mTypeIds.length];
346 
347         // create entries for all externally-referenced classes
348         int count = 0;
349         for (int i = 0; i < mTypeIds.length; i++) {
350             if (!mTypeIds[i].internal) {
351                 sparseRefs[i] = new ClassRef(mStrings[mTypeIds[i].descriptorIdx]);
352                 count++;
353             }
354         }
355 
356         // add fields and methods to the appropriate class entry
357         addExternalFieldReferences(sparseRefs);
358         addExternalMethodReferences(sparseRefs);
359 
360         // crunch out the sparseness
361         ClassRef[] classRefs = new ClassRef[count];
362         int idx = 0;
363         for (int i = 0; i < mTypeIds.length; i++) {
364             if (sparseRefs[i] != null) classRefs[idx++] = sparseRefs[i];
365         }
366 
367         assert idx == count;
368 
369         return classRefs;
370     }
371 
372     /**
373      * Runs through the list of field references, inserting external references into the appropriate
374      * ClassRef.
375      */
addExternalFieldReferences(ClassRef[] sparseRefs)376     private void addExternalFieldReferences(ClassRef[] sparseRefs) {
377         for (int i = 0; i < mFieldIds.length; i++) {
378             if (!mTypeIds[mFieldIds[i].classIdx].internal) {
379                 FieldIdItem fieldId = mFieldIds[i];
380                 FieldRef newFieldRef =
381                         new FieldRef(
382                                 classNameFromTypeIndex(fieldId.classIdx),
383                                 classNameFromTypeIndex(fieldId.typeIdx),
384                                 mStrings[fieldId.nameIdx]);
385                 sparseRefs[mFieldIds[i].classIdx].addField(newFieldRef);
386             }
387         }
388     }
389 
390     /**
391      * Runs through the list of method references, inserting external references into the
392      * appropriate ClassRef.
393      */
addExternalMethodReferences(ClassRef[] sparseRefs)394     private void addExternalMethodReferences(ClassRef[] sparseRefs) {
395         for (int i = 0; i < mMethodIds.length; i++) {
396             if (!mTypeIds[mMethodIds[i].classIdx].internal) {
397                 MethodIdItem methodId = mMethodIds[i];
398                 MethodRef newMethodRef =
399                         new MethodRef(
400                                 classNameFromTypeIndex(methodId.classIdx),
401                                 argArrayFromProtoIndex(methodId.protoIdx),
402                                 returnTypeFromProtoIndex(methodId.protoIdx),
403                                 mStrings[methodId.nameIdx]);
404                 sparseRefs[mMethodIds[i].classIdx].addMethod(newMethodRef);
405             }
406         }
407     }
408 
409     /*
410      * =======================================================================
411      *      Basic I/O functions
412      * =======================================================================
413      */
414 
415     /** Seeks the DEX file to the specified absolute position. */
seek(int position)416     void seek(int position) throws IOException {
417         mDexFile.seek(position);
418     }
419 
420     /** Fills the buffer by reading bytes from the DEX file. */
readBytes(byte[] buffer)421     void readBytes(byte[] buffer) throws IOException {
422         mDexFile.readFully(buffer);
423     }
424 
425     /** Reads a single signed byte value. */
readByte()426     byte readByte() throws IOException {
427         mDexFile.readFully(tmpBuf, 0, 1);
428         return tmpBuf[0];
429     }
430 
431     /** Reads a signed 32-bit integer, byte-swapping if necessary. */
readInt()432     int readInt() throws IOException {
433         mDexFile.readFully(tmpBuf, 0, 4);
434 
435         if (mByteOrder == ByteOrder.BIG_ENDIAN) {
436             return (tmpBuf[3] & 0xff)
437                     | ((tmpBuf[2] & 0xff) << 8)
438                     | ((tmpBuf[1] & 0xff) << 16)
439                     | ((tmpBuf[0] & 0xff) << 24);
440         } else {
441             return (tmpBuf[0] & 0xff)
442                     | ((tmpBuf[1] & 0xff) << 8)
443                     | ((tmpBuf[2] & 0xff) << 16)
444                     | ((tmpBuf[3] & 0xff) << 24);
445         }
446     }
447 
448     /**
449      * Reads a variable-length unsigned LEB128 value. Does not attempt to verify that the value is
450      * valid.
451      *
452      * @throws EOFException if we run off the end of the file
453      */
readUnsignedLeb128()454     int readUnsignedLeb128() throws IOException {
455         int result = 0;
456         byte val;
457 
458         do {
459             val = readByte();
460             result = (result << 7) | (val & 0x7f);
461         } while (val < 0);
462 
463         return result;
464     }
465 
466     /**
467      * Reads bytes and transforms them into a ByteBuffer with the desired byte order set, from which
468      * primitive values can be read.
469      */
readByteBuffer(int size)470     ByteBuffer readByteBuffer(int size) throws IOException {
471         byte bytes[] = new byte[size];
472         mDexFile.read(bytes);
473         return ByteBuffer.wrap(bytes).order(mByteOrder);
474     }
475 
476     /**
477      * Reads a UTF-8 string.
478      *
479      * <p>We don't know how long the UTF-8 string is, so we try to read the worst case amount of
480      * bytes.
481      *
482      * <p>Note that the dex file pointer will likely be at a wrong location after this operation,
483      * which means it can't be used in the middle of sequential reads.
484      */
readString()485     String readString() throws IOException {
486         int utf16len = readUnsignedLeb128();
487         byte inBuf[] = new byte[utf16len * 3]; // worst case
488 
489         int bytesRead = mDexFile.read(inBuf);
490         for (int i = 0; i < bytesRead; i++) {
491             if (inBuf[i] == 0) {
492                 bytesRead = i;
493                 break;
494             }
495         }
496 
497         return new String(inBuf, 0, bytesRead, "UTF-8");
498     }
499 
500     /*
501      * =======================================================================
502      *      Internal "structure" declarations
503      * =======================================================================
504      */
505 
506     /** Holds the contents of a header_item. */
507     static class HeaderItem {
508         public int fileSize;
509         public int headerSize;
510         public int endianTag;
511         public int stringIdsSize, stringIdsOff;
512         public int typeIdsSize, typeIdsOff;
513         public int protoIdsSize, protoIdsOff;
514         public int fieldIdsSize, fieldIdsOff;
515         public int methodIdsSize, methodIdsOff;
516         public int classDefsSize, classDefsOff;
517 
518         /* expected magic values */
519         public static final byte[] DEX_FILE_MAGIC_v035 =
520                 "dex\n035\0".getBytes(StandardCharsets.US_ASCII);
521 
522         // Dex version 036 skipped because of an old dalvik bug on some versions
523         // of android where dex files with that version number would erroneously
524         // be accepted and run. See: art/runtime/dex_file.cc
525 
526         // V037 was introduced in API LEVEL 24
527         public static final byte[] DEX_FILE_MAGIC_v037 =
528                 "dex\n037\0".getBytes(StandardCharsets.US_ASCII);
529 
530         // V038 was introduced in API LEVEL 26
531         public static final byte[] DEX_FILE_MAGIC_v038 =
532                 "dex\n038\0".getBytes(StandardCharsets.US_ASCII);
533 
534         // V039 was introduced in API LEVEL 28
535         public static final byte[] DEX_FILE_MAGIC_v039 =
536                 "dex\n039\0".getBytes(StandardCharsets.US_ASCII);
537 
538         // V041 will be introduced in API LEVEL 35
539         public static final byte[] DEX_FILE_MAGIC_v041 =
540                 "dex\n041\0".getBytes(StandardCharsets.US_ASCII);
541 
542         public static final int ENDIAN_CONSTANT = 0x12345678;
543         public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412;
544     }
545 
546     /**
547      * Holds the contents of a type_id_item.
548      *
549      * <p>This is chiefly a list of indices into the string table. We need some additional bits of
550      * data, such as whether or not the type ID represents a class defined in this DEX, so we use an
551      * object for each instead of a simple integer. (Could use a parallel array, but since this is a
552      * desktop app it's not essential.)
553      */
554     static class TypeIdItem {
555         public int descriptorIdx; // index into string_ids
556 
557         public boolean internal; // defined within this DEX file?
558     }
559 
560     /** Holds the contents of a proto_id_item. */
561     static class ProtoIdItem {
562         public int shortyIdx; // index into string_ids
563         public int returnTypeIdx; // index into type_ids
564         public int parametersOff; // file offset to a type_list
565 
566         public int types[]; // contents of type list
567     }
568 
569     /** Holds the contents of a field_id_item. */
570     static class FieldIdItem {
571         public int classIdx; // index into type_ids (defining class)
572         public int typeIdx; // index into type_ids (field type)
573         public int nameIdx; // index into string_ids
574     }
575 
576     /** Holds the contents of a method_id_item. */
577     static class MethodIdItem {
578         public int classIdx; // index into type_ids
579         public int protoIdx; // index into proto_ids
580         public int nameIdx; // index into string_ids
581     }
582 
583     /**
584      * Holds the contents of a class_def_item.
585      *
586      * <p>We don't really need a class for this, but there's some stuff in the class_def_item that
587      * we might want later.
588      */
589     static class ClassDefItem {
590         public int classIdx; // index into type_ids
591     }
592 }
593