1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.dexdeps; 18 19 import java.io.IOException; 20 import java.io.RandomAccessFile; 21 import java.nio.charset.StandardCharsets; 22 import java.util.Arrays; 23 24 /** 25 * Data extracted from a DEX file. 26 */ 27 public class DexData { 28 private RandomAccessFile mDexFile; 29 private HeaderItem mHeaderItem; 30 private String[] mStrings; // strings from string_data_* 31 private TypeIdItem[] mTypeIds; 32 private ProtoIdItem[] mProtoIds; 33 private FieldIdItem[] mFieldIds; 34 private MethodIdItem[] mMethodIds; 35 private ClassDefItem[] mClassDefs; 36 37 private byte tmpBuf[] = new byte[4]; 38 private boolean isBigEndian = false; 39 40 /** 41 * Constructs a new DexData for this file. 42 */ DexData(RandomAccessFile raf)43 public DexData(RandomAccessFile raf) { 44 mDexFile = raf; 45 } 46 47 /** 48 * Loads the contents of the DEX file into our data structures. 49 * 50 * @throws IOException if we encounter a problem while reading 51 * @throws DexDataException if the DEX contents look bad 52 */ load()53 public void load() throws IOException { 54 parseHeaderItem(); 55 56 loadStrings(); 57 loadTypeIds(); 58 loadProtoIds(); 59 loadFieldIds(); 60 loadMethodIds(); 61 loadClassDefs(); 62 63 markInternalClasses(); 64 } 65 66 /** 67 * Verifies the given magic number. 68 */ verifyMagic(byte[] magic)69 private static boolean verifyMagic(byte[] magic) { 70 return Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v035) || 71 Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v037) || 72 Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v038) || 73 Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v039); 74 } 75 76 /** 77 * Parses the interesting bits out of the header. 78 */ parseHeaderItem()79 void parseHeaderItem() throws IOException { 80 mHeaderItem = new HeaderItem(); 81 82 seek(0); 83 84 byte[] magic = new byte[8]; 85 readBytes(magic); 86 if (!verifyMagic(magic)) { 87 System.err.println("Magic number is wrong -- are you sure " + 88 "this is a DEX file?"); 89 throw new DexDataException(); 90 } 91 92 /* 93 * Read the endian tag, so we properly swap things as we read 94 * them from here on. 95 */ 96 seek(8+4+20+4+4); 97 mHeaderItem.endianTag = readInt(); 98 if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) { 99 /* do nothing */ 100 } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){ 101 /* file is big-endian (!), reverse future reads */ 102 isBigEndian = true; 103 } else { 104 System.err.println("Endian constant has unexpected value " + 105 Integer.toHexString(mHeaderItem.endianTag)); 106 throw new DexDataException(); 107 } 108 109 seek(8+4+20); // magic, checksum, signature 110 mHeaderItem.fileSize = readInt(); 111 mHeaderItem.headerSize = readInt(); 112 /*mHeaderItem.endianTag =*/ readInt(); 113 /*mHeaderItem.linkSize =*/ readInt(); 114 /*mHeaderItem.linkOff =*/ readInt(); 115 /*mHeaderItem.mapOff =*/ readInt(); 116 mHeaderItem.stringIdsSize = readInt(); 117 mHeaderItem.stringIdsOff = readInt(); 118 mHeaderItem.typeIdsSize = readInt(); 119 mHeaderItem.typeIdsOff = readInt(); 120 mHeaderItem.protoIdsSize = readInt(); 121 mHeaderItem.protoIdsOff = readInt(); 122 mHeaderItem.fieldIdsSize = readInt(); 123 mHeaderItem.fieldIdsOff = readInt(); 124 mHeaderItem.methodIdsSize = readInt(); 125 mHeaderItem.methodIdsOff = readInt(); 126 mHeaderItem.classDefsSize = readInt(); 127 mHeaderItem.classDefsOff = readInt(); 128 /*mHeaderItem.dataSize =*/ readInt(); 129 /*mHeaderItem.dataOff =*/ readInt(); 130 } 131 132 /** 133 * Loads the string table out of the DEX. 134 * 135 * First we read all of the string_id_items, then we read all of the 136 * string_data_item. Doing it this way should allow us to avoid 137 * seeking around in the file. 138 */ loadStrings()139 void loadStrings() throws IOException { 140 int count = mHeaderItem.stringIdsSize; 141 int stringOffsets[] = new int[count]; 142 143 //System.out.println("reading " + count + " strings"); 144 145 seek(mHeaderItem.stringIdsOff); 146 for (int i = 0; i < count; i++) { 147 stringOffsets[i] = readInt(); 148 } 149 150 mStrings = new String[count]; 151 152 seek(stringOffsets[0]); 153 for (int i = 0; i < count; i++) { 154 seek(stringOffsets[i]); // should be a no-op 155 mStrings[i] = readString(); 156 //System.out.println("STR: " + i + ": " + mStrings[i]); 157 } 158 } 159 160 /** 161 * Loads the type ID list. 162 */ loadTypeIds()163 void loadTypeIds() throws IOException { 164 int count = mHeaderItem.typeIdsSize; 165 mTypeIds = new TypeIdItem[count]; 166 167 //System.out.println("reading " + count + " typeIds"); 168 seek(mHeaderItem.typeIdsOff); 169 for (int i = 0; i < count; i++) { 170 mTypeIds[i] = new TypeIdItem(); 171 mTypeIds[i].descriptorIdx = readInt(); 172 173 //System.out.println(i + ": " + mTypeIds[i].descriptorIdx + 174 // " " + mStrings[mTypeIds[i].descriptorIdx]); 175 } 176 } 177 178 /** 179 * Loads the proto ID list. 180 */ loadProtoIds()181 void loadProtoIds() throws IOException { 182 int count = mHeaderItem.protoIdsSize; 183 mProtoIds = new ProtoIdItem[count]; 184 185 //System.out.println("reading " + count + " protoIds"); 186 seek(mHeaderItem.protoIdsOff); 187 188 /* 189 * Read the proto ID items. 190 */ 191 for (int i = 0; i < count; i++) { 192 mProtoIds[i] = new ProtoIdItem(); 193 mProtoIds[i].shortyIdx = readInt(); 194 mProtoIds[i].returnTypeIdx = readInt(); 195 mProtoIds[i].parametersOff = readInt(); 196 197 //System.out.println(i + ": " + mProtoIds[i].shortyIdx + 198 // " " + mStrings[mProtoIds[i].shortyIdx]); 199 } 200 201 /* 202 * Go back through and read the type lists. 203 */ 204 for (int i = 0; i < count; i++) { 205 ProtoIdItem protoId = mProtoIds[i]; 206 207 int offset = protoId.parametersOff; 208 209 if (offset == 0) { 210 protoId.types = new int[0]; 211 continue; 212 } else { 213 seek(offset); 214 int size = readInt(); // #of entries in list 215 protoId.types = new int[size]; 216 217 for (int j = 0; j < size; j++) { 218 protoId.types[j] = readShort() & 0xffff; 219 } 220 } 221 } 222 } 223 224 /** 225 * Loads the field ID list. 226 */ loadFieldIds()227 void loadFieldIds() throws IOException { 228 int count = mHeaderItem.fieldIdsSize; 229 mFieldIds = new FieldIdItem[count]; 230 231 //System.out.println("reading " + count + " fieldIds"); 232 seek(mHeaderItem.fieldIdsOff); 233 for (int i = 0; i < count; i++) { 234 mFieldIds[i] = new FieldIdItem(); 235 mFieldIds[i].classIdx = readShort() & 0xffff; 236 mFieldIds[i].typeIdx = readShort() & 0xffff; 237 mFieldIds[i].nameIdx = readInt(); 238 239 //System.out.println(i + ": " + mFieldIds[i].nameIdx + 240 // " " + mStrings[mFieldIds[i].nameIdx]); 241 } 242 } 243 244 /** 245 * Loads the method ID list. 246 */ loadMethodIds()247 void loadMethodIds() throws IOException { 248 int count = mHeaderItem.methodIdsSize; 249 mMethodIds = new MethodIdItem[count]; 250 251 //System.out.println("reading " + count + " methodIds"); 252 seek(mHeaderItem.methodIdsOff); 253 for (int i = 0; i < count; i++) { 254 mMethodIds[i] = new MethodIdItem(); 255 mMethodIds[i].classIdx = readShort() & 0xffff; 256 mMethodIds[i].protoIdx = readShort() & 0xffff; 257 mMethodIds[i].nameIdx = readInt(); 258 259 //System.out.println(i + ": " + mMethodIds[i].nameIdx + 260 // " " + mStrings[mMethodIds[i].nameIdx]); 261 } 262 } 263 264 /** 265 * Loads the class defs list. 266 */ loadClassDefs()267 void loadClassDefs() throws IOException { 268 int count = mHeaderItem.classDefsSize; 269 mClassDefs = new ClassDefItem[count]; 270 271 //System.out.println("reading " + count + " classDefs"); 272 seek(mHeaderItem.classDefsOff); 273 for (int i = 0; i < count; i++) { 274 mClassDefs[i] = new ClassDefItem(); 275 mClassDefs[i].classIdx = readInt(); 276 277 /* access_flags = */ readInt(); 278 /* superclass_idx = */ readInt(); 279 /* interfaces_off = */ readInt(); 280 /* source_file_idx = */ readInt(); 281 /* annotations_off = */ readInt(); 282 /* class_data_off = */ readInt(); 283 /* static_values_off = */ readInt(); 284 285 //System.out.println(i + ": " + mClassDefs[i].classIdx + " " + 286 // mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]); 287 } 288 } 289 290 /** 291 * Sets the "internal" flag on type IDs which are defined in the 292 * DEX file or within the VM (e.g. primitive classes and arrays). 293 */ markInternalClasses()294 void markInternalClasses() { 295 for (int i = mClassDefs.length -1; i >= 0; i--) { 296 mTypeIds[mClassDefs[i].classIdx].internal = true; 297 } 298 299 for (int i = 0; i < mTypeIds.length; i++) { 300 String className = mStrings[mTypeIds[i].descriptorIdx]; 301 302 if (className.length() == 1) { 303 // primitive class 304 mTypeIds[i].internal = true; 305 } else if (className.charAt(0) == '[') { 306 mTypeIds[i].internal = true; 307 } 308 309 //System.out.println(i + " " + 310 // (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " + 311 // mStrings[mTypeIds[i].descriptorIdx]); 312 } 313 } 314 315 316 /* 317 * ======================================================================= 318 * Queries 319 * ======================================================================= 320 */ 321 322 /** 323 * Returns the class name, given an index into the type_ids table. 324 */ classNameFromTypeIndex(int idx)325 private String classNameFromTypeIndex(int idx) { 326 return mStrings[mTypeIds[idx].descriptorIdx]; 327 } 328 329 /** 330 * Returns an array of method argument type strings, given an index 331 * into the proto_ids table. 332 */ argArrayFromProtoIndex(int idx)333 private String[] argArrayFromProtoIndex(int idx) { 334 ProtoIdItem protoId = mProtoIds[idx]; 335 String[] result = new String[protoId.types.length]; 336 337 for (int i = 0; i < protoId.types.length; i++) { 338 result[i] = mStrings[mTypeIds[protoId.types[i]].descriptorIdx]; 339 } 340 341 return result; 342 } 343 344 /** 345 * Returns a string representing the method's return type, given an 346 * index into the proto_ids table. 347 */ returnTypeFromProtoIndex(int idx)348 private String returnTypeFromProtoIndex(int idx) { 349 ProtoIdItem protoId = mProtoIds[idx]; 350 return mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx]; 351 } 352 353 /** 354 * Returns an array with all of the class references that don't 355 * correspond to classes in the DEX file. Each class reference has 356 * a list of the referenced fields and methods associated with 357 * that class. 358 */ getExternalReferences()359 public ClassRef[] getExternalReferences() { 360 // create a sparse array of ClassRef that parallels mTypeIds 361 ClassRef[] sparseRefs = new ClassRef[mTypeIds.length]; 362 363 // create entries for all externally-referenced classes 364 int count = 0; 365 for (int i = 0; i < mTypeIds.length; i++) { 366 if (!mTypeIds[i].internal) { 367 sparseRefs[i] = 368 new ClassRef(mStrings[mTypeIds[i].descriptorIdx]); 369 count++; 370 } 371 } 372 373 // add fields and methods to the appropriate class entry 374 addExternalFieldReferences(sparseRefs); 375 addExternalMethodReferences(sparseRefs); 376 377 // crunch out the sparseness 378 ClassRef[] classRefs = new ClassRef[count]; 379 int idx = 0; 380 for (int i = 0; i < mTypeIds.length; i++) { 381 if (sparseRefs[i] != null) 382 classRefs[idx++] = sparseRefs[i]; 383 } 384 385 assert idx == count; 386 387 return classRefs; 388 } 389 390 /** 391 * Runs through the list of field references, inserting external 392 * references into the appropriate ClassRef. 393 */ addExternalFieldReferences(ClassRef[] sparseRefs)394 private void addExternalFieldReferences(ClassRef[] sparseRefs) { 395 for (int i = 0; i < mFieldIds.length; i++) { 396 if (!mTypeIds[mFieldIds[i].classIdx].internal) { 397 FieldIdItem fieldId = mFieldIds[i]; 398 FieldRef newFieldRef = new FieldRef( 399 classNameFromTypeIndex(fieldId.classIdx), 400 classNameFromTypeIndex(fieldId.typeIdx), 401 mStrings[fieldId.nameIdx]); 402 sparseRefs[mFieldIds[i].classIdx].addField(newFieldRef); 403 } 404 } 405 } 406 407 /** 408 * Runs through the list of method references, inserting external 409 * references into the appropriate ClassRef. 410 */ addExternalMethodReferences(ClassRef[] sparseRefs)411 private void addExternalMethodReferences(ClassRef[] sparseRefs) { 412 for (int i = 0; i < mMethodIds.length; i++) { 413 if (!mTypeIds[mMethodIds[i].classIdx].internal) { 414 MethodIdItem methodId = mMethodIds[i]; 415 MethodRef newMethodRef = new MethodRef( 416 classNameFromTypeIndex(methodId.classIdx), 417 argArrayFromProtoIndex(methodId.protoIdx), 418 returnTypeFromProtoIndex(methodId.protoIdx), 419 mStrings[methodId.nameIdx]); 420 sparseRefs[mMethodIds[i].classIdx].addMethod(newMethodRef); 421 } 422 } 423 } 424 425 426 /* 427 * ======================================================================= 428 * Basic I/O functions 429 * ======================================================================= 430 */ 431 432 /** 433 * Seeks the DEX file to the specified absolute position. 434 */ seek(int position)435 void seek(int position) throws IOException { 436 mDexFile.seek(position); 437 } 438 439 /** 440 * Fills the buffer by reading bytes from the DEX file. 441 */ readBytes(byte[] buffer)442 void readBytes(byte[] buffer) throws IOException { 443 mDexFile.readFully(buffer); 444 } 445 446 /** 447 * Reads a single signed byte value. 448 */ readByte()449 byte readByte() throws IOException { 450 mDexFile.readFully(tmpBuf, 0, 1); 451 return tmpBuf[0]; 452 } 453 454 /** 455 * Reads a signed 16-bit integer, byte-swapping if necessary. 456 */ readShort()457 short readShort() throws IOException { 458 mDexFile.readFully(tmpBuf, 0, 2); 459 if (isBigEndian) { 460 return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8)); 461 } else { 462 return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8)); 463 } 464 } 465 466 /** 467 * Reads a signed 32-bit integer, byte-swapping if necessary. 468 */ readInt()469 int readInt() throws IOException { 470 mDexFile.readFully(tmpBuf, 0, 4); 471 472 if (isBigEndian) { 473 return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) | 474 ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24); 475 } else { 476 return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) | 477 ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24); 478 } 479 } 480 481 /** 482 * Reads a variable-length unsigned LEB128 value. Does not attempt to 483 * verify that the value is valid. 484 * 485 * @throws EOFException if we run off the end of the file 486 */ readUnsignedLeb128()487 int readUnsignedLeb128() throws IOException { 488 int result = 0; 489 byte val; 490 491 do { 492 val = readByte(); 493 result = (result << 7) | (val & 0x7f); 494 } while (val < 0); 495 496 return result; 497 } 498 499 /** 500 * Reads a UTF-8 string. 501 * 502 * We don't know how long the UTF-8 string is, so we have to read one 503 * byte at a time. We could make an educated guess based on the 504 * utf16_size and seek back if we get it wrong, but seeking backward 505 * may cause the underlying implementation to reload I/O buffers. 506 */ readString()507 String readString() throws IOException { 508 int utf16len = readUnsignedLeb128(); 509 byte inBuf[] = new byte[utf16len * 3]; // worst case 510 int idx; 511 512 for (idx = 0; idx < inBuf.length; idx++) { 513 byte val = readByte(); 514 if (val == 0) 515 break; 516 inBuf[idx] = val; 517 } 518 519 return new String(inBuf, 0, idx, "UTF-8"); 520 } 521 522 523 /* 524 * ======================================================================= 525 * Internal "structure" declarations 526 * ======================================================================= 527 */ 528 529 /** 530 * Holds the contents of a header_item. 531 */ 532 static class HeaderItem { 533 public int fileSize; 534 public int headerSize; 535 public int endianTag; 536 public int stringIdsSize, stringIdsOff; 537 public int typeIdsSize, typeIdsOff; 538 public int protoIdsSize, protoIdsOff; 539 public int fieldIdsSize, fieldIdsOff; 540 public int methodIdsSize, methodIdsOff; 541 public int classDefsSize, classDefsOff; 542 543 /* expected magic values */ 544 public static final byte[] DEX_FILE_MAGIC_v035 = 545 "dex\n035\0".getBytes(StandardCharsets.US_ASCII); 546 547 // Dex version 036 skipped because of an old dalvik bug on some versions 548 // of android where dex files with that version number would erroneously 549 // be accepted and run. See: art/runtime/dex_file.cc 550 551 // V037 was introduced in API LEVEL 24 552 public static final byte[] DEX_FILE_MAGIC_v037 = 553 "dex\n037\0".getBytes(StandardCharsets.US_ASCII); 554 555 // V038 was introduced in API LEVEL 26 556 public static final byte[] DEX_FILE_MAGIC_v038 = 557 "dex\n038\0".getBytes(StandardCharsets.US_ASCII); 558 559 // V039 was introduced in API LEVEL 28 560 public static final byte[] DEX_FILE_MAGIC_v039 = 561 "dex\n039\0".getBytes(StandardCharsets.US_ASCII); 562 563 public static final int ENDIAN_CONSTANT = 0x12345678; 564 public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412; 565 } 566 567 /** 568 * Holds the contents of a type_id_item. 569 * 570 * This is chiefly a list of indices into the string table. We need 571 * some additional bits of data, such as whether or not the type ID 572 * represents a class defined in this DEX, so we use an object for 573 * each instead of a simple integer. (Could use a parallel array, but 574 * since this is a desktop app it's not essential.) 575 */ 576 static class TypeIdItem { 577 public int descriptorIdx; // index into string_ids 578 579 public boolean internal; // defined within this DEX file? 580 } 581 582 /** 583 * Holds the contents of a proto_id_item. 584 */ 585 static class ProtoIdItem { 586 public int shortyIdx; // index into string_ids 587 public int returnTypeIdx; // index into type_ids 588 public int parametersOff; // file offset to a type_list 589 590 public int types[]; // contents of type list 591 } 592 593 /** 594 * Holds the contents of a field_id_item. 595 */ 596 static class FieldIdItem { 597 public int classIdx; // index into type_ids (defining class) 598 public int typeIdx; // index into type_ids (field type) 599 public int nameIdx; // index into string_ids 600 } 601 602 /** 603 * Holds the contents of a method_id_item. 604 */ 605 static class MethodIdItem { 606 public int classIdx; // index into type_ids 607 public int protoIdx; // index into proto_ids 608 public int nameIdx; // index into string_ids 609 } 610 611 /** 612 * Holds the contents of a class_def_item. 613 * 614 * We don't really need a class for this, but there's some stuff in 615 * the class_def_item that we might want later. 616 */ 617 static class ClassDefItem { 618 public int classIdx; // index into type_ids 619 } 620 } 621