1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.dexdeps; 18 19 import java.io.IOException; 20 import java.io.RandomAccessFile; 21 import java.nio.charset.StandardCharsets; 22 import java.util.Arrays; 23 24 /** 25 * Data extracted from a DEX file. 26 */ 27 public class DexData { 28 private RandomAccessFile mDexFile; 29 private HeaderItem mHeaderItem; 30 private String[] mStrings; // strings from string_data_* 31 private TypeIdItem[] mTypeIds; 32 private ProtoIdItem[] mProtoIds; 33 private FieldIdItem[] mFieldIds; 34 private MethodIdItem[] mMethodIds; 35 private ClassDefItem[] mClassDefs; 36 37 private byte tmpBuf[] = new byte[4]; 38 private boolean isBigEndian = false; 39 40 /** 41 * Constructs a new DexData for this file. 42 */ DexData(RandomAccessFile raf)43 public DexData(RandomAccessFile raf) { 44 mDexFile = raf; 45 } 46 47 /** 48 * Loads the contents of the DEX file into our data structures. 49 * 50 * @throws IOException if we encounter a problem while reading 51 * @throws DexDataException if the DEX contents look bad 52 */ load()53 public void load() throws IOException { 54 parseHeaderItem(); 55 56 loadStrings(); 57 loadTypeIds(); 58 loadProtoIds(); 59 loadFieldIds(); 60 loadMethodIds(); 61 loadClassDefs(); 62 63 markInternalClasses(); 64 } 65 66 /** 67 * Verifies the given magic number. 68 */ verifyMagic(byte[] magic)69 private static boolean verifyMagic(byte[] magic) { 70 return Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v035) || 71 Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v037); 72 } 73 74 /** 75 * Parses the interesting bits out of the header. 76 */ parseHeaderItem()77 void parseHeaderItem() throws IOException { 78 mHeaderItem = new HeaderItem(); 79 80 seek(0); 81 82 byte[] magic = new byte[8]; 83 readBytes(magic); 84 if (!verifyMagic(magic)) { 85 System.err.println("Magic number is wrong -- are you sure " + 86 "this is a DEX file?"); 87 throw new DexDataException(); 88 } 89 90 /* 91 * Read the endian tag, so we properly swap things as we read 92 * them from here on. 93 */ 94 seek(8+4+20+4+4); 95 mHeaderItem.endianTag = readInt(); 96 if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) { 97 /* do nothing */ 98 } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){ 99 /* file is big-endian (!), reverse future reads */ 100 isBigEndian = true; 101 } else { 102 System.err.println("Endian constant has unexpected value " + 103 Integer.toHexString(mHeaderItem.endianTag)); 104 throw new DexDataException(); 105 } 106 107 seek(8+4+20); // magic, checksum, signature 108 mHeaderItem.fileSize = readInt(); 109 mHeaderItem.headerSize = readInt(); 110 /*mHeaderItem.endianTag =*/ readInt(); 111 /*mHeaderItem.linkSize =*/ readInt(); 112 /*mHeaderItem.linkOff =*/ readInt(); 113 /*mHeaderItem.mapOff =*/ readInt(); 114 mHeaderItem.stringIdsSize = readInt(); 115 mHeaderItem.stringIdsOff = readInt(); 116 mHeaderItem.typeIdsSize = readInt(); 117 mHeaderItem.typeIdsOff = readInt(); 118 mHeaderItem.protoIdsSize = readInt(); 119 mHeaderItem.protoIdsOff = readInt(); 120 mHeaderItem.fieldIdsSize = readInt(); 121 mHeaderItem.fieldIdsOff = readInt(); 122 mHeaderItem.methodIdsSize = readInt(); 123 mHeaderItem.methodIdsOff = readInt(); 124 mHeaderItem.classDefsSize = readInt(); 125 mHeaderItem.classDefsOff = readInt(); 126 /*mHeaderItem.dataSize =*/ readInt(); 127 /*mHeaderItem.dataOff =*/ readInt(); 128 } 129 130 /** 131 * Loads the string table out of the DEX. 132 * 133 * First we read all of the string_id_items, then we read all of the 134 * string_data_item. Doing it this way should allow us to avoid 135 * seeking around in the file. 136 */ loadStrings()137 void loadStrings() throws IOException { 138 int count = mHeaderItem.stringIdsSize; 139 int stringOffsets[] = new int[count]; 140 141 //System.out.println("reading " + count + " strings"); 142 143 seek(mHeaderItem.stringIdsOff); 144 for (int i = 0; i < count; i++) { 145 stringOffsets[i] = readInt(); 146 } 147 148 mStrings = new String[count]; 149 150 seek(stringOffsets[0]); 151 for (int i = 0; i < count; i++) { 152 seek(stringOffsets[i]); // should be a no-op 153 mStrings[i] = readString(); 154 //System.out.println("STR: " + i + ": " + mStrings[i]); 155 } 156 } 157 158 /** 159 * Loads the type ID list. 160 */ loadTypeIds()161 void loadTypeIds() throws IOException { 162 int count = mHeaderItem.typeIdsSize; 163 mTypeIds = new TypeIdItem[count]; 164 165 //System.out.println("reading " + count + " typeIds"); 166 seek(mHeaderItem.typeIdsOff); 167 for (int i = 0; i < count; i++) { 168 mTypeIds[i] = new TypeIdItem(); 169 mTypeIds[i].descriptorIdx = readInt(); 170 171 //System.out.println(i + ": " + mTypeIds[i].descriptorIdx + 172 // " " + mStrings[mTypeIds[i].descriptorIdx]); 173 } 174 } 175 176 /** 177 * Loads the proto ID list. 178 */ loadProtoIds()179 void loadProtoIds() throws IOException { 180 int count = mHeaderItem.protoIdsSize; 181 mProtoIds = new ProtoIdItem[count]; 182 183 //System.out.println("reading " + count + " protoIds"); 184 seek(mHeaderItem.protoIdsOff); 185 186 /* 187 * Read the proto ID items. 188 */ 189 for (int i = 0; i < count; i++) { 190 mProtoIds[i] = new ProtoIdItem(); 191 mProtoIds[i].shortyIdx = readInt(); 192 mProtoIds[i].returnTypeIdx = readInt(); 193 mProtoIds[i].parametersOff = readInt(); 194 195 //System.out.println(i + ": " + mProtoIds[i].shortyIdx + 196 // " " + mStrings[mProtoIds[i].shortyIdx]); 197 } 198 199 /* 200 * Go back through and read the type lists. 201 */ 202 for (int i = 0; i < count; i++) { 203 ProtoIdItem protoId = mProtoIds[i]; 204 205 int offset = protoId.parametersOff; 206 207 if (offset == 0) { 208 protoId.types = new int[0]; 209 continue; 210 } else { 211 seek(offset); 212 int size = readInt(); // #of entries in list 213 protoId.types = new int[size]; 214 215 for (int j = 0; j < size; j++) { 216 protoId.types[j] = readShort() & 0xffff; 217 } 218 } 219 } 220 } 221 222 /** 223 * Loads the field ID list. 224 */ loadFieldIds()225 void loadFieldIds() throws IOException { 226 int count = mHeaderItem.fieldIdsSize; 227 mFieldIds = new FieldIdItem[count]; 228 229 //System.out.println("reading " + count + " fieldIds"); 230 seek(mHeaderItem.fieldIdsOff); 231 for (int i = 0; i < count; i++) { 232 mFieldIds[i] = new FieldIdItem(); 233 mFieldIds[i].classIdx = readShort() & 0xffff; 234 mFieldIds[i].typeIdx = readShort() & 0xffff; 235 mFieldIds[i].nameIdx = readInt(); 236 237 //System.out.println(i + ": " + mFieldIds[i].nameIdx + 238 // " " + mStrings[mFieldIds[i].nameIdx]); 239 } 240 } 241 242 /** 243 * Loads the method ID list. 244 */ loadMethodIds()245 void loadMethodIds() throws IOException { 246 int count = mHeaderItem.methodIdsSize; 247 mMethodIds = new MethodIdItem[count]; 248 249 //System.out.println("reading " + count + " methodIds"); 250 seek(mHeaderItem.methodIdsOff); 251 for (int i = 0; i < count; i++) { 252 mMethodIds[i] = new MethodIdItem(); 253 mMethodIds[i].classIdx = readShort() & 0xffff; 254 mMethodIds[i].protoIdx = readShort() & 0xffff; 255 mMethodIds[i].nameIdx = readInt(); 256 257 //System.out.println(i + ": " + mMethodIds[i].nameIdx + 258 // " " + mStrings[mMethodIds[i].nameIdx]); 259 } 260 } 261 262 /** 263 * Loads the class defs list. 264 */ loadClassDefs()265 void loadClassDefs() throws IOException { 266 int count = mHeaderItem.classDefsSize; 267 mClassDefs = new ClassDefItem[count]; 268 269 //System.out.println("reading " + count + " classDefs"); 270 seek(mHeaderItem.classDefsOff); 271 for (int i = 0; i < count; i++) { 272 mClassDefs[i] = new ClassDefItem(); 273 mClassDefs[i].classIdx = readInt(); 274 275 /* access_flags = */ readInt(); 276 /* superclass_idx = */ readInt(); 277 /* interfaces_off = */ readInt(); 278 /* source_file_idx = */ readInt(); 279 /* annotations_off = */ readInt(); 280 /* class_data_off = */ readInt(); 281 /* static_values_off = */ readInt(); 282 283 //System.out.println(i + ": " + mClassDefs[i].classIdx + " " + 284 // mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]); 285 } 286 } 287 288 /** 289 * Sets the "internal" flag on type IDs which are defined in the 290 * DEX file or within the VM (e.g. primitive classes and arrays). 291 */ markInternalClasses()292 void markInternalClasses() { 293 for (int i = mClassDefs.length -1; i >= 0; i--) { 294 mTypeIds[mClassDefs[i].classIdx].internal = true; 295 } 296 297 for (int i = 0; i < mTypeIds.length; i++) { 298 String className = mStrings[mTypeIds[i].descriptorIdx]; 299 300 if (className.length() == 1) { 301 // primitive class 302 mTypeIds[i].internal = true; 303 } else if (className.charAt(0) == '[') { 304 mTypeIds[i].internal = true; 305 } 306 307 //System.out.println(i + " " + 308 // (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " + 309 // mStrings[mTypeIds[i].descriptorIdx]); 310 } 311 } 312 313 314 /* 315 * ======================================================================= 316 * Queries 317 * ======================================================================= 318 */ 319 320 /** 321 * Returns the class name, given an index into the type_ids table. 322 */ classNameFromTypeIndex(int idx)323 private String classNameFromTypeIndex(int idx) { 324 return mStrings[mTypeIds[idx].descriptorIdx]; 325 } 326 327 /** 328 * Returns an array of method argument type strings, given an index 329 * into the proto_ids table. 330 */ argArrayFromProtoIndex(int idx)331 private String[] argArrayFromProtoIndex(int idx) { 332 ProtoIdItem protoId = mProtoIds[idx]; 333 String[] result = new String[protoId.types.length]; 334 335 for (int i = 0; i < protoId.types.length; i++) { 336 result[i] = mStrings[mTypeIds[protoId.types[i]].descriptorIdx]; 337 } 338 339 return result; 340 } 341 342 /** 343 * Returns a string representing the method's return type, given an 344 * index into the proto_ids table. 345 */ returnTypeFromProtoIndex(int idx)346 private String returnTypeFromProtoIndex(int idx) { 347 ProtoIdItem protoId = mProtoIds[idx]; 348 return mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx]; 349 } 350 351 /** 352 * Returns an array with all of the class references that don't 353 * correspond to classes in the DEX file. Each class reference has 354 * a list of the referenced fields and methods associated with 355 * that class. 356 */ getExternalReferences()357 public ClassRef[] getExternalReferences() { 358 // create a sparse array of ClassRef that parallels mTypeIds 359 ClassRef[] sparseRefs = new ClassRef[mTypeIds.length]; 360 361 // create entries for all externally-referenced classes 362 int count = 0; 363 for (int i = 0; i < mTypeIds.length; i++) { 364 if (!mTypeIds[i].internal) { 365 sparseRefs[i] = 366 new ClassRef(mStrings[mTypeIds[i].descriptorIdx]); 367 count++; 368 } 369 } 370 371 // add fields and methods to the appropriate class entry 372 addExternalFieldReferences(sparseRefs); 373 addExternalMethodReferences(sparseRefs); 374 375 // crunch out the sparseness 376 ClassRef[] classRefs = new ClassRef[count]; 377 int idx = 0; 378 for (int i = 0; i < mTypeIds.length; i++) { 379 if (sparseRefs[i] != null) 380 classRefs[idx++] = sparseRefs[i]; 381 } 382 383 assert idx == count; 384 385 return classRefs; 386 } 387 388 /** 389 * Runs through the list of field references, inserting external 390 * references into the appropriate ClassRef. 391 */ addExternalFieldReferences(ClassRef[] sparseRefs)392 private void addExternalFieldReferences(ClassRef[] sparseRefs) { 393 for (int i = 0; i < mFieldIds.length; i++) { 394 if (!mTypeIds[mFieldIds[i].classIdx].internal) { 395 FieldIdItem fieldId = mFieldIds[i]; 396 FieldRef newFieldRef = new FieldRef( 397 classNameFromTypeIndex(fieldId.classIdx), 398 classNameFromTypeIndex(fieldId.typeIdx), 399 mStrings[fieldId.nameIdx]); 400 sparseRefs[mFieldIds[i].classIdx].addField(newFieldRef); 401 } 402 } 403 } 404 405 /** 406 * Runs through the list of method references, inserting external 407 * references into the appropriate ClassRef. 408 */ addExternalMethodReferences(ClassRef[] sparseRefs)409 private void addExternalMethodReferences(ClassRef[] sparseRefs) { 410 for (int i = 0; i < mMethodIds.length; i++) { 411 if (!mTypeIds[mMethodIds[i].classIdx].internal) { 412 MethodIdItem methodId = mMethodIds[i]; 413 MethodRef newMethodRef = new MethodRef( 414 classNameFromTypeIndex(methodId.classIdx), 415 argArrayFromProtoIndex(methodId.protoIdx), 416 returnTypeFromProtoIndex(methodId.protoIdx), 417 mStrings[methodId.nameIdx]); 418 sparseRefs[mMethodIds[i].classIdx].addMethod(newMethodRef); 419 } 420 } 421 } 422 423 424 /* 425 * ======================================================================= 426 * Basic I/O functions 427 * ======================================================================= 428 */ 429 430 /** 431 * Seeks the DEX file to the specified absolute position. 432 */ seek(int position)433 void seek(int position) throws IOException { 434 mDexFile.seek(position); 435 } 436 437 /** 438 * Fills the buffer by reading bytes from the DEX file. 439 */ readBytes(byte[] buffer)440 void readBytes(byte[] buffer) throws IOException { 441 mDexFile.readFully(buffer); 442 } 443 444 /** 445 * Reads a single signed byte value. 446 */ readByte()447 byte readByte() throws IOException { 448 mDexFile.readFully(tmpBuf, 0, 1); 449 return tmpBuf[0]; 450 } 451 452 /** 453 * Reads a signed 16-bit integer, byte-swapping if necessary. 454 */ readShort()455 short readShort() throws IOException { 456 mDexFile.readFully(tmpBuf, 0, 2); 457 if (isBigEndian) { 458 return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8)); 459 } else { 460 return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8)); 461 } 462 } 463 464 /** 465 * Reads a signed 32-bit integer, byte-swapping if necessary. 466 */ readInt()467 int readInt() throws IOException { 468 mDexFile.readFully(tmpBuf, 0, 4); 469 470 if (isBigEndian) { 471 return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) | 472 ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24); 473 } else { 474 return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) | 475 ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24); 476 } 477 } 478 479 /** 480 * Reads a variable-length unsigned LEB128 value. Does not attempt to 481 * verify that the value is valid. 482 * 483 * @throws EOFException if we run off the end of the file 484 */ readUnsignedLeb128()485 int readUnsignedLeb128() throws IOException { 486 int result = 0; 487 byte val; 488 489 do { 490 val = readByte(); 491 result = (result << 7) | (val & 0x7f); 492 } while (val < 0); 493 494 return result; 495 } 496 497 /** 498 * Reads a UTF-8 string. 499 * 500 * We don't know how long the UTF-8 string is, so we have to read one 501 * byte at a time. We could make an educated guess based on the 502 * utf16_size and seek back if we get it wrong, but seeking backward 503 * may cause the underlying implementation to reload I/O buffers. 504 */ readString()505 String readString() throws IOException { 506 int utf16len = readUnsignedLeb128(); 507 byte inBuf[] = new byte[utf16len * 3]; // worst case 508 int idx; 509 510 for (idx = 0; idx < inBuf.length; idx++) { 511 byte val = readByte(); 512 if (val == 0) 513 break; 514 inBuf[idx] = val; 515 } 516 517 return new String(inBuf, 0, idx, "UTF-8"); 518 } 519 520 521 /* 522 * ======================================================================= 523 * Internal "structure" declarations 524 * ======================================================================= 525 */ 526 527 /** 528 * Holds the contents of a header_item. 529 */ 530 static class HeaderItem { 531 public int fileSize; 532 public int headerSize; 533 public int endianTag; 534 public int stringIdsSize, stringIdsOff; 535 public int typeIdsSize, typeIdsOff; 536 public int protoIdsSize, protoIdsOff; 537 public int fieldIdsSize, fieldIdsOff; 538 public int methodIdsSize, methodIdsOff; 539 public int classDefsSize, classDefsOff; 540 541 /* expected magic values */ 542 public static final byte[] DEX_FILE_MAGIC_v035 = 543 "dex\n035\0".getBytes(StandardCharsets.US_ASCII); 544 545 // Dex version 036 skipped because of an old dalvik bug on some versions 546 // of android where dex files with that version number would erroneously 547 // be accepted and run. See: art/runtime/dex_file.cc 548 549 // V037 was introduced in API LEVEL 24 550 public static final byte[] DEX_FILE_MAGIC_v037 = 551 "dex\n037\0".getBytes(StandardCharsets.US_ASCII); 552 public static final int ENDIAN_CONSTANT = 0x12345678; 553 public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412; 554 } 555 556 /** 557 * Holds the contents of a type_id_item. 558 * 559 * This is chiefly a list of indices into the string table. We need 560 * some additional bits of data, such as whether or not the type ID 561 * represents a class defined in this DEX, so we use an object for 562 * each instead of a simple integer. (Could use a parallel array, but 563 * since this is a desktop app it's not essential.) 564 */ 565 static class TypeIdItem { 566 public int descriptorIdx; // index into string_ids 567 568 public boolean internal; // defined within this DEX file? 569 } 570 571 /** 572 * Holds the contents of a proto_id_item. 573 */ 574 static class ProtoIdItem { 575 public int shortyIdx; // index into string_ids 576 public int returnTypeIdx; // index into type_ids 577 public int parametersOff; // file offset to a type_list 578 579 public int types[]; // contents of type list 580 } 581 582 /** 583 * Holds the contents of a field_id_item. 584 */ 585 static class FieldIdItem { 586 public int classIdx; // index into type_ids (defining class) 587 public int typeIdx; // index into type_ids (field type) 588 public int nameIdx; // index into string_ids 589 } 590 591 /** 592 * Holds the contents of a method_id_item. 593 */ 594 static class MethodIdItem { 595 public int classIdx; // index into type_ids 596 public int protoIdx; // index into proto_ids 597 public int nameIdx; // index into string_ids 598 } 599 600 /** 601 * Holds the contents of a class_def_item. 602 * 603 * We don't really need a class for this, but there's some stuff in 604 * the class_def_item that we might want later. 605 */ 606 static class ClassDefItem { 607 public int classIdx; // index into type_ids 608 } 609 } 610