1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Implementation file of the dexlayout utility.
17 *
18 * This is a tool to read dex files into an internal representation,
19 * reorganize the representation, and emit dex files with a better
20 * file layout.
21 */
22
23 #include "dexlayout.h"
24
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <sys/mman.h> // For the PROT_* and MAP_* constants.
28
29 #include <iostream>
30 #include <memory>
31 #include <sstream>
32 #include <vector>
33
34 #include "android-base/stringprintf.h"
35
36 #include "base/logging.h" // For VLOG_IS_ON.
37 #include "base/os.h"
38 #include "base/utils.h"
39 #include "dex/art_dex_file_loader.h"
40 #include "dex/descriptors_names.h"
41 #include "dex/dex_file-inl.h"
42 #include "dex/dex_file_layout.h"
43 #include "dex/dex_file_loader.h"
44 #include "dex/dex_file_types.h"
45 #include "dex/dex_file_verifier.h"
46 #include "dex/dex_instruction-inl.h"
47 #include "dex_ir_builder.h"
48 #include "dex_verify.h"
49 #include "dex_visualize.h"
50 #include "dex_writer.h"
51 #include "jit/profile_compilation_info.h"
52 #include "mem_map.h"
53
54 namespace art {
55
56 using android::base::StringPrintf;
57
/*
 * Selects which table of access-flag names CreateAccessFlagStr() uses:
 * the names differ depending on whether the flags belong to a class,
 * a method, or a field.
 */
enum AccessFor {
  kAccessForClass = 0,
  kAccessForMethod = 1,
  kAccessForField = 2,
  kAccessForMAX  // One past the last selector; used as the table's first dimension.
};

// Number of low-order flag bits that have a name in each table.
constexpr int kNumFlags = 18;
65
/*
 * Assembles a 16-bit value from two consecutive bytes, least significant
 * byte first.
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>(low_byte | (high_byte << 8));
}
72
73 /*
74 * Converts a type descriptor to human-readable "dotted" form. For
75 * example, "Ljava/lang/String;" becomes "java.lang.String", and
76 * "[I" becomes "int[]". Also converts '$' to '.', which means this
77 * form can't be converted back to a descriptor.
78 */
DescriptorToDotWrapper(const char * descriptor)79 static std::string DescriptorToDotWrapper(const char* descriptor) {
80 std::string result = DescriptorToDot(descriptor);
81 size_t found = result.find('$');
82 while (found != std::string::npos) {
83 result[found] = '.';
84 found = result.find('$', found);
85 }
86 return result;
87 }
88
/*
 * Converts the class name portion of a type descriptor to human-readable
 * "dotted" form. For example, "Ljava/lang/String;" becomes "String" and
 * "Lcom/example/Outer$Inner;" becomes "Outer.Inner".
 *
 * The name is taken to start after the last '/' (or after the first
 * character when there is no '/'), and the final character of the
 * descriptor is dropped as the trailing ';'. When nothing lies past that
 * start position, including for an empty descriptor, an empty string is
 * returned.
 */
static std::string DescriptorClassToDot(const char* str) {
  const std::string descriptor(str);

  // Locate the first character of the simple class name: one past the last
  // '/', or one past the leading 'L' when the class is in the default package.
  size_t name_start = descriptor.rfind('/');
  if (name_start == std::string::npos) {
    name_start = 0;
  }
  name_start++;

  // Nothing follows the start position. Without this guard an empty
  // descriptor would make substr() throw std::out_of_range.
  if (name_start >= descriptor.size()) {
    return std::string();
  }

  // Copy class name over, trimming trailing ';'.
  std::string result(descriptor.substr(name_start, descriptor.size() - 1 - name_start));

  // Replace '$' with '.'.
  for (char& ch : result) {
    if (ch == '$') {
      ch = '.';
    }
  }

  return result;
}
116
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
123
/*
 * Maps a boolean to the text "true" or "false" wrapped in double quotes.
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
130
131 /*
132 * Returns a quoted string representing the access flags.
133 */
QuotedVisibility(uint32_t access_flags)134 static const char* QuotedVisibility(uint32_t access_flags) {
135 if (access_flags & kAccPublic) {
136 return "\"public\"";
137 } else if (access_flags & kAccProtected) {
138 return "\"protected\"";
139 } else if (access_flags & kAccPrivate) {
140 return "\"private\"";
141 } else {
142 return "\"package\"";
143 }
144 }
145
/*
 * Returns how many bits of the 32-bit word are set.
 */
static int CountOnes(uint32_t val) {
  int ones = 0;
  while (val != 0) {
    val &= val - 1;  // Clears the lowest bit that is still set.
    ++ones;
  }
  return ones;
}
154
155 /*
156 * Creates a new string with human-readable access flags.
157 *
158 * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
159 */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)160 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
161 static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
162 {
163 "PUBLIC", /* 0x00001 */
164 "PRIVATE", /* 0x00002 */
165 "PROTECTED", /* 0x00004 */
166 "STATIC", /* 0x00008 */
167 "FINAL", /* 0x00010 */
168 "?", /* 0x00020 */
169 "?", /* 0x00040 */
170 "?", /* 0x00080 */
171 "?", /* 0x00100 */
172 "INTERFACE", /* 0x00200 */
173 "ABSTRACT", /* 0x00400 */
174 "?", /* 0x00800 */
175 "SYNTHETIC", /* 0x01000 */
176 "ANNOTATION", /* 0x02000 */
177 "ENUM", /* 0x04000 */
178 "?", /* 0x08000 */
179 "VERIFIED", /* 0x10000 */
180 "OPTIMIZED", /* 0x20000 */
181 }, {
182 "PUBLIC", /* 0x00001 */
183 "PRIVATE", /* 0x00002 */
184 "PROTECTED", /* 0x00004 */
185 "STATIC", /* 0x00008 */
186 "FINAL", /* 0x00010 */
187 "SYNCHRONIZED", /* 0x00020 */
188 "BRIDGE", /* 0x00040 */
189 "VARARGS", /* 0x00080 */
190 "NATIVE", /* 0x00100 */
191 "?", /* 0x00200 */
192 "ABSTRACT", /* 0x00400 */
193 "STRICT", /* 0x00800 */
194 "SYNTHETIC", /* 0x01000 */
195 "?", /* 0x02000 */
196 "?", /* 0x04000 */
197 "MIRANDA", /* 0x08000 */
198 "CONSTRUCTOR", /* 0x10000 */
199 "DECLARED_SYNCHRONIZED", /* 0x20000 */
200 }, {
201 "PUBLIC", /* 0x00001 */
202 "PRIVATE", /* 0x00002 */
203 "PROTECTED", /* 0x00004 */
204 "STATIC", /* 0x00008 */
205 "FINAL", /* 0x00010 */
206 "?", /* 0x00020 */
207 "VOLATILE", /* 0x00040 */
208 "TRANSIENT", /* 0x00080 */
209 "?", /* 0x00100 */
210 "?", /* 0x00200 */
211 "?", /* 0x00400 */
212 "?", /* 0x00800 */
213 "SYNTHETIC", /* 0x01000 */
214 "?", /* 0x02000 */
215 "ENUM", /* 0x04000 */
216 "?", /* 0x08000 */
217 "?", /* 0x10000 */
218 "?", /* 0x20000 */
219 },
220 };
221
222 // Allocate enough storage to hold the expected number of strings,
223 // plus a space between each. We over-allocate, using the longest
224 // string above as the base metric.
225 const int kLongest = 21; // The strlen of longest string above.
226 const int count = CountOnes(flags);
227 char* str;
228 char* cp;
229 cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
230
231 for (int i = 0; i < kNumFlags; i++) {
232 if (flags & 0x01) {
233 const char* accessStr = kAccessStrings[for_what][i];
234 const int len = strlen(accessStr);
235 if (cp != str) {
236 *cp++ = ' ';
237 }
238 memcpy(cp, accessStr, len);
239 cp += len;
240 }
241 flags >>= 1;
242 } // for
243
244 *cp = '\0';
245 return str;
246 }
247
GetSignatureForProtoId(const dex_ir::ProtoId * proto)248 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
249 if (proto == nullptr) {
250 return "<no signature>";
251 }
252
253 std::string result("(");
254 const dex_ir::TypeList* type_list = proto->Parameters();
255 if (type_list != nullptr) {
256 for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
257 result += type_id->GetStringId()->Data();
258 }
259 }
260 result += ")";
261 result += proto->ReturnType()->GetStringId()->Data();
262 return result;
263 }
264
/*
 * Copies "len" bytes from "data" to "out", making them printable:
 *   - NUL and newline become the two-character sequences "\0" and "\n",
 *   - any other byte below 0x20 becomes '.',
 *   - any byte at or above 0x80 becomes '?',
 *   - everything else is copied unchanged.
 *
 * The output buffer must be able to hold (2*len)+1 bytes. The result is
 * NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  for (const unsigned char* const end = data + len; data != end; ++data) {
    const unsigned char byte = *data;
    if (byte == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (byte == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (byte < 0x20) {
      // Remaining control characters are not spelled out individually.
      *out++ = '.';
    } else if (byte >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = byte;
    }
  }
  *out = '\0';
}
298
/*
 * Writes the string to out_file surrounded by double quotes. Backslash,
 * double quote, tab, newline and carriage return are written as the
 * two-character escapes \\, \", \t, \n and \r; all other characters are
 * written unchanged.
 */
static void DumpEscapedString(const char* p, FILE* out_file) {
  fputs("\"", out_file);
  for (const char* cursor = p; *cursor != '\0'; ++cursor) {
    // Look up the escape sequence for this character, if it has one.
    const char* escape = nullptr;
    switch (*cursor) {
      case '\\': escape = "\\\\"; break;
      case '\"': escape = "\\\""; break;
      case '\t': escape = "\\t"; break;
      case '\n': escape = "\\n"; break;
      case '\r': escape = "\\r"; break;
      default: break;
    }
    if (escape != nullptr) {
      fputs(escape, out_file);
    } else {
      putc(*cursor, out_file);
    }
  }
  fputs("\"", out_file);
}
327
/*
 * Dumps a string as an XML attribute value.
 *
 * Characters that would break a double-quoted attribute or be altered by
 * an XML parser are written as references instead of raw characters:
 * '&', '<', '>' and '"' use the predefined entities, while tab, newline
 * and carriage return use numeric character references. Every other
 * character is written unchanged. No surrounding quotes are emitted.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  for (; *p; p++) {
    switch (*p) {
      case '&':
        fputs("&amp;", out_file);
        break;
      case '<':
        fputs("&lt;", out_file);
        break;
      case '>':
        fputs("&gt;", out_file);
        break;
      case '"':
        fputs("&quot;", out_file);
        break;
      case '\t':
        fputs("&#x9;", out_file);
        break;
      case '\n':
        fputs("&#xA;", out_file);
        break;
      case '\r':
        fputs("&#xD;", out_file);
        break;
      default:
        putc(*p, out_file);
    }  // switch
  }  // for
}
360
361 /*
362 * Helper for dumpInstruction(), which builds the string
363 * representation for the index in the given instruction.
364 * Returns a pointer to a buffer of sufficient size.
365 */
IndexString(dex_ir::Header * header,const Instruction * dec_insn,size_t buf_size)366 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
367 const Instruction* dec_insn,
368 size_t buf_size) {
369 std::unique_ptr<char[]> buf(new char[buf_size]);
370 // Determine index and width of the string.
371 uint32_t index = 0;
372 uint32_t secondary_index = dex::kDexNoIndex;
373 uint32_t width = 4;
374 switch (Instruction::FormatOf(dec_insn->Opcode())) {
375 // SOME NOT SUPPORTED:
376 // case Instruction::k20bc:
377 case Instruction::k21c:
378 case Instruction::k35c:
379 // case Instruction::k35ms:
380 case Instruction::k3rc:
381 // case Instruction::k3rms:
382 // case Instruction::k35mi:
383 // case Instruction::k3rmi:
384 index = dec_insn->VRegB();
385 width = 4;
386 break;
387 case Instruction::k31c:
388 index = dec_insn->VRegB();
389 width = 8;
390 break;
391 case Instruction::k22c:
392 // case Instruction::k22cs:
393 index = dec_insn->VRegC();
394 width = 4;
395 break;
396 case Instruction::k45cc:
397 case Instruction::k4rcc:
398 index = dec_insn->VRegB();
399 secondary_index = dec_insn->VRegH();
400 width = 4;
401 break;
402 default:
403 break;
404 } // switch
405
406 // Determine index type.
407 size_t outSize = 0;
408 switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
409 case Instruction::kIndexUnknown:
410 // This function should never get called for this type, but do
411 // something sensible here, just to help with debugging.
412 outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
413 break;
414 case Instruction::kIndexNone:
415 // This function should never get called for this type, but do
416 // something sensible here, just to help with debugging.
417 outSize = snprintf(buf.get(), buf_size, "<no-index>");
418 break;
419 case Instruction::kIndexTypeRef:
420 if (index < header->GetCollections().TypeIdsSize()) {
421 const char* tp = header->GetCollections().GetTypeId(index)->GetStringId()->Data();
422 outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
423 } else {
424 outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
425 }
426 break;
427 case Instruction::kIndexStringRef:
428 if (index < header->GetCollections().StringIdsSize()) {
429 const char* st = header->GetCollections().GetStringId(index)->Data();
430 outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
431 } else {
432 outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
433 }
434 break;
435 case Instruction::kIndexMethodRef:
436 if (index < header->GetCollections().MethodIdsSize()) {
437 dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
438 const char* name = method_id->Name()->Data();
439 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
440 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
441 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
442 back_descriptor, name, type_descriptor.c_str(), width, index);
443 } else {
444 outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
445 }
446 break;
447 case Instruction::kIndexFieldRef:
448 if (index < header->GetCollections().FieldIdsSize()) {
449 dex_ir::FieldId* field_id = header->GetCollections().GetFieldId(index);
450 const char* name = field_id->Name()->Data();
451 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
452 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
453 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
454 back_descriptor, name, type_descriptor, width, index);
455 } else {
456 outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
457 }
458 break;
459 case Instruction::kIndexVtableOffset:
460 outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
461 width, index, width, index);
462 break;
463 case Instruction::kIndexFieldOffset:
464 outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
465 break;
466 case Instruction::kIndexMethodAndProtoRef: {
467 std::string method("<method?>");
468 std::string proto("<proto?>");
469 if (index < header->GetCollections().MethodIdsSize()) {
470 dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
471 const char* name = method_id->Name()->Data();
472 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
473 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
474 method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
475 }
476 if (secondary_index < header->GetCollections().ProtoIdsSize()) {
477 dex_ir::ProtoId* proto_id = header->GetCollections().GetProtoId(secondary_index);
478 proto = GetSignatureForProtoId(proto_id);
479 }
480 outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
481 method.c_str(), proto.c_str(), width, index, width, secondary_index);
482 }
483 break;
484 // SOME NOT SUPPORTED:
485 // case Instruction::kIndexVaries:
486 // case Instruction::kIndexInlineMethod:
487 default:
488 outSize = snprintf(buf.get(), buf_size, "<?>");
489 break;
490 } // switch
491
492 // Determine success of string construction.
493 if (outSize >= buf_size) {
494 // The buffer wasn't big enough; retry with computed size. Note: snprintf()
495 // doesn't count/ the '\0' as part of its returned size, so we add explicit
496 // space for it here.
497 return IndexString(header, dec_insn, outSize + 1);
498 }
499 return buf;
500 }
501
502 /*
503 * Dumps encoded annotation.
504 */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)505 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
506 fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
507 // Display all name=value pairs.
508 for (auto& subannotation : *annotation->GetAnnotationElements()) {
509 fputc(' ', out_file_);
510 fputs(subannotation->GetName()->Data(), out_file_);
511 fputc('=', out_file_);
512 DumpEncodedValue(subannotation->GetValue());
513 }
514 }
515 /*
516 * Dumps encoded value.
517 */
DumpEncodedValue(const dex_ir::EncodedValue * data)518 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
519 switch (data->Type()) {
520 case DexFile::kDexAnnotationByte:
521 fprintf(out_file_, "%" PRId8, data->GetByte());
522 break;
523 case DexFile::kDexAnnotationShort:
524 fprintf(out_file_, "%" PRId16, data->GetShort());
525 break;
526 case DexFile::kDexAnnotationChar:
527 fprintf(out_file_, "%" PRIu16, data->GetChar());
528 break;
529 case DexFile::kDexAnnotationInt:
530 fprintf(out_file_, "%" PRId32, data->GetInt());
531 break;
532 case DexFile::kDexAnnotationLong:
533 fprintf(out_file_, "%" PRId64, data->GetLong());
534 break;
535 case DexFile::kDexAnnotationFloat: {
536 fprintf(out_file_, "%g", data->GetFloat());
537 break;
538 }
539 case DexFile::kDexAnnotationDouble: {
540 fprintf(out_file_, "%g", data->GetDouble());
541 break;
542 }
543 case DexFile::kDexAnnotationString: {
544 dex_ir::StringId* string_id = data->GetStringId();
545 if (options_.output_format_ == kOutputPlain) {
546 DumpEscapedString(string_id->Data(), out_file_);
547 } else {
548 DumpXmlAttribute(string_id->Data(), out_file_);
549 }
550 break;
551 }
552 case DexFile::kDexAnnotationType: {
553 dex_ir::TypeId* type_id = data->GetTypeId();
554 fputs(type_id->GetStringId()->Data(), out_file_);
555 break;
556 }
557 case DexFile::kDexAnnotationField:
558 case DexFile::kDexAnnotationEnum: {
559 dex_ir::FieldId* field_id = data->GetFieldId();
560 fputs(field_id->Name()->Data(), out_file_);
561 break;
562 }
563 case DexFile::kDexAnnotationMethod: {
564 dex_ir::MethodId* method_id = data->GetMethodId();
565 fputs(method_id->Name()->Data(), out_file_);
566 break;
567 }
568 case DexFile::kDexAnnotationArray: {
569 fputc('{', out_file_);
570 // Display all elements.
571 for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
572 fputc(' ', out_file_);
573 DumpEncodedValue(value.get());
574 }
575 fputs(" }", out_file_);
576 break;
577 }
578 case DexFile::kDexAnnotationAnnotation: {
579 DumpEncodedAnnotation(data->GetEncodedAnnotation());
580 break;
581 }
582 case DexFile::kDexAnnotationNull:
583 fputs("null", out_file_);
584 break;
585 case DexFile::kDexAnnotationBoolean:
586 fputs(StrBool(data->GetBoolean()), out_file_);
587 break;
588 default:
589 fputs("????", out_file_);
590 break;
591 } // switch
592 }
593
594 /*
595 * Dumps the file header.
596 */
DumpFileHeader()597 void DexLayout::DumpFileHeader() {
598 char sanitized[8 * 2 + 1];
599 dex_ir::Collections& collections = header_->GetCollections();
600 fprintf(out_file_, "DEX file header:\n");
601 Asciify(sanitized, header_->Magic(), 8);
602 fprintf(out_file_, "magic : '%s'\n", sanitized);
603 fprintf(out_file_, "checksum : %08x\n", header_->Checksum());
604 fprintf(out_file_, "signature : %02x%02x...%02x%02x\n",
605 header_->Signature()[0], header_->Signature()[1],
606 header_->Signature()[DexFile::kSha1DigestSize - 2],
607 header_->Signature()[DexFile::kSha1DigestSize - 1]);
608 fprintf(out_file_, "file_size : %d\n", header_->FileSize());
609 fprintf(out_file_, "header_size : %d\n", header_->HeaderSize());
610 fprintf(out_file_, "link_size : %d\n", header_->LinkSize());
611 fprintf(out_file_, "link_off : %d (0x%06x)\n",
612 header_->LinkOffset(), header_->LinkOffset());
613 fprintf(out_file_, "string_ids_size : %d\n", collections.StringIdsSize());
614 fprintf(out_file_, "string_ids_off : %d (0x%06x)\n",
615 collections.StringIdsOffset(), collections.StringIdsOffset());
616 fprintf(out_file_, "type_ids_size : %d\n", collections.TypeIdsSize());
617 fprintf(out_file_, "type_ids_off : %d (0x%06x)\n",
618 collections.TypeIdsOffset(), collections.TypeIdsOffset());
619 fprintf(out_file_, "proto_ids_size : %d\n", collections.ProtoIdsSize());
620 fprintf(out_file_, "proto_ids_off : %d (0x%06x)\n",
621 collections.ProtoIdsOffset(), collections.ProtoIdsOffset());
622 fprintf(out_file_, "field_ids_size : %d\n", collections.FieldIdsSize());
623 fprintf(out_file_, "field_ids_off : %d (0x%06x)\n",
624 collections.FieldIdsOffset(), collections.FieldIdsOffset());
625 fprintf(out_file_, "method_ids_size : %d\n", collections.MethodIdsSize());
626 fprintf(out_file_, "method_ids_off : %d (0x%06x)\n",
627 collections.MethodIdsOffset(), collections.MethodIdsOffset());
628 fprintf(out_file_, "class_defs_size : %d\n", collections.ClassDefsSize());
629 fprintf(out_file_, "class_defs_off : %d (0x%06x)\n",
630 collections.ClassDefsOffset(), collections.ClassDefsOffset());
631 fprintf(out_file_, "data_size : %d\n", header_->DataSize());
632 fprintf(out_file_, "data_off : %d (0x%06x)\n\n",
633 header_->DataOffset(), header_->DataOffset());
634 }
635
636 /*
637 * Dumps a class_def_item.
638 */
DumpClassDef(int idx)639 void DexLayout::DumpClassDef(int idx) {
640 // General class information.
641 dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
642 fprintf(out_file_, "Class #%d header:\n", idx);
643 fprintf(out_file_, "class_idx : %d\n", class_def->ClassType()->GetIndex());
644 fprintf(out_file_, "access_flags : %d (0x%04x)\n",
645 class_def->GetAccessFlags(), class_def->GetAccessFlags());
646 uint32_t superclass_idx = class_def->Superclass() == nullptr ?
647 DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
648 fprintf(out_file_, "superclass_idx : %d\n", superclass_idx);
649 fprintf(out_file_, "interfaces_off : %d (0x%06x)\n",
650 class_def->InterfacesOffset(), class_def->InterfacesOffset());
651 uint32_t source_file_offset = 0xffffffffU;
652 if (class_def->SourceFile() != nullptr) {
653 source_file_offset = class_def->SourceFile()->GetIndex();
654 }
655 fprintf(out_file_, "source_file_idx : %d\n", source_file_offset);
656 uint32_t annotations_offset = 0;
657 if (class_def->Annotations() != nullptr) {
658 annotations_offset = class_def->Annotations()->GetOffset();
659 }
660 fprintf(out_file_, "annotations_off : %d (0x%06x)\n",
661 annotations_offset, annotations_offset);
662 if (class_def->GetClassData() == nullptr) {
663 fprintf(out_file_, "class_data_off : %d (0x%06x)\n", 0, 0);
664 } else {
665 fprintf(out_file_, "class_data_off : %d (0x%06x)\n",
666 class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
667 }
668
669 // Fields and methods.
670 dex_ir::ClassData* class_data = class_def->GetClassData();
671 if (class_data != nullptr && class_data->StaticFields() != nullptr) {
672 fprintf(out_file_, "static_fields_size : %zu\n", class_data->StaticFields()->size());
673 } else {
674 fprintf(out_file_, "static_fields_size : 0\n");
675 }
676 if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
677 fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
678 } else {
679 fprintf(out_file_, "instance_fields_size: 0\n");
680 }
681 if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
682 fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
683 } else {
684 fprintf(out_file_, "direct_methods_size : 0\n");
685 }
686 if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
687 fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
688 } else {
689 fprintf(out_file_, "virtual_methods_size: 0\n");
690 }
691 fprintf(out_file_, "\n");
692 }
693
694 /**
695 * Dumps an annotation set item.
696 */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)697 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
698 if (set_item == nullptr || set_item->GetItems()->size() == 0) {
699 fputs(" empty-annotation-set\n", out_file_);
700 return;
701 }
702 for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
703 if (annotation == nullptr) {
704 continue;
705 }
706 fputs(" ", out_file_);
707 switch (annotation->GetVisibility()) {
708 case DexFile::kDexVisibilityBuild: fputs("VISIBILITY_BUILD ", out_file_); break;
709 case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
710 case DexFile::kDexVisibilitySystem: fputs("VISIBILITY_SYSTEM ", out_file_); break;
711 default: fputs("VISIBILITY_UNKNOWN ", out_file_); break;
712 } // switch
713 DumpEncodedAnnotation(annotation->GetAnnotation());
714 fputc('\n', out_file_);
715 }
716 }
717
718 /*
719 * Dumps class annotations.
720 */
DumpClassAnnotations(int idx)721 void DexLayout::DumpClassAnnotations(int idx) {
722 dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
723 dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
724 if (annotations_directory == nullptr) {
725 return; // none
726 }
727
728 fprintf(out_file_, "Class #%d annotations:\n", idx);
729
730 dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
731 dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
732 dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
733 dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
734
735 // Annotations on the class itself.
736 if (class_set_item != nullptr) {
737 fprintf(out_file_, "Annotations on class\n");
738 DumpAnnotationSetItem(class_set_item);
739 }
740
741 // Annotations on fields.
742 if (fields != nullptr) {
743 for (auto& field : *fields) {
744 const dex_ir::FieldId* field_id = field->GetFieldId();
745 const uint32_t field_idx = field_id->GetIndex();
746 const char* field_name = field_id->Name()->Data();
747 fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
748 DumpAnnotationSetItem(field->GetAnnotationSetItem());
749 }
750 }
751
752 // Annotations on methods.
753 if (methods != nullptr) {
754 for (auto& method : *methods) {
755 const dex_ir::MethodId* method_id = method->GetMethodId();
756 const uint32_t method_idx = method_id->GetIndex();
757 const char* method_name = method_id->Name()->Data();
758 fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
759 DumpAnnotationSetItem(method->GetAnnotationSetItem());
760 }
761 }
762
763 // Annotations on method parameters.
764 if (parameters != nullptr) {
765 for (auto& parameter : *parameters) {
766 const dex_ir::MethodId* method_id = parameter->GetMethodId();
767 const uint32_t method_idx = method_id->GetIndex();
768 const char* method_name = method_id->Name()->Data();
769 fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
770 uint32_t j = 0;
771 for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
772 fprintf(out_file_, "#%u\n", j);
773 DumpAnnotationSetItem(annotation);
774 ++j;
775 }
776 }
777 }
778
779 fputc('\n', out_file_);
780 }
781
782 /*
783 * Dumps an interface that a class declares to implement.
784 */
DumpInterface(const dex_ir::TypeId * type_item,int i)785 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
786 const char* interface_name = type_item->GetStringId()->Data();
787 if (options_.output_format_ == kOutputPlain) {
788 fprintf(out_file_, " #%d : '%s'\n", i, interface_name);
789 } else {
790 std::string dot(DescriptorToDotWrapper(interface_name));
791 fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
792 }
793 }
794
795 /*
796 * Dumps the catches table associated with the code.
797 */
DumpCatches(const dex_ir::CodeItem * code)798 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
799 const uint16_t tries_size = code->TriesSize();
800
801 // No catch table.
802 if (tries_size == 0) {
803 fprintf(out_file_, " catches : (none)\n");
804 return;
805 }
806
807 // Dump all table entries.
808 fprintf(out_file_, " catches : %d\n", tries_size);
809 std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
810 for (uint32_t i = 0; i < tries_size; i++) {
811 const dex_ir::TryItem* try_item = (*tries)[i].get();
812 const uint32_t start = try_item->StartAddr();
813 const uint32_t end = start + try_item->InsnCount();
814 fprintf(out_file_, " 0x%04x - 0x%04x\n", start, end);
815 for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
816 const dex_ir::TypeId* type_id = handler->GetTypeId();
817 const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
818 fprintf(out_file_, " %s -> 0x%04x\n", descriptor, handler->GetAddress());
819 } // for
820 } // for
821 }
822
823 /*
824 * Dumps a single instruction.
825 */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)826 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
827 uint32_t code_offset,
828 uint32_t insn_idx,
829 uint32_t insn_width,
830 const Instruction* dec_insn) {
831 // Address of instruction (expressed as byte offset).
832 fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
833
834 // Dump (part of) raw bytes.
835 const uint16_t* insns = code->Insns();
836 for (uint32_t i = 0; i < 8; i++) {
837 if (i < insn_width) {
838 if (i == 7) {
839 fprintf(out_file_, " ... ");
840 } else {
841 // Print 16-bit value in little-endian order.
842 const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
843 fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
844 }
845 } else {
846 fputs(" ", out_file_);
847 }
848 } // for
849
850 // Dump pseudo-instruction or opcode.
851 if (dec_insn->Opcode() == Instruction::NOP) {
852 const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
853 if (instr == Instruction::kPackedSwitchSignature) {
854 fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
855 } else if (instr == Instruction::kSparseSwitchSignature) {
856 fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
857 } else if (instr == Instruction::kArrayDataSignature) {
858 fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
859 } else {
860 fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
861 }
862 } else {
863 fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
864 }
865
866 // Set up additional argument.
867 std::unique_ptr<char[]> index_buf;
868 if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
869 index_buf = IndexString(header_, dec_insn, 200);
870 }
871
872 // Dump the instruction.
873 //
874 // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
875 //
876 switch (Instruction::FormatOf(dec_insn->Opcode())) {
877 case Instruction::k10x: // op
878 break;
879 case Instruction::k12x: // op vA, vB
880 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
881 break;
882 case Instruction::k11n: // op vA, #+B
883 fprintf(out_file_, " v%d, #int %d // #%x",
884 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
885 break;
886 case Instruction::k11x: // op vAA
887 fprintf(out_file_, " v%d", dec_insn->VRegA());
888 break;
889 case Instruction::k10t: // op +AA
890 case Instruction::k20t: { // op +AAAA
891 const int32_t targ = (int32_t) dec_insn->VRegA();
892 fprintf(out_file_, " %04x // %c%04x",
893 insn_idx + targ,
894 (targ < 0) ? '-' : '+',
895 (targ < 0) ? -targ : targ);
896 break;
897 }
898 case Instruction::k22x: // op vAA, vBBBB
899 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
900 break;
901 case Instruction::k21t: { // op vAA, +BBBB
902 const int32_t targ = (int32_t) dec_insn->VRegB();
903 fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
904 insn_idx + targ,
905 (targ < 0) ? '-' : '+',
906 (targ < 0) ? -targ : targ);
907 break;
908 }
909 case Instruction::k21s: // op vAA, #+BBBB
910 fprintf(out_file_, " v%d, #int %d // #%x",
911 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
912 break;
913 case Instruction::k21h: // op vAA, #+BBBB0000[00000000]
914 // The printed format varies a bit based on the actual opcode.
915 if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
916 const int32_t value = dec_insn->VRegB() << 16;
917 fprintf(out_file_, " v%d, #int %d // #%x",
918 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
919 } else {
920 const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
921 fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
922 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
923 }
924 break;
925 case Instruction::k21c: // op vAA, thing@BBBB
926 case Instruction::k31c: // op vAA, thing@BBBBBBBB
927 fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
928 break;
929 case Instruction::k23x: // op vAA, vBB, vCC
930 fprintf(out_file_, " v%d, v%d, v%d",
931 dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
932 break;
933 case Instruction::k22b: // op vAA, vBB, #+CC
934 fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
935 dec_insn->VRegA(), dec_insn->VRegB(),
936 (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
937 break;
938 case Instruction::k22t: { // op vA, vB, +CCCC
939 const int32_t targ = (int32_t) dec_insn->VRegC();
940 fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
941 dec_insn->VRegA(), dec_insn->VRegB(),
942 insn_idx + targ,
943 (targ < 0) ? '-' : '+',
944 (targ < 0) ? -targ : targ);
945 break;
946 }
947 case Instruction::k22s: // op vA, vB, #+CCCC
948 fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
949 dec_insn->VRegA(), dec_insn->VRegB(),
950 (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
951 break;
952 case Instruction::k22c: // op vA, vB, thing@CCCC
953 // NOT SUPPORTED:
954 // case Instruction::k22cs: // [opt] op vA, vB, field offset CCCC
955 fprintf(out_file_, " v%d, v%d, %s",
956 dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
957 break;
958 case Instruction::k30t:
959 fprintf(out_file_, " #%08x", dec_insn->VRegA());
960 break;
961 case Instruction::k31i: { // op vAA, #+BBBBBBBB
962 // This is often, but not always, a float.
963 union {
964 float f;
965 uint32_t i;
966 } conv;
967 conv.i = dec_insn->VRegB();
968 fprintf(out_file_, " v%d, #float %g // #%08x",
969 dec_insn->VRegA(), conv.f, dec_insn->VRegB());
970 break;
971 }
972 case Instruction::k31t: // op vAA, offset +BBBBBBBB
973 fprintf(out_file_, " v%d, %08x // +%08x",
974 dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
975 break;
976 case Instruction::k32x: // op vAAAA, vBBBB
977 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
978 break;
979 case Instruction::k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
980 case Instruction::k45cc: { // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
981 // NOT SUPPORTED:
982 // case Instruction::k35ms: // [opt] invoke-virtual+super
983 // case Instruction::k35mi: // [opt] inline invoke
984 uint32_t arg[Instruction::kMaxVarArgRegs];
985 dec_insn->GetVarArgs(arg);
986 fputs(" {", out_file_);
987 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
988 if (i == 0) {
989 fprintf(out_file_, "v%d", arg[i]);
990 } else {
991 fprintf(out_file_, ", v%d", arg[i]);
992 }
993 } // for
994 fprintf(out_file_, "}, %s", index_buf.get());
995 break;
996 }
997 case Instruction::k3rc: // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
998 case Instruction::k4rcc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
999 // NOT SUPPORTED:
1000 // case Instruction::k3rms: // [opt] invoke-virtual+super/range
1001 // case Instruction::k3rmi: // [opt] execute-inline/range
1002 {
1003 // This doesn't match the "dx" output when some of the args are
1004 // 64-bit values -- dx only shows the first register.
1005 fputs(" {", out_file_);
1006 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1007 if (i == 0) {
1008 fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
1009 } else {
1010 fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
1011 }
1012 } // for
1013 fprintf(out_file_, "}, %s", index_buf.get());
1014 }
1015 break;
1016 case Instruction::k51l: { // op vAA, #+BBBBBBBBBBBBBBBB
1017 // This is often, but not always, a double.
1018 union {
1019 double d;
1020 uint64_t j;
1021 } conv;
1022 conv.j = dec_insn->WideVRegB();
1023 fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1024 dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1025 break;
1026 }
1027 // NOT SUPPORTED:
1028 // case Instruction::k00x: // unknown op or breakpoint
1029 // break;
1030 default:
1031 fprintf(out_file_, " ???");
1032 break;
1033 } // switch
1034
1035 fputc('\n', out_file_);
1036 }
1037
1038 /*
1039 * Dumps a bytecode disassembly.
1040 */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1041 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1042 dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1043 const char* name = method_id->Name()->Data();
1044 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1045 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1046
1047 // Generate header.
1048 std::string dot(DescriptorToDotWrapper(back_descriptor));
1049 fprintf(out_file_, "%06x: |[%06x] %s.%s:%s\n",
1050 code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1051
1052 // Iterate over all instructions.
1053 for (const DexInstructionPcPair& inst : code->Instructions()) {
1054 const uint32_t insn_width = inst->SizeInCodeUnits();
1055 if (insn_width == 0) {
1056 LOG(WARNING) << "GLITCH: zero-width instruction at idx=0x" << std::hex << inst.DexPc();
1057 break;
1058 }
1059 DumpInstruction(code, code_offset, inst.DexPc(), insn_width, &inst.Inst());
1060 } // for
1061 }
1062
1063 /*
1064 * Callback for dumping each positions table entry.
1065 */
DumpPositionsCb(void * context,const DexFile::PositionInfo & entry)1066 static bool DumpPositionsCb(void* context, const DexFile::PositionInfo& entry) {
1067 FILE* out_file = reinterpret_cast<FILE*>(context);
1068 fprintf(out_file, " 0x%04x line=%d\n", entry.address_, entry.line_);
1069 return false;
1070 }
1071
1072 /*
1073 * Callback for dumping locals table entry.
1074 */
DumpLocalsCb(void * context,const DexFile::LocalInfo & entry)1075 static void DumpLocalsCb(void* context, const DexFile::LocalInfo& entry) {
1076 const char* signature = entry.signature_ != nullptr ? entry.signature_ : "";
1077 FILE* out_file = reinterpret_cast<FILE*>(context);
1078 fprintf(out_file, " 0x%04x - 0x%04x reg=%d %s %s %s\n",
1079 entry.start_address_, entry.end_address_, entry.reg_,
1080 entry.name_, entry.descriptor_, signature);
1081 }
1082
1083 /*
1084 * Lookup functions.
1085 */
StringDataByIdx(uint32_t idx,dex_ir::Collections & collections)1086 static const char* StringDataByIdx(uint32_t idx, dex_ir::Collections& collections) {
1087 dex_ir::StringId* string_id = collections.GetStringIdOrNullPtr(idx);
1088 if (string_id == nullptr) {
1089 return nullptr;
1090 }
1091 return string_id->Data();
1092 }
1093
StringDataByTypeIdx(uint16_t idx,dex_ir::Collections & collections)1094 static const char* StringDataByTypeIdx(uint16_t idx, dex_ir::Collections& collections) {
1095 dex_ir::TypeId* type_id = collections.GetTypeIdOrNullPtr(idx);
1096 if (type_id == nullptr) {
1097 return nullptr;
1098 }
1099 dex_ir::StringId* string_id = type_id->GetStringId();
1100 if (string_id == nullptr) {
1101 return nullptr;
1102 }
1103 return string_id->Data();
1104 }
1105
1106
1107 /*
1108 * Dumps code of a method.
1109 */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset,const char * declaring_class_descriptor,const char * method_name,bool is_static,const dex_ir::ProtoId * proto)1110 void DexLayout::DumpCode(uint32_t idx,
1111 const dex_ir::CodeItem* code,
1112 uint32_t code_offset,
1113 const char* declaring_class_descriptor,
1114 const char* method_name,
1115 bool is_static,
1116 const dex_ir::ProtoId* proto) {
1117 fprintf(out_file_, " registers : %d\n", code->RegistersSize());
1118 fprintf(out_file_, " ins : %d\n", code->InsSize());
1119 fprintf(out_file_, " outs : %d\n", code->OutsSize());
1120 fprintf(out_file_, " insns size : %d 16-bit code units\n",
1121 code->InsnsSize());
1122
1123 // Bytecode disassembly, if requested.
1124 if (options_.disassemble_) {
1125 DumpBytecodes(idx, code, code_offset);
1126 }
1127
1128 // Try-catch blocks.
1129 DumpCatches(code);
1130
1131 // Positions and locals table in the debug info.
1132 dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
1133 fprintf(out_file_, " positions : \n");
1134 if (debug_info != nullptr) {
1135 DexFile::DecodeDebugPositionInfo(debug_info->GetDebugInfo(),
1136 [this](uint32_t idx) {
1137 return StringDataByIdx(idx, this->header_->GetCollections());
1138 },
1139 DumpPositionsCb,
1140 out_file_);
1141 }
1142 fprintf(out_file_, " locals : \n");
1143 if (debug_info != nullptr) {
1144 std::vector<const char*> arg_descriptors;
1145 const dex_ir::TypeList* parameters = proto->Parameters();
1146 if (parameters != nullptr) {
1147 const dex_ir::TypeIdVector* parameter_type_vector = parameters->GetTypeList();
1148 if (parameter_type_vector != nullptr) {
1149 for (const dex_ir::TypeId* type_id : *parameter_type_vector) {
1150 arg_descriptors.push_back(type_id->GetStringId()->Data());
1151 }
1152 }
1153 }
1154 DexFile::DecodeDebugLocalInfo(debug_info->GetDebugInfo(),
1155 "DexLayout in-memory",
1156 declaring_class_descriptor,
1157 arg_descriptors,
1158 method_name,
1159 is_static,
1160 code->RegistersSize(),
1161 code->InsSize(),
1162 code->InsnsSize(),
1163 [this](uint32_t idx) {
1164 return StringDataByIdx(idx, this->header_->GetCollections());
1165 },
1166 [this](uint32_t idx) {
1167 return
1168 StringDataByTypeIdx(dchecked_integral_cast<uint16_t>(idx),
1169 this->header_->GetCollections());
1170 },
1171 DumpLocalsCb,
1172 out_file_);
1173 }
1174 }
1175
1176 /*
1177 * Dumps a method.
1178 */
DumpMethod(uint32_t idx,uint32_t flags,const dex_ir::CodeItem * code,int i)1179 void DexLayout::DumpMethod(uint32_t idx, uint32_t flags, const dex_ir::CodeItem* code, int i) {
1180 // Bail for anything private if export only requested.
1181 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1182 return;
1183 }
1184
1185 dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1186 const char* name = method_id->Name()->Data();
1187 char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1188 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1189 char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1190
1191 if (options_.output_format_ == kOutputPlain) {
1192 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1193 fprintf(out_file_, " name : '%s'\n", name);
1194 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1195 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1196 if (code == nullptr) {
1197 fprintf(out_file_, " code : (none)\n");
1198 } else {
1199 fprintf(out_file_, " code -\n");
1200 DumpCode(idx,
1201 code,
1202 code->GetOffset(),
1203 back_descriptor,
1204 name,
1205 (flags & kAccStatic) != 0,
1206 method_id->Proto());
1207 }
1208 if (options_.disassemble_) {
1209 fputc('\n', out_file_);
1210 }
1211 } else if (options_.output_format_ == kOutputXml) {
1212 const bool constructor = (name[0] == '<');
1213
1214 // Method name and prototype.
1215 if (constructor) {
1216 std::string dot(DescriptorClassToDot(back_descriptor));
1217 fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1218 dot = DescriptorToDotWrapper(back_descriptor);
1219 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1220 } else {
1221 fprintf(out_file_, "<method name=\"%s\"\n", name);
1222 const char* return_type = strrchr(type_descriptor, ')');
1223 if (return_type == nullptr) {
1224 LOG(ERROR) << "bad method type descriptor '" << type_descriptor << "'";
1225 goto bail;
1226 }
1227 std::string dot(DescriptorToDotWrapper(return_type + 1));
1228 fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1229 fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1230 fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1231 fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1232 (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1233 }
1234
1235 // Additional method flags.
1236 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1237 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1238 // The "deprecated=" not knowable w/o parsing annotations.
1239 fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1240
1241 // Parameters.
1242 if (type_descriptor[0] != '(') {
1243 LOG(ERROR) << "ERROR: bad descriptor '" << type_descriptor << "'";
1244 goto bail;
1245 }
1246 char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1247 const char* base = type_descriptor + 1;
1248 int arg_num = 0;
1249 while (*base != ')') {
1250 char* cp = tmp_buf;
1251 while (*base == '[') {
1252 *cp++ = *base++;
1253 }
1254 if (*base == 'L') {
1255 // Copy through ';'.
1256 do {
1257 *cp = *base++;
1258 } while (*cp++ != ';');
1259 } else {
1260 // Primitive char, copy it.
1261 if (strchr("ZBCSIFJD", *base) == nullptr) {
1262 LOG(ERROR) << "ERROR: bad method signature '" << base << "'";
1263 break; // while
1264 }
1265 *cp++ = *base++;
1266 }
1267 // Null terminate and display.
1268 *cp++ = '\0';
1269 std::string dot(DescriptorToDotWrapper(tmp_buf));
1270 fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1271 "</parameter>\n", arg_num++, dot.c_str());
1272 } // while
1273 free(tmp_buf);
1274 if (constructor) {
1275 fprintf(out_file_, "</constructor>\n");
1276 } else {
1277 fprintf(out_file_, "</method>\n");
1278 }
1279 }
1280
1281 bail:
1282 free(type_descriptor);
1283 free(access_str);
1284 }
1285
1286 /*
1287 * Dumps a static (class) field.
1288 */
DumpSField(uint32_t idx,uint32_t flags,int i,dex_ir::EncodedValue * init)1289 void DexLayout::DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init) {
1290 // Bail for anything private if export only requested.
1291 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1292 return;
1293 }
1294
1295 dex_ir::FieldId* field_id = header_->GetCollections().GetFieldId(idx);
1296 const char* name = field_id->Name()->Data();
1297 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1298 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1299 char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1300
1301 if (options_.output_format_ == kOutputPlain) {
1302 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1303 fprintf(out_file_, " name : '%s'\n", name);
1304 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1305 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1306 if (init != nullptr) {
1307 fputs(" value : ", out_file_);
1308 DumpEncodedValue(init);
1309 fputs("\n", out_file_);
1310 }
1311 } else if (options_.output_format_ == kOutputXml) {
1312 fprintf(out_file_, "<field name=\"%s\"\n", name);
1313 std::string dot(DescriptorToDotWrapper(type_descriptor));
1314 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1315 fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1316 fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1317 // The "value=" is not knowable w/o parsing annotations.
1318 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1319 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1320 // The "deprecated=" is not knowable w/o parsing annotations.
1321 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1322 if (init != nullptr) {
1323 fputs(" value=\"", out_file_);
1324 DumpEncodedValue(init);
1325 fputs("\"\n", out_file_);
1326 }
1327 fputs(">\n</field>\n", out_file_);
1328 }
1329
1330 free(access_str);
1331 }
1332
1333 /*
1334 * Dumps an instance field.
1335 */
DumpIField(uint32_t idx,uint32_t flags,int i)1336 void DexLayout::DumpIField(uint32_t idx, uint32_t flags, int i) {
1337 DumpSField(idx, flags, i, nullptr);
1338 }
1339
1340 /*
1341 * Dumps the class.
1342 *
1343 * Note "idx" is a DexClassDef index, not a DexTypeId index.
1344 *
1345 * If "*last_package" is nullptr or does not match the current class' package,
1346 * the value will be replaced with a newly-allocated string.
1347 */
DumpClass(int idx,char ** last_package)1348 void DexLayout::DumpClass(int idx, char** last_package) {
1349 dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
1350 // Omitting non-public class.
1351 if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1352 return;
1353 }
1354
1355 if (options_.show_section_headers_) {
1356 DumpClassDef(idx);
1357 }
1358
1359 if (options_.show_annotations_) {
1360 DumpClassAnnotations(idx);
1361 }
1362
1363 // For the XML output, show the package name. Ideally we'd gather
1364 // up the classes, sort them, and dump them alphabetically so the
1365 // package name wouldn't jump around, but that's not a great plan
1366 // for something that needs to run on the device.
1367 const char* class_descriptor =
1368 header_->GetCollections().GetClassDef(idx)->ClassType()->GetStringId()->Data();
1369 if (!(class_descriptor[0] == 'L' &&
1370 class_descriptor[strlen(class_descriptor)-1] == ';')) {
1371 // Arrays and primitives should not be defined explicitly. Keep going?
1372 LOG(ERROR) << "Malformed class name '" << class_descriptor << "'";
1373 } else if (options_.output_format_ == kOutputXml) {
1374 char* mangle = strdup(class_descriptor + 1);
1375 mangle[strlen(mangle)-1] = '\0';
1376
1377 // Reduce to just the package name.
1378 char* last_slash = strrchr(mangle, '/');
1379 if (last_slash != nullptr) {
1380 *last_slash = '\0';
1381 } else {
1382 *mangle = '\0';
1383 }
1384
1385 for (char* cp = mangle; *cp != '\0'; cp++) {
1386 if (*cp == '/') {
1387 *cp = '.';
1388 }
1389 } // for
1390
1391 if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1392 // Start of a new package.
1393 if (*last_package != nullptr) {
1394 fprintf(out_file_, "</package>\n");
1395 }
1396 fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1397 free(*last_package);
1398 *last_package = mangle;
1399 } else {
1400 free(mangle);
1401 }
1402 }
1403
1404 // General class information.
1405 char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1406 const char* superclass_descriptor = nullptr;
1407 if (class_def->Superclass() != nullptr) {
1408 superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1409 }
1410 if (options_.output_format_ == kOutputPlain) {
1411 fprintf(out_file_, "Class #%d -\n", idx);
1412 fprintf(out_file_, " Class descriptor : '%s'\n", class_descriptor);
1413 fprintf(out_file_, " Access flags : 0x%04x (%s)\n",
1414 class_def->GetAccessFlags(), access_str);
1415 if (superclass_descriptor != nullptr) {
1416 fprintf(out_file_, " Superclass : '%s'\n", superclass_descriptor);
1417 }
1418 fprintf(out_file_, " Interfaces -\n");
1419 } else {
1420 std::string dot(DescriptorClassToDot(class_descriptor));
1421 fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1422 if (superclass_descriptor != nullptr) {
1423 dot = DescriptorToDotWrapper(superclass_descriptor);
1424 fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1425 }
1426 fprintf(out_file_, " interface=%s\n",
1427 QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1428 fprintf(out_file_, " abstract=%s\n",
1429 QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1430 fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1431 fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1432 // The "deprecated=" not knowable w/o parsing annotations.
1433 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1434 fprintf(out_file_, ">\n");
1435 }
1436
1437 // Interfaces.
1438 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1439 if (interfaces != nullptr) {
1440 const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1441 for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1442 DumpInterface((*interfaces_vector)[i], i);
1443 } // for
1444 }
1445
1446 // Fields and methods.
1447 dex_ir::ClassData* class_data = class_def->GetClassData();
1448 // Prepare data for static fields.
1449 dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1450 dex_ir::EncodedValueVector* encoded_values =
1451 static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1452 const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1453
1454 // Static fields.
1455 if (options_.output_format_ == kOutputPlain) {
1456 fprintf(out_file_, " Static fields -\n");
1457 }
1458 if (class_data != nullptr) {
1459 dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1460 if (static_fields != nullptr) {
1461 for (uint32_t i = 0; i < static_fields->size(); i++) {
1462 DumpSField((*static_fields)[i]->GetFieldId()->GetIndex(),
1463 (*static_fields)[i]->GetAccessFlags(),
1464 i,
1465 i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1466 } // for
1467 }
1468 }
1469
1470 // Instance fields.
1471 if (options_.output_format_ == kOutputPlain) {
1472 fprintf(out_file_, " Instance fields -\n");
1473 }
1474 if (class_data != nullptr) {
1475 dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1476 if (instance_fields != nullptr) {
1477 for (uint32_t i = 0; i < instance_fields->size(); i++) {
1478 DumpIField((*instance_fields)[i]->GetFieldId()->GetIndex(),
1479 (*instance_fields)[i]->GetAccessFlags(),
1480 i);
1481 } // for
1482 }
1483 }
1484
1485 // Direct methods.
1486 if (options_.output_format_ == kOutputPlain) {
1487 fprintf(out_file_, " Direct methods -\n");
1488 }
1489 if (class_data != nullptr) {
1490 dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1491 if (direct_methods != nullptr) {
1492 for (uint32_t i = 0; i < direct_methods->size(); i++) {
1493 DumpMethod((*direct_methods)[i]->GetMethodId()->GetIndex(),
1494 (*direct_methods)[i]->GetAccessFlags(),
1495 (*direct_methods)[i]->GetCodeItem(),
1496 i);
1497 } // for
1498 }
1499 }
1500
1501 // Virtual methods.
1502 if (options_.output_format_ == kOutputPlain) {
1503 fprintf(out_file_, " Virtual methods -\n");
1504 }
1505 if (class_data != nullptr) {
1506 dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1507 if (virtual_methods != nullptr) {
1508 for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1509 DumpMethod((*virtual_methods)[i]->GetMethodId()->GetIndex(),
1510 (*virtual_methods)[i]->GetAccessFlags(),
1511 (*virtual_methods)[i]->GetCodeItem(),
1512 i);
1513 } // for
1514 }
1515 }
1516
1517 // End of class.
1518 if (options_.output_format_ == kOutputPlain) {
1519 const char* file_name = "unknown";
1520 if (class_def->SourceFile() != nullptr) {
1521 file_name = class_def->SourceFile()->Data();
1522 }
1523 const dex_ir::StringId* source_file = class_def->SourceFile();
1524 fprintf(out_file_, " source_file_idx : %d (%s)\n\n",
1525 source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1526 } else if (options_.output_format_ == kOutputXml) {
1527 fprintf(out_file_, "</class>\n");
1528 }
1529
1530 free(access_str);
1531 }
1532
DumpDexFile()1533 void DexLayout::DumpDexFile() {
1534 // Headers.
1535 if (options_.show_file_headers_) {
1536 DumpFileHeader();
1537 }
1538
1539 // Open XML context.
1540 if (options_.output_format_ == kOutputXml) {
1541 fprintf(out_file_, "<api>\n");
1542 }
1543
1544 // Iterate over all classes.
1545 char* package = nullptr;
1546 const uint32_t class_defs_size = header_->GetCollections().ClassDefsSize();
1547 for (uint32_t i = 0; i < class_defs_size; i++) {
1548 DumpClass(i, &package);
1549 } // for
1550
1551 // Free the last package allocated.
1552 if (package != nullptr) {
1553 fprintf(out_file_, "</package>\n");
1554 free(package);
1555 }
1556
1557 // Close XML context.
1558 if (options_.output_format_ == kOutputXml) {
1559 fprintf(out_file_, "</api>\n");
1560 }
1561 }
1562
LayoutClassDefsAndClassData(const DexFile * dex_file)1563 void DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1564 std::vector<dex_ir::ClassDef*> new_class_def_order;
1565 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1566 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1567 if (info_->ContainsClass(*dex_file, type_idx)) {
1568 new_class_def_order.push_back(class_def.get());
1569 }
1570 }
1571 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1572 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1573 if (!info_->ContainsClass(*dex_file, type_idx)) {
1574 new_class_def_order.push_back(class_def.get());
1575 }
1576 }
1577 std::unordered_set<dex_ir::ClassData*> visited_class_data;
1578 size_t class_data_index = 0;
1579 dex_ir::CollectionVector<dex_ir::ClassData>::Vector& class_datas =
1580 header_->GetCollections().ClassDatas();
1581 for (dex_ir::ClassDef* class_def : new_class_def_order) {
1582 dex_ir::ClassData* class_data = class_def->GetClassData();
1583 if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1584 visited_class_data.insert(class_data);
1585 // Overwrite the existing vector with the new ordering, note that the sets of objects are
1586 // equivalent, but the order changes. This is why this is not a memory leak.
1587 // TODO: Consider cleaning this up with a shared_ptr.
1588 class_datas[class_data_index].release();
1589 class_datas[class_data_index].reset(class_data);
1590 ++class_data_index;
1591 }
1592 }
1593 CHECK_EQ(class_data_index, class_datas.size());
1594
1595 if (DexLayout::kChangeClassDefOrder) {
1596 // This currently produces dex files that violate the spec since the super class class_def is
1597 // supposed to occur before any subclasses.
1598 dex_ir::CollectionVector<dex_ir::ClassDef>::Vector& class_defs =
1599 header_->GetCollections().ClassDefs();
1600 CHECK_EQ(new_class_def_order.size(), class_defs.size());
1601 for (size_t i = 0; i < class_defs.size(); ++i) {
1602 // Overwrite the existing vector with the new ordering, note that the sets of objects are
1603 // equivalent, but the order changes. This is why this is not a memory leak.
1604 // TODO: Consider cleaning this up with a shared_ptr.
1605 class_defs[i].release();
1606 class_defs[i].reset(new_class_def_order[i]);
1607 }
1608 }
1609 }
1610
LayoutStringData(const DexFile * dex_file)1611 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1612 const size_t num_strings = header_->GetCollections().StringIds().size();
1613 std::vector<bool> is_shorty(num_strings, false);
1614 std::vector<bool> from_hot_method(num_strings, false);
1615 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1616 // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1617 // as hot. Add its super class and interfaces as well, which can be used during initialization.
1618 const bool is_profile_class =
1619 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1620 if (is_profile_class) {
1621 from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1622 const dex_ir::TypeId* superclass = class_def->Superclass();
1623 if (superclass != nullptr) {
1624 from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1625 }
1626 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1627 if (interfaces != nullptr) {
1628 for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1629 from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1630 }
1631 }
1632 }
1633 dex_ir::ClassData* data = class_def->GetClassData();
1634 if (data == nullptr) {
1635 continue;
1636 }
1637 for (size_t i = 0; i < 2; ++i) {
1638 for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1639 const dex_ir::MethodId* method_id = method->GetMethodId();
1640 dex_ir::CodeItem* code_item = method->GetCodeItem();
1641 if (code_item == nullptr) {
1642 continue;
1643 }
1644 const bool is_clinit = is_profile_class &&
1645 (method->GetAccessFlags() & kAccConstructor) != 0 &&
1646 (method->GetAccessFlags() & kAccStatic) != 0;
1647 const bool method_executed = is_clinit ||
1648 info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1649 if (!method_executed) {
1650 continue;
1651 }
1652 is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1653 dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1654 if (fixups == nullptr) {
1655 continue;
1656 }
1657 // Add const-strings.
1658 for (dex_ir::StringId* id : fixups->StringIds()) {
1659 from_hot_method[id->GetIndex()] = true;
1660 }
1661 // Add field classes, names, and types.
1662 for (dex_ir::FieldId* id : fixups->FieldIds()) {
1663 // TODO: Only visit field ids from static getters and setters.
1664 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1665 from_hot_method[id->Name()->GetIndex()] = true;
1666 from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1667 }
1668 // For clinits, add referenced method classes, names, and protos.
1669 if (is_clinit) {
1670 for (dex_ir::MethodId* id : fixups->MethodIds()) {
1671 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1672 from_hot_method[id->Name()->GetIndex()] = true;
1673 is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1674 }
1675 }
1676 }
1677 }
1678 }
1679 // Sort string data by specified order.
1680 std::vector<dex_ir::StringId*> string_ids;
1681 for (auto& string_id : header_->GetCollections().StringIds()) {
1682 string_ids.push_back(string_id.get());
1683 }
1684 std::sort(string_ids.begin(),
1685 string_ids.end(),
1686 [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1687 const dex_ir::StringId* b) {
1688 const bool a_is_hot = from_hot_method[a->GetIndex()];
1689 const bool b_is_hot = from_hot_method[b->GetIndex()];
1690 if (a_is_hot != b_is_hot) {
1691 return a_is_hot < b_is_hot;
1692 }
1693 // After hot methods are partitioned, subpartition shorties.
1694 const bool a_is_shorty = is_shorty[a->GetIndex()];
1695 const bool b_is_shorty = is_shorty[b->GetIndex()];
1696 if (a_is_shorty != b_is_shorty) {
1697 return a_is_shorty < b_is_shorty;
1698 }
1699 // Order by index by default.
1700 return a->GetIndex() < b->GetIndex();
1701 });
1702 dex_ir::CollectionVector<dex_ir::StringData>::Vector& string_datas =
1703 header_->GetCollections().StringDatas();
1704 // Now we know what order we want the string data, reorder them.
1705 size_t data_index = 0;
1706 for (dex_ir::StringId* string_id : string_ids) {
1707 string_datas[data_index].release();
1708 string_datas[data_index].reset(string_id->DataItem());
1709 ++data_index;
1710 }
1711 if (kIsDebugBuild) {
1712 std::unordered_set<dex_ir::StringData*> visited;
1713 for (const std::unique_ptr<dex_ir::StringData>& data : string_datas) {
1714 visited.insert(data.get());
1715 }
1716 for (auto& string_id : header_->GetCollections().StringIds()) {
1717 CHECK(visited.find(string_id->DataItem()) != visited.end());
1718 }
1719 }
1720 CHECK_EQ(data_index, string_datas.size());
1721 }
1722
1723 // Orders code items according to specified class data ordering.
LayoutCodeItems(const DexFile * dex_file)1724 void DexLayout::LayoutCodeItems(const DexFile* dex_file) {
1725 static constexpr InvokeType invoke_types[] = {
1726 kDirect,
1727 kVirtual
1728 };
1729
1730 std::unordered_map<dex_ir::CodeItem*, LayoutType>& code_item_layout =
1731 layout_hotness_info_.code_item_layout_;
1732
1733 // Assign hotness flags to all code items.
1734 for (InvokeType invoke_type : invoke_types) {
1735 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1736 const bool is_profile_class =
1737 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1738
1739 // Skip classes that are not defined in this dex file.
1740 dex_ir::ClassData* class_data = class_def->GetClassData();
1741 if (class_data == nullptr) {
1742 continue;
1743 }
1744 for (auto& method : *(invoke_type == InvokeType::kDirect
1745 ? class_data->DirectMethods()
1746 : class_data->VirtualMethods())) {
1747 const dex_ir::MethodId *method_id = method->GetMethodId();
1748 dex_ir::CodeItem *code_item = method->GetCodeItem();
1749 if (code_item == nullptr) {
1750 continue;
1751 }
1752 // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1753 const bool is_clinit = (method->GetAccessFlags() & kAccConstructor) != 0 &&
1754 (method->GetAccessFlags() & kAccStatic) != 0;
1755 const bool is_startup_clinit = is_profile_class && is_clinit;
1756 using Hotness = ProfileCompilationInfo::MethodHotness;
1757 Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1758 LayoutType state = LayoutType::kLayoutTypeUnused;
1759 if (hotness.IsHot()) {
1760 // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1761 // now.
1762 state = LayoutType::kLayoutTypeHot;
1763 } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1764 // Startup clinit or a method that only has the startup flag.
1765 state = LayoutType::kLayoutTypeStartupOnly;
1766 } else if (is_clinit) {
1767 state = LayoutType::kLayoutTypeUsedOnce;
1768 } else if (hotness.IsInProfile()) {
1769 state = LayoutType::kLayoutTypeSometimesUsed;
1770 }
1771 auto it = code_item_layout.emplace(code_item, state);
1772 if (!it.second) {
1773 LayoutType& layout_type = it.first->second;
1774 // Already exists, merge the hotness.
1775 layout_type = MergeLayoutType(layout_type, state);
1776 }
1777 }
1778 }
1779 }
1780
1781 dex_ir::CollectionVector<dex_ir::CodeItem>::Vector& code_items =
1782 header_->GetCollections().CodeItems();
1783 if (VLOG_IS_ON(dex)) {
1784 size_t layout_count[static_cast<size_t>(LayoutType::kLayoutTypeCount)] = {};
1785 for (const std::unique_ptr<dex_ir::CodeItem>& code_item : code_items) {
1786 auto it = code_item_layout.find(code_item.get());
1787 DCHECK(it != code_item_layout.end());
1788 ++layout_count[static_cast<size_t>(it->second)];
1789 }
1790 for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
1791 LOG(INFO) << "Code items in category " << i << " count=" << layout_count[i];
1792 }
1793 }
1794
1795 // Sort the code items vector by new layout. The writing process will take care of calculating
1796 // all the offsets. Stable sort to preserve any existing locality that might be there.
1797 std::stable_sort(code_items.begin(),
1798 code_items.end(),
1799 [&](const std::unique_ptr<dex_ir::CodeItem>& a,
1800 const std::unique_ptr<dex_ir::CodeItem>& b) {
1801 auto it_a = code_item_layout.find(a.get());
1802 auto it_b = code_item_layout.find(b.get());
1803 DCHECK(it_a != code_item_layout.end());
1804 DCHECK(it_b != code_item_layout.end());
1805 const LayoutType layout_type_a = it_a->second;
1806 const LayoutType layout_type_b = it_b->second;
1807 return layout_type_a < layout_type_b;
1808 });
1809 }
1810
LayoutOutputFile(const DexFile * dex_file)1811 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1812 LayoutStringData(dex_file);
1813 LayoutClassDefsAndClassData(dex_file);
1814 LayoutCodeItems(dex_file);
1815 }
1816
OutputDexFile(const DexFile * input_dex_file,bool compute_offsets,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1817 bool DexLayout::OutputDexFile(const DexFile* input_dex_file,
1818 bool compute_offsets,
1819 std::unique_ptr<DexContainer>* dex_container,
1820 std::string* error_msg) {
1821 const std::string& dex_file_location = input_dex_file->GetLocation();
1822 std::unique_ptr<File> new_file;
1823 // If options_.output_dex_directory_ is non null, we are outputting to a file.
1824 if (options_.output_dex_directory_ != nullptr) {
1825 std::string output_location(options_.output_dex_directory_);
1826 size_t last_slash = dex_file_location.rfind('/');
1827 std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1828 if (output_location == dex_file_directory) {
1829 output_location = dex_file_location + ".new";
1830 } else if (last_slash != std::string::npos) {
1831 output_location += dex_file_location.substr(last_slash);
1832 } else {
1833 output_location += "/" + dex_file_location + ".new";
1834 }
1835 new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1836 if (new_file == nullptr) {
1837 LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1838 return false;
1839 }
1840 }
1841 if (!DexWriter::Output(this, dex_container, compute_offsets, error_msg)) {
1842 return false;
1843 }
1844 if (new_file != nullptr) {
1845 DexContainer* const container = dex_container->get();
1846 DexContainer::Section* const main_section = container->GetMainSection();
1847 if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
1848 LOG(ERROR) << "Failed to write main section for dex file " << dex_file_location;
1849 new_file->Erase();
1850 return false;
1851 }
1852 DexContainer::Section* const data_section = container->GetDataSection();
1853 if (!new_file->WriteFully(data_section->Begin(), data_section->Size())) {
1854 LOG(ERROR) << "Failed to write data section for dex file " << dex_file_location;
1855 new_file->Erase();
1856 return false;
1857 }
1858 UNUSED(new_file->FlushCloseOrErase());
1859 }
1860 return true;
1861 }
1862
1863 /*
1864 * Dumps the requested sections of the file.
1865 */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1866 bool DexLayout::ProcessDexFile(const char* file_name,
1867 const DexFile* dex_file,
1868 size_t dex_file_index,
1869 std::unique_ptr<DexContainer>* dex_container,
1870 std::string* error_msg) {
1871 const bool has_output_container = dex_container != nullptr;
1872 const bool output = options_.output_dex_directory_ != nullptr || has_output_container;
1873
1874 // Try to avoid eagerly assigning offsets to find bugs since GetOffset will abort if the offset
1875 // is unassigned.
1876 bool eagerly_assign_offsets = false;
1877 if (options_.visualize_pattern_ || options_.show_section_statistics_ || options_.dump_) {
1878 // These options required the offsets for dumping purposes.
1879 eagerly_assign_offsets = true;
1880 }
1881 std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file,
1882 eagerly_assign_offsets,
1883 GetOptions()));
1884 SetHeader(header.get());
1885
1886 if (options_.verbose_) {
1887 fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1888 file_name, dex_file->GetHeader().magic_ + 4);
1889 }
1890
1891 if (options_.visualize_pattern_) {
1892 VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1893 return true;
1894 }
1895
1896 if (options_.show_section_statistics_) {
1897 ShowDexSectionStatistics(header_, dex_file_index);
1898 return true;
1899 }
1900
1901 // Dump dex file.
1902 if (options_.dump_) {
1903 DumpDexFile();
1904 }
1905
1906 // In case we are outputting to a file, keep it open so we can verify.
1907 if (output) {
1908 // Layout information about what strings and code items are hot. Used by the writing process
1909 // to generate the sections that are stored in the oat file.
1910 bool do_layout = info_ != nullptr;
1911 if (do_layout) {
1912 LayoutOutputFile(dex_file);
1913 }
1914 // The output needs a dex container, use a temporary one.
1915 std::unique_ptr<DexContainer> temp_container;
1916 if (dex_container == nullptr) {
1917 dex_container = &temp_container;
1918 }
1919 // If we didn't set the offsets eagerly, we definitely need to compute them here.
1920 if (!OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets, dex_container, error_msg)) {
1921 return false;
1922 }
1923
1924 // Clear header before verifying to reduce peak RAM usage.
1925 const size_t file_size = header_->FileSize();
1926 header.reset();
1927
1928 // Verify the output dex file's structure, only enabled by default for debug builds.
1929 if (options_.verify_output_ && has_output_container) {
1930 std::string location = "memory mapped file for " + std::string(file_name);
1931 // Dex file verifier cannot handle compact dex.
1932 bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
1933 const ArtDexFileLoader dex_file_loader;
1934 DexContainer::Section* const main_section = (*dex_container)->GetMainSection();
1935 DexContainer::Section* const data_section = (*dex_container)->GetDataSection();
1936 DCHECK_EQ(file_size, main_section->Size())
1937 << main_section->Size() << " " << data_section->Size();
1938 std::unique_ptr<const DexFile> output_dex_file(
1939 dex_file_loader.OpenWithDataSection(
1940 main_section->Begin(),
1941 main_section->Size(),
1942 data_section->Begin(),
1943 data_section->Size(),
1944 location,
1945 /* checksum */ 0,
1946 /*oat_dex_file*/ nullptr,
1947 verify,
1948 /*verify_checksum*/ false,
1949 error_msg));
1950 CHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << *error_msg;
1951
1952 // Do IR-level comparison between input and output. This check ignores potential differences
1953 // due to layout, so offsets are not checked. Instead, it checks the data contents of each
1954 // item.
1955 //
1956 // Regenerate output IR to catch any bugs that might happen during writing.
1957 std::unique_ptr<dex_ir::Header> output_header(
1958 dex_ir::DexIrBuilder(*output_dex_file,
1959 /*eagerly_assign_offsets*/ true,
1960 GetOptions()));
1961 std::unique_ptr<dex_ir::Header> orig_header(
1962 dex_ir::DexIrBuilder(*dex_file,
1963 /*eagerly_assign_offsets*/ true,
1964 GetOptions()));
1965 CHECK(VerifyOutputDexFile(output_header.get(), orig_header.get(), error_msg)) << *error_msg;
1966 }
1967 }
1968 return true;
1969 }
1970
1971 /*
1972 * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
1973 */
ProcessFile(const char * file_name)1974 int DexLayout::ProcessFile(const char* file_name) {
1975 if (options_.verbose_) {
1976 fprintf(out_file_, "Processing '%s'...\n", file_name);
1977 }
1978
1979 // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
1980 // all of which are Zip archives with "classes.dex" inside.
1981 const bool verify_checksum = !options_.ignore_bad_checksum_;
1982 std::string error_msg;
1983 const ArtDexFileLoader dex_file_loader;
1984 std::vector<std::unique_ptr<const DexFile>> dex_files;
1985 if (!dex_file_loader.Open(
1986 file_name, file_name, /* verify */ true, verify_checksum, &error_msg, &dex_files)) {
1987 // Display returned error message to user. Note that this error behavior
1988 // differs from the error messages shown by the original Dalvik dexdump.
1989 LOG(ERROR) << error_msg;
1990 return -1;
1991 }
1992
1993 // Success. Either report checksum verification or process
1994 // all dex files found in given file.
1995 if (options_.checksum_only_) {
1996 fprintf(out_file_, "Checksum verified\n");
1997 } else {
1998 for (size_t i = 0; i < dex_files.size(); i++) {
1999 // Pass in a null container to avoid output by default.
2000 if (!ProcessDexFile(file_name,
2001 dex_files[i].get(),
2002 i,
2003 /*dex_container*/ nullptr,
2004 &error_msg)) {
2005 LOG(WARNING) << "Failed to run dex file " << i << " in " << file_name << " : " << error_msg;
2006 }
2007 }
2008 }
2009 return 0;
2010 }
2011
2012 } // namespace art
2013