1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  * Implementation file of the dexlayout utility.
17  *
18  * This is a tool to read dex files into an internal representation,
19  * reorganize the representation, and emit dex files with a better
20  * file layout.
21  */
22 
23 #include "dexlayout.h"
24 
25 #include <inttypes.h>
26 #include <stdio.h>
27 
28 #include <iostream>
29 #include <memory>
30 #include <sstream>
31 #include <unordered_set>
32 #include <vector>
33 
34 #include "android-base/stringprintf.h"
35 
36 #include "base/logging.h"  // For VLOG_IS_ON.
37 #include "base/hiddenapi_flags.h"
38 #include "base/mem_map.h"
39 #include "base/mman.h"  // For the PROT_* and MAP_* constants.
40 #include "base/os.h"
41 #include "base/utils.h"
42 #include "dex/art_dex_file_loader.h"
43 #include "dex/descriptors_names.h"
44 #include "dex/dex_file-inl.h"
45 #include "dex/dex_file_layout.h"
46 #include "dex/dex_file_loader.h"
47 #include "dex/dex_file_types.h"
48 #include "dex/dex_file_verifier.h"
49 #include "dex/dex_instruction-inl.h"
50 #include "dex_ir_builder.h"
51 #include "dex_verify.h"
52 #include "dex_visualize.h"
53 #include "dex_writer.h"
54 #include "profile/profile_compilation_info.h"
55 
56 namespace art {
57 
58 using android::base::StringPrintf;
59 
/*
 * Selects which table of flag names CreateAccessFlagStr() uses: the one
 * for classes, for methods, or for fields. kAccessForMAX is the number of
 * selectable tables and is not itself a valid selector.
 */
enum AccessFor {
  kAccessForClass = 0,
  kAccessForMethod = 1,
  kAccessForField = 2,
  kAccessForMAX
};

// Number of access-flag bits that have an entry in each name table.
const int kNumFlags = 18;
67 
/*
 * Reads an unsigned 16-bit value stored least-significant byte first
 * in the two bytes at "src".
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
74 
/*
 * Extracts the simple class name from a type descriptor: everything after
 * the last '/' (or after the first character when there is no '/'), minus
 * the final character, which is expected to be the trailing ';'.
 * For example, "Ljava/lang/String;" becomes "String".
 *
 * Returns an empty string when no name follows the prefix, which includes
 * a null or empty descriptor.
 */
static std::string DescriptorClassToName(const char* str) {
  if (str == nullptr) {
    return std::string();
  }
  const std::string descriptor(str);

  // Index of the first character of the class name: one past the last '/',
  // or one past the leading 'L' when the descriptor has no package part.
  const size_t last_slash = descriptor.rfind('/');
  const size_t name_start = (last_slash == std::string::npos) ? 1 : last_slash + 1;

  // Descriptors too short to hold a name would make the length computation
  // below wrap around and, for an empty descriptor, would call substr()
  // with a start position past the end of the string.
  if (name_start >= descriptor.size()) {
    return std::string();
  }

  // Copy class name over, trimming trailing ';'.
  return descriptor.substr(name_start, descriptor.size() - 1 - name_start);
}
95 
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
102 
/*
 * Maps a boolean to the text "true" or "false" wrapped in double quotes.
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
109 
110 /*
111  * Returns a quoted string representing the access flags.
112  */
QuotedVisibility(uint32_t access_flags)113 static const char* QuotedVisibility(uint32_t access_flags) {
114   if (access_flags & kAccPublic) {
115     return "\"public\"";
116   } else if (access_flags & kAccProtected) {
117     return "\"protected\"";
118   } else if (access_flags & kAccPrivate) {
119     return "\"private\"";
120   } else {
121     return "\"package\"";
122   }
123 }
124 
/*
 * Returns how many bits of "val" are set.
 *
 * The count is built up in parallel: first per 2-bit pair, then per
 * 4-bit nibble, then per byte; the final multiply adds the four byte
 * counts together into the top byte.
 */
static int CountOnes(uint32_t val) {
  const uint32_t pair_counts = val - ((val >> 1) & 0x55555555);
  const uint32_t nibble_counts =
      (pair_counts & 0x33333333) + ((pair_counts >> 2) & 0x33333333);
  const uint32_t byte_counts = (nibble_counts + (nibble_counts >> 4)) & 0x0F0F0F0F;
  return (byte_counts * 0x01010101) >> 24;
}
133 
134 /*
135  * Creates a new string with human-readable access flags.
136  *
137  * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
138  */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)139 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
140   static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
141     {
142       "PUBLIC",                /* 0x00001 */
143       "PRIVATE",               /* 0x00002 */
144       "PROTECTED",             /* 0x00004 */
145       "STATIC",                /* 0x00008 */
146       "FINAL",                 /* 0x00010 */
147       "?",                     /* 0x00020 */
148       "?",                     /* 0x00040 */
149       "?",                     /* 0x00080 */
150       "?",                     /* 0x00100 */
151       "INTERFACE",             /* 0x00200 */
152       "ABSTRACT",              /* 0x00400 */
153       "?",                     /* 0x00800 */
154       "SYNTHETIC",             /* 0x01000 */
155       "ANNOTATION",            /* 0x02000 */
156       "ENUM",                  /* 0x04000 */
157       "?",                     /* 0x08000 */
158       "VERIFIED",              /* 0x10000 */
159       "OPTIMIZED",             /* 0x20000 */
160     }, {
161       "PUBLIC",                /* 0x00001 */
162       "PRIVATE",               /* 0x00002 */
163       "PROTECTED",             /* 0x00004 */
164       "STATIC",                /* 0x00008 */
165       "FINAL",                 /* 0x00010 */
166       "SYNCHRONIZED",          /* 0x00020 */
167       "BRIDGE",                /* 0x00040 */
168       "VARARGS",               /* 0x00080 */
169       "NATIVE",                /* 0x00100 */
170       "?",                     /* 0x00200 */
171       "ABSTRACT",              /* 0x00400 */
172       "STRICT",                /* 0x00800 */
173       "SYNTHETIC",             /* 0x01000 */
174       "?",                     /* 0x02000 */
175       "?",                     /* 0x04000 */
176       "MIRANDA",               /* 0x08000 */
177       "CONSTRUCTOR",           /* 0x10000 */
178       "DECLARED_SYNCHRONIZED", /* 0x20000 */
179     }, {
180       "PUBLIC",                /* 0x00001 */
181       "PRIVATE",               /* 0x00002 */
182       "PROTECTED",             /* 0x00004 */
183       "STATIC",                /* 0x00008 */
184       "FINAL",                 /* 0x00010 */
185       "?",                     /* 0x00020 */
186       "VOLATILE",              /* 0x00040 */
187       "TRANSIENT",             /* 0x00080 */
188       "?",                     /* 0x00100 */
189       "?",                     /* 0x00200 */
190       "?",                     /* 0x00400 */
191       "?",                     /* 0x00800 */
192       "SYNTHETIC",             /* 0x01000 */
193       "?",                     /* 0x02000 */
194       "ENUM",                  /* 0x04000 */
195       "?",                     /* 0x08000 */
196       "?",                     /* 0x10000 */
197       "?",                     /* 0x20000 */
198     },
199   };
200 
201   // Allocate enough storage to hold the expected number of strings,
202   // plus a space between each.  We over-allocate, using the longest
203   // string above as the base metric.
204   const int kLongest = 21;  // The strlen of longest string above.
205   const int count = CountOnes(flags);
206   char* str;
207   char* cp;
208   cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
209 
210   for (int i = 0; i < kNumFlags; i++) {
211     if (flags & 0x01) {
212       const char* accessStr = kAccessStrings[for_what][i];
213       const int len = strlen(accessStr);
214       if (cp != str) {
215         *cp++ = ' ';
216       }
217       memcpy(cp, accessStr, len);
218       cp += len;
219     }
220     flags >>= 1;
221   }  // for
222 
223   *cp = '\0';
224   return str;
225 }
226 
GetHiddenapiFlagStr(uint32_t hiddenapi_flags)227 static std::string GetHiddenapiFlagStr(uint32_t hiddenapi_flags) {
228   std::stringstream ss;
229   hiddenapi::ApiList(hiddenapi_flags).Dump(ss);
230   std::string api_list = ss.str();
231   std::transform(api_list.begin(), api_list.end(), api_list.begin(), ::toupper);
232   return api_list;
233 }
234 
GetSignatureForProtoId(const dex_ir::ProtoId * proto)235 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
236   if (proto == nullptr) {
237     return "<no signature>";
238   }
239 
240   std::string result("(");
241   const dex_ir::TypeList* type_list = proto->Parameters();
242   if (type_list != nullptr) {
243     for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
244       result += type_id->GetStringId()->Data();
245     }
246   }
247   result += ")";
248   result += proto->ReturnType()->GetStringId()->Data();
249   return result;
250 }
251 
/*
 * Writes a printable rendering of the "len" bytes at "data" into "out".
 *
 * NUL becomes the two characters "\0", newline becomes "\n", any other
 * byte below 0x20 becomes '.', bytes of 0x80 and above become '?', and
 * everything else is copied unchanged.
 *
 * The output buffer must be able to hold (2*len)+1 bytes.  The result is
 * NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  const unsigned char* const end = data + len;
  for (const unsigned char* in = data; in != end; ++in) {
    const unsigned char byte = *in;
    if (byte == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (byte == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (byte < 0x20) {
      // Could do more here, but we don't need them yet.
      *out++ = '.';
    } else if (byte >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = byte;
    }
  }
  *out = '\0';
}
285 
/*
 * Writes the NUL-terminated string "p" to "out_file" enclosed in double
 * quotes. Backslash, double quote, tab, newline and carriage return are
 * written as backslash escapes; all other characters are written as-is.
 */
static void DumpEscapedString(const char* p, FILE* out_file) {
  fputs("\"", out_file);
  while (*p != '\0') {
    const char c = *p++;
    if (c == '\\') {
      fputs("\\\\", out_file);
    } else if (c == '\"') {
      fputs("\\\"", out_file);
    } else if (c == '\t') {
      fputs("\\t", out_file);
    } else if (c == '\n') {
      fputs("\\n", out_file);
    } else if (c == '\r') {
      fputs("\\r", out_file);
    } else {
      putc(c, out_file);
    }
  }
  fputs("\"", out_file);
}
314 
/*
 * Writes the NUL-terminated string "p" to "out_file" in a form usable as
 * an XML attribute value: '&', '<', '>' and '"' are replaced by named
 * entities, and tab, newline and carriage return by numeric character
 * references. All other characters are written unchanged. No surrounding
 * quotes are written.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  while (*p != '\0') {
    const char c = *p++;
    // Replacement text for this character, or null to emit it verbatim.
    const char* replacement = nullptr;
    switch (c) {
      case '&':  replacement = "&amp;";  break;
      case '<':  replacement = "&lt;";   break;
      case '>':  replacement = "&gt;";   break;
      case '"':  replacement = "&quot;"; break;
      case '\t': replacement = "&#x9;";  break;
      case '\n': replacement = "&#xA;";  break;
      case '\r': replacement = "&#xD;";  break;
      default:   break;
    }
    if (replacement != nullptr) {
      fputs(replacement, out_file);
    } else {
      putc(c, out_file);
    }
  }
}
347 
/*
 * Helper for DumpInstruction(), which builds the string representation
 * of the constant-pool style index carried by the given instruction.
 *
 * header:   source of the string/type/proto/field/method id collections
 *           used to resolve the index into a readable name.
 * dec_insn: the decoded instruction whose index operand is formatted.
 * buf_size: size in bytes of the buffer to format into. If the formatted
 *           text does not fit, the function calls itself again with a
 *           buffer that is large enough.
 *
 * Returns a newly allocated, NUL-terminated buffer holding the text.
 */
static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
                                           const Instruction* dec_insn,
                                           size_t buf_size) {
  std::unique_ptr<char[]> buf(new char[buf_size]);
  // Determine index and width of the string.
  // "width" is later used as the minimum number of zero-padded hex digits
  // (the '*' in "%0*x") when the raw index is printed.
  uint32_t index = 0;
  uint32_t secondary_index = dex::kDexNoIndex;
  uint32_t width = 4;
  switch (Instruction::FormatOf(dec_insn->Opcode())) {
    // SOME NOT SUPPORTED:
    // case Instruction::k20bc:
    case Instruction::k21c:
    case Instruction::k35c:
    // case Instruction::k35ms:
    case Instruction::k3rc:
    // case Instruction::k3rms:
    // case Instruction::k35mi:
    // case Instruction::k3rmi:
      index = dec_insn->VRegB();
      width = 4;
      break;
    case Instruction::k31c:
      index = dec_insn->VRegB();
      width = 8;
      break;
    case Instruction::k22c:
    // case Instruction::k22cs:
      index = dec_insn->VRegC();
      width = 4;
      break;
    case Instruction::k45cc:
    case Instruction::k4rcc:
      // These formats carry two indices: a method (B) and a proto (H).
      index = dec_insn->VRegB();
      secondary_index = dec_insn->VRegH();
      width = 4;
      break;
    default:
      // Any other format keeps index 0 and the default width.
      break;
  }  // switch

  // Determine index type.
  // outSize receives snprintf()'s return value, i.e. the length the full
  // text needs (excluding the NUL), even when it was truncated to fit.
  size_t outSize = 0;
  switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
    case Instruction::kIndexUnknown:
      // This function should never get called for this type, but do
      // something sensible here, just to help with debugging.
      outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
      break;
    case Instruction::kIndexNone:
      // This function should never get called for this type, but do
      // something sensible here, just to help with debugging.
      outSize = snprintf(buf.get(), buf_size, "<no-index>");
      break;
    case Instruction::kIndexTypeRef:
      // Out-of-range indices are printed with a "<type?>" placeholder
      // instead of being resolved; the same pattern is used below for
      // strings, methods and fields.
      if (index < header->TypeIds().Size()) {
        const char* tp = header->TypeIds()[index]->GetStringId()->Data();
        outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
      }
      break;
    case Instruction::kIndexStringRef:
      if (index < header->StringIds().Size()) {
        const char* st = header->StringIds()[index]->Data();
        outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
      }
      break;
    case Instruction::kIndexMethodRef:
      if (index < header->MethodIds().Size()) {
        // Printed as "<class descriptor>.<name>:<signature>".
        dex_ir::MethodId* method_id = header->MethodIds()[index];
        const char* name = method_id->Name()->Data();
        std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
        const char* back_descriptor = method_id->Class()->GetStringId()->Data();
        outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
                           back_descriptor, name, type_descriptor.c_str(), width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
      }
      break;
    case Instruction::kIndexFieldRef:
      if (index < header->FieldIds().Size()) {
        // Printed as "<class descriptor>.<name>:<field type descriptor>".
        dex_ir::FieldId* field_id = header->FieldIds()[index];
        const char* name = field_id->Name()->Data();
        const char* type_descriptor = field_id->Type()->GetStringId()->Data();
        const char* back_descriptor = field_id->Class()->GetStringId()->Data();
        outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
                           back_descriptor, name, type_descriptor, width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
      }
      break;
    case Instruction::kIndexVtableOffset:
      outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
                         width, index, width, index);
      break;
    case Instruction::kIndexFieldOffset:
      outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
      break;
    case Instruction::kIndexMethodAndProtoRef: {
      // Start from placeholders and replace each one only if its index is
      // in range, so one bad index does not hide the other operand.
      std::string method("<method?>");
      std::string proto("<proto?>");
      if (index < header->MethodIds().Size()) {
        dex_ir::MethodId* method_id = header->MethodIds()[index];
        const char* name = method_id->Name()->Data();
        std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
        const char* back_descriptor = method_id->Class()->GetStringId()->Data();
        method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
      }
      if (secondary_index < header->ProtoIds().Size()) {
        dex_ir::ProtoId* proto_id = header->ProtoIds()[secondary_index];
        proto = GetSignatureForProtoId(proto_id);
      }
      outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
                         method.c_str(), proto.c_str(), width, index, width, secondary_index);
    }
    break;
    // SOME NOT SUPPORTED:
    // case Instruction::kIndexVaries:
    // case Instruction::kIndexInlineMethod:
    default:
      outSize = snprintf(buf.get(), buf_size, "<?>");
      break;
  }  // switch

  // Determine success of string construction.
  if (outSize >= buf_size) {
    // The buffer wasn't big enough; retry with computed size. Note: snprintf()
    // doesn't count the '\0' as part of its returned size, so we add explicit
    // space for it here.
    return IndexString(header, dec_insn, outSize + 1);
  }
  return buf;
}
488 
489 /*
490  * Dumps encoded annotation.
491  */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)492 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
493   fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
494   // Display all name=value pairs.
495   for (auto& subannotation : *annotation->GetAnnotationElements()) {
496     fputc(' ', out_file_);
497     fputs(subannotation->GetName()->Data(), out_file_);
498     fputc('=', out_file_);
499     DumpEncodedValue(subannotation->GetValue());
500   }
501 }
502 /*
503  * Dumps encoded value.
504  */
DumpEncodedValue(const dex_ir::EncodedValue * data)505 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
506   switch (data->Type()) {
507     case DexFile::kDexAnnotationByte:
508       fprintf(out_file_, "%" PRId8, data->GetByte());
509       break;
510     case DexFile::kDexAnnotationShort:
511       fprintf(out_file_, "%" PRId16, data->GetShort());
512       break;
513     case DexFile::kDexAnnotationChar:
514       fprintf(out_file_, "%" PRIu16, data->GetChar());
515       break;
516     case DexFile::kDexAnnotationInt:
517       fprintf(out_file_, "%" PRId32, data->GetInt());
518       break;
519     case DexFile::kDexAnnotationLong:
520       fprintf(out_file_, "%" PRId64, data->GetLong());
521       break;
522     case DexFile::kDexAnnotationFloat: {
523       fprintf(out_file_, "%g", data->GetFloat());
524       break;
525     }
526     case DexFile::kDexAnnotationDouble: {
527       fprintf(out_file_, "%g", data->GetDouble());
528       break;
529     }
530     case DexFile::kDexAnnotationString: {
531       dex_ir::StringId* string_id = data->GetStringId();
532       if (options_.output_format_ == kOutputPlain) {
533         DumpEscapedString(string_id->Data(), out_file_);
534       } else {
535         DumpXmlAttribute(string_id->Data(), out_file_);
536       }
537       break;
538     }
539     case DexFile::kDexAnnotationType: {
540       dex_ir::TypeId* type_id = data->GetTypeId();
541       fputs(type_id->GetStringId()->Data(), out_file_);
542       break;
543     }
544     case DexFile::kDexAnnotationField:
545     case DexFile::kDexAnnotationEnum: {
546       dex_ir::FieldId* field_id = data->GetFieldId();
547       fputs(field_id->Name()->Data(), out_file_);
548       break;
549     }
550     case DexFile::kDexAnnotationMethod: {
551       dex_ir::MethodId* method_id = data->GetMethodId();
552       fputs(method_id->Name()->Data(), out_file_);
553       break;
554     }
555     case DexFile::kDexAnnotationArray: {
556       fputc('{', out_file_);
557       // Display all elements.
558       for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
559         fputc(' ', out_file_);
560         DumpEncodedValue(value.get());
561       }
562       fputs(" }", out_file_);
563       break;
564     }
565     case DexFile::kDexAnnotationAnnotation: {
566       DumpEncodedAnnotation(data->GetEncodedAnnotation());
567       break;
568     }
569     case DexFile::kDexAnnotationNull:
570       fputs("null", out_file_);
571       break;
572     case DexFile::kDexAnnotationBoolean:
573       fputs(StrBool(data->GetBoolean()), out_file_);
574       break;
575     default:
576       fputs("????", out_file_);
577       break;
578   }  // switch
579 }
580 
581 /*
582  * Dumps the file header.
583  */
DumpFileHeader()584 void DexLayout::DumpFileHeader() {
585   char sanitized[8 * 2 + 1];
586   fprintf(out_file_, "DEX file header:\n");
587   Asciify(sanitized, header_->Magic(), 8);
588   fprintf(out_file_, "magic               : '%s'\n", sanitized);
589   fprintf(out_file_, "checksum            : %08x\n", header_->Checksum());
590   fprintf(out_file_, "signature           : %02x%02x...%02x%02x\n",
591           header_->Signature()[0], header_->Signature()[1],
592           header_->Signature()[DexFile::kSha1DigestSize - 2],
593           header_->Signature()[DexFile::kSha1DigestSize - 1]);
594   fprintf(out_file_, "file_size           : %d\n", header_->FileSize());
595   fprintf(out_file_, "header_size         : %d\n", header_->HeaderSize());
596   fprintf(out_file_, "link_size           : %d\n", header_->LinkSize());
597   fprintf(out_file_, "link_off            : %d (0x%06x)\n",
598           header_->LinkOffset(), header_->LinkOffset());
599   fprintf(out_file_, "string_ids_size     : %d\n", header_->StringIds().Size());
600   fprintf(out_file_, "string_ids_off      : %d (0x%06x)\n",
601           header_->StringIds().GetOffset(), header_->StringIds().GetOffset());
602   fprintf(out_file_, "type_ids_size       : %d\n", header_->TypeIds().Size());
603   fprintf(out_file_, "type_ids_off        : %d (0x%06x)\n",
604           header_->TypeIds().GetOffset(), header_->TypeIds().GetOffset());
605   fprintf(out_file_, "proto_ids_size      : %d\n", header_->ProtoIds().Size());
606   fprintf(out_file_, "proto_ids_off       : %d (0x%06x)\n",
607           header_->ProtoIds().GetOffset(), header_->ProtoIds().GetOffset());
608   fprintf(out_file_, "field_ids_size      : %d\n", header_->FieldIds().Size());
609   fprintf(out_file_, "field_ids_off       : %d (0x%06x)\n",
610           header_->FieldIds().GetOffset(), header_->FieldIds().GetOffset());
611   fprintf(out_file_, "method_ids_size     : %d\n", header_->MethodIds().Size());
612   fprintf(out_file_, "method_ids_off      : %d (0x%06x)\n",
613           header_->MethodIds().GetOffset(), header_->MethodIds().GetOffset());
614   fprintf(out_file_, "class_defs_size     : %d\n", header_->ClassDefs().Size());
615   fprintf(out_file_, "class_defs_off      : %d (0x%06x)\n",
616           header_->ClassDefs().GetOffset(), header_->ClassDefs().GetOffset());
617   fprintf(out_file_, "data_size           : %d\n", header_->DataSize());
618   fprintf(out_file_, "data_off            : %d (0x%06x)\n\n",
619           header_->DataOffset(), header_->DataOffset());
620 }
621 
622 /*
623  * Dumps a class_def_item.
624  */
DumpClassDef(int idx)625 void DexLayout::DumpClassDef(int idx) {
626   // General class information.
627   dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
628   fprintf(out_file_, "Class #%d header:\n", idx);
629   fprintf(out_file_, "class_idx           : %d\n", class_def->ClassType()->GetIndex());
630   fprintf(out_file_, "access_flags        : %d (0x%04x)\n",
631           class_def->GetAccessFlags(), class_def->GetAccessFlags());
632   uint32_t superclass_idx =  class_def->Superclass() == nullptr ?
633       DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
634   fprintf(out_file_, "superclass_idx      : %d\n", superclass_idx);
635   fprintf(out_file_, "interfaces_off      : %d (0x%06x)\n",
636           class_def->InterfacesOffset(), class_def->InterfacesOffset());
637   uint32_t source_file_offset = 0xffffffffU;
638   if (class_def->SourceFile() != nullptr) {
639     source_file_offset = class_def->SourceFile()->GetIndex();
640   }
641   fprintf(out_file_, "source_file_idx     : %d\n", source_file_offset);
642   uint32_t annotations_offset = 0;
643   if (class_def->Annotations() != nullptr) {
644     annotations_offset = class_def->Annotations()->GetOffset();
645   }
646   fprintf(out_file_, "annotations_off     : %d (0x%06x)\n",
647           annotations_offset, annotations_offset);
648   if (class_def->GetClassData() == nullptr) {
649     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n", 0, 0);
650   } else {
651     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n",
652             class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
653   }
654 
655   // Fields and methods.
656   dex_ir::ClassData* class_data = class_def->GetClassData();
657   if (class_data != nullptr && class_data->StaticFields() != nullptr) {
658     fprintf(out_file_, "static_fields_size  : %zu\n", class_data->StaticFields()->size());
659   } else {
660     fprintf(out_file_, "static_fields_size  : 0\n");
661   }
662   if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
663     fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
664   } else {
665     fprintf(out_file_, "instance_fields_size: 0\n");
666   }
667   if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
668     fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
669   } else {
670     fprintf(out_file_, "direct_methods_size : 0\n");
671   }
672   if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
673     fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
674   } else {
675     fprintf(out_file_, "virtual_methods_size: 0\n");
676   }
677   fprintf(out_file_, "\n");
678 }
679 
680 /**
681  * Dumps an annotation set item.
682  */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)683 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
684   if (set_item == nullptr || set_item->GetItems()->size() == 0) {
685     fputs("  empty-annotation-set\n", out_file_);
686     return;
687   }
688   for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
689     if (annotation == nullptr) {
690       continue;
691     }
692     fputs("  ", out_file_);
693     switch (annotation->GetVisibility()) {
694       case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   out_file_); break;
695       case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
696       case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  out_file_); break;
697       default:                             fputs("VISIBILITY_UNKNOWN ", out_file_); break;
698     }  // switch
699     DumpEncodedAnnotation(annotation->GetAnnotation());
700     fputc('\n', out_file_);
701   }
702 }
703 
704 /*
705  * Dumps class annotations.
706  */
DumpClassAnnotations(int idx)707 void DexLayout::DumpClassAnnotations(int idx) {
708   dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
709   dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
710   if (annotations_directory == nullptr) {
711     return;  // none
712   }
713 
714   fprintf(out_file_, "Class #%d annotations:\n", idx);
715 
716   dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
717   dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
718   dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
719   dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
720 
721   // Annotations on the class itself.
722   if (class_set_item != nullptr) {
723     fprintf(out_file_, "Annotations on class\n");
724     DumpAnnotationSetItem(class_set_item);
725   }
726 
727   // Annotations on fields.
728   if (fields != nullptr) {
729     for (auto& field : *fields) {
730       const dex_ir::FieldId* field_id = field->GetFieldId();
731       const uint32_t field_idx = field_id->GetIndex();
732       const char* field_name = field_id->Name()->Data();
733       fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
734       DumpAnnotationSetItem(field->GetAnnotationSetItem());
735     }
736   }
737 
738   // Annotations on methods.
739   if (methods != nullptr) {
740     for (auto& method : *methods) {
741       const dex_ir::MethodId* method_id = method->GetMethodId();
742       const uint32_t method_idx = method_id->GetIndex();
743       const char* method_name = method_id->Name()->Data();
744       fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
745       DumpAnnotationSetItem(method->GetAnnotationSetItem());
746     }
747   }
748 
749   // Annotations on method parameters.
750   if (parameters != nullptr) {
751     for (auto& parameter : *parameters) {
752       const dex_ir::MethodId* method_id = parameter->GetMethodId();
753       const uint32_t method_idx = method_id->GetIndex();
754       const char* method_name = method_id->Name()->Data();
755       fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
756       uint32_t j = 0;
757       for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
758         fprintf(out_file_, "#%u\n", j);
759         DumpAnnotationSetItem(annotation);
760         ++j;
761       }
762     }
763   }
764 
765   fputc('\n', out_file_);
766 }
767 
768 /*
769  * Dumps an interface that a class declares to implement.
770  */
DumpInterface(const dex_ir::TypeId * type_item,int i)771 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
772   const char* interface_name = type_item->GetStringId()->Data();
773   if (options_.output_format_ == kOutputPlain) {
774     fprintf(out_file_, "    #%d              : '%s'\n", i, interface_name);
775   } else {
776     std::string dot(DescriptorToDot(interface_name));
777     fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
778   }
779 }
780 
781 /*
782  * Dumps the catches table associated with the code.
783  */
DumpCatches(const dex_ir::CodeItem * code)784 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
785   const uint16_t tries_size = code->TriesSize();
786 
787   // No catch table.
788   if (tries_size == 0) {
789     fprintf(out_file_, "      catches       : (none)\n");
790     return;
791   }
792 
793   // Dump all table entries.
794   fprintf(out_file_, "      catches       : %d\n", tries_size);
795   std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
796   for (uint32_t i = 0; i < tries_size; i++) {
797     const dex_ir::TryItem* try_item = (*tries)[i].get();
798     const uint32_t start = try_item->StartAddr();
799     const uint32_t end = start + try_item->InsnCount();
800     fprintf(out_file_, "        0x%04x - 0x%04x\n", start, end);
801     for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
802       const dex_ir::TypeId* type_id = handler->GetTypeId();
803       const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
804       fprintf(out_file_, "          %s -> 0x%04x\n", descriptor, handler->GetAddress());
805     }  // for
806   }  // for
807 }
808 
809 /*
810  * Dumps a single instruction.
811  */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)812 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
813                                 uint32_t code_offset,
814                                 uint32_t insn_idx,
815                                 uint32_t insn_width,
816                                 const Instruction* dec_insn) {
817   // Address of instruction (expressed as byte offset).
818   fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
819 
820   // Dump (part of) raw bytes.
821   const uint16_t* insns = code->Insns();
822   for (uint32_t i = 0; i < 8; i++) {
823     if (i < insn_width) {
824       if (i == 7) {
825         fprintf(out_file_, " ... ");
826       } else {
827         // Print 16-bit value in little-endian order.
828         const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
829         fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
830       }
831     } else {
832       fputs("     ", out_file_);
833     }
834   }  // for
835 
836   // Dump pseudo-instruction or opcode.
837   if (dec_insn->Opcode() == Instruction::NOP) {
838     const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
839     if (instr == Instruction::kPackedSwitchSignature) {
840       fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
841     } else if (instr == Instruction::kSparseSwitchSignature) {
842       fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
843     } else if (instr == Instruction::kArrayDataSignature) {
844       fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
845     } else {
846       fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
847     }
848   } else {
849     fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
850   }
851 
852   // Set up additional argument.
853   std::unique_ptr<char[]> index_buf;
854   if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
855     index_buf = IndexString(header_, dec_insn, 200);
856   }
857 
858   // Dump the instruction.
859   //
860   // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
861   //
862   switch (Instruction::FormatOf(dec_insn->Opcode())) {
863     case Instruction::k10x:        // op
864       break;
865     case Instruction::k12x:        // op vA, vB
866       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
867       break;
868     case Instruction::k11n:        // op vA, #+B
869       fprintf(out_file_, " v%d, #int %d // #%x",
870               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
871       break;
872     case Instruction::k11x:        // op vAA
873       fprintf(out_file_, " v%d", dec_insn->VRegA());
874       break;
875     case Instruction::k10t:        // op +AA
876     case Instruction::k20t: {      // op +AAAA
877       const int32_t targ = (int32_t) dec_insn->VRegA();
878       fprintf(out_file_, " %04x // %c%04x",
879               insn_idx + targ,
880               (targ < 0) ? '-' : '+',
881               (targ < 0) ? -targ : targ);
882       break;
883     }
884     case Instruction::k22x:        // op vAA, vBBBB
885       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
886       break;
887     case Instruction::k21t: {     // op vAA, +BBBB
888       const int32_t targ = (int32_t) dec_insn->VRegB();
889       fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
890               insn_idx + targ,
891               (targ < 0) ? '-' : '+',
892               (targ < 0) ? -targ : targ);
893       break;
894     }
895     case Instruction::k21s:        // op vAA, #+BBBB
896       fprintf(out_file_, " v%d, #int %d // #%x",
897               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
898       break;
899     case Instruction::k21h:        // op vAA, #+BBBB0000[00000000]
900       // The printed format varies a bit based on the actual opcode.
901       if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
902         const int32_t value = dec_insn->VRegB() << 16;
903         fprintf(out_file_, " v%d, #int %d // #%x",
904                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
905       } else {
906         const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
907         fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
908                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
909       }
910       break;
911     case Instruction::k21c:        // op vAA, thing@BBBB
912     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
913       fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
914       break;
915     case Instruction::k23x:        // op vAA, vBB, vCC
916       fprintf(out_file_, " v%d, v%d, v%d",
917               dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
918       break;
919     case Instruction::k22b:        // op vAA, vBB, #+CC
920       fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
921               dec_insn->VRegA(), dec_insn->VRegB(),
922               (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
923       break;
924     case Instruction::k22t: {      // op vA, vB, +CCCC
925       const int32_t targ = (int32_t) dec_insn->VRegC();
926       fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
927               dec_insn->VRegA(), dec_insn->VRegB(),
928               insn_idx + targ,
929               (targ < 0) ? '-' : '+',
930               (targ < 0) ? -targ : targ);
931       break;
932     }
933     case Instruction::k22s:        // op vA, vB, #+CCCC
934       fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
935               dec_insn->VRegA(), dec_insn->VRegB(),
936               (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
937       break;
938     case Instruction::k22c:        // op vA, vB, thing@CCCC
939     // NOT SUPPORTED:
940     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
941       fprintf(out_file_, " v%d, v%d, %s",
942               dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
943       break;
944     case Instruction::k30t:
945       fprintf(out_file_, " #%08x", dec_insn->VRegA());
946       break;
947     case Instruction::k31i: {     // op vAA, #+BBBBBBBB
948       // This is often, but not always, a float.
949       union {
950         float f;
951         uint32_t i;
952       } conv;
953       conv.i = dec_insn->VRegB();
954       fprintf(out_file_, " v%d, #float %g // #%08x",
955               dec_insn->VRegA(), conv.f, dec_insn->VRegB());
956       break;
957     }
958     case Instruction::k31t:       // op vAA, offset +BBBBBBBB
959       fprintf(out_file_, " v%d, %08x // +%08x",
960               dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
961       break;
962     case Instruction::k32x:        // op vAAAA, vBBBB
963       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
964       break;
965     case Instruction::k35c:           // op {vC, vD, vE, vF, vG}, thing@BBBB
966     case Instruction::k45cc: {        // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
967     // NOT SUPPORTED:
968     // case Instruction::k35ms:       // [opt] invoke-virtual+super
969     // case Instruction::k35mi:       // [opt] inline invoke
970       uint32_t arg[Instruction::kMaxVarArgRegs];
971       dec_insn->GetVarArgs(arg);
972       fputs(" {", out_file_);
973       for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
974         if (i == 0) {
975           fprintf(out_file_, "v%d", arg[i]);
976         } else {
977           fprintf(out_file_, ", v%d", arg[i]);
978         }
979       }  // for
980       fprintf(out_file_, "}, %s", index_buf.get());
981       break;
982     }
983     case Instruction::k3rc:           // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
984     case Instruction::k4rcc:          // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
985     // NOT SUPPORTED:
986     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
987     // case Instruction::k3rmi:       // [opt] execute-inline/range
988       {
989         // This doesn't match the "dx" output when some of the args are
990         // 64-bit values -- dx only shows the first register.
991         fputs(" {", out_file_);
992         for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
993           if (i == 0) {
994             fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
995           } else {
996             fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
997           }
998         }  // for
999         fprintf(out_file_, "}, %s", index_buf.get());
1000       }
1001       break;
1002     case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
1003       // This is often, but not always, a double.
1004       union {
1005         double d;
1006         uint64_t j;
1007       } conv;
1008       conv.j = dec_insn->WideVRegB();
1009       fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1010               dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1011       break;
1012     }
1013     // NOT SUPPORTED:
1014     // case Instruction::k00x:        // unknown op or breakpoint
1015     //    break;
1016     default:
1017       fprintf(out_file_, " ???");
1018       break;
1019   }  // switch
1020 
1021   fputc('\n', out_file_);
1022 }
1023 
1024 /*
1025  * Dumps a bytecode disassembly.
1026  */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1027 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1028   dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1029   const char* name = method_id->Name()->Data();
1030   std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1031   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1032 
1033   // Generate header.
1034   std::string dot(DescriptorToDot(back_descriptor));
1035   fprintf(out_file_, "%06x:                                        |[%06x] %s.%s:%s\n",
1036           code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1037 
1038   // Iterate over all instructions.
1039   for (const DexInstructionPcPair& inst : code->Instructions()) {
1040     const uint32_t insn_width = inst->SizeInCodeUnits();
1041     if (insn_width == 0) {
1042       LOG(WARNING) << "GLITCH: zero-width instruction at idx=0x" << std::hex << inst.DexPc();
1043       break;
1044     }
1045     DumpInstruction(code, code_offset, inst.DexPc(), insn_width, &inst.Inst());
1046   }  // for
1047 }
1048 
1049 /*
1050  * Lookup functions.
1051  */
StringDataByIdx(uint32_t idx,dex_ir::Header * header)1052 static const char* StringDataByIdx(uint32_t idx, dex_ir::Header* header) {
1053   dex_ir::StringId* string_id = header->GetStringIdOrNullPtr(idx);
1054   if (string_id == nullptr) {
1055     return nullptr;
1056   }
1057   return string_id->Data();
1058 }
1059 
StringDataByTypeIdx(uint16_t idx,dex_ir::Header * header)1060 static const char* StringDataByTypeIdx(uint16_t idx, dex_ir::Header* header) {
1061   dex_ir::TypeId* type_id = header->GetTypeIdOrNullPtr(idx);
1062   if (type_id == nullptr) {
1063     return nullptr;
1064   }
1065   dex_ir::StringId* string_id = type_id->GetStringId();
1066   if (string_id == nullptr) {
1067     return nullptr;
1068   }
1069   return string_id->Data();
1070 }
1071 
1072 
1073 /*
1074  * Dumps code of a method.
1075  */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset,const char * declaring_class_descriptor,const char * method_name,bool is_static,const dex_ir::ProtoId * proto)1076 void DexLayout::DumpCode(uint32_t idx,
1077                          const dex_ir::CodeItem* code,
1078                          uint32_t code_offset,
1079                          const char* declaring_class_descriptor,
1080                          const char* method_name,
1081                          bool is_static,
1082                          const dex_ir::ProtoId* proto) {
1083   fprintf(out_file_, "      registers     : %d\n", code->RegistersSize());
1084   fprintf(out_file_, "      ins           : %d\n", code->InsSize());
1085   fprintf(out_file_, "      outs          : %d\n", code->OutsSize());
1086   fprintf(out_file_, "      insns size    : %d 16-bit code units\n",
1087           code->InsnsSize());
1088 
1089   // Bytecode disassembly, if requested.
1090   if (options_.disassemble_) {
1091     DumpBytecodes(idx, code, code_offset);
1092   }
1093 
1094   // Try-catch blocks.
1095   DumpCatches(code);
1096 
1097   // Positions and locals table in the debug info.
1098   dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
1099   fprintf(out_file_, "      positions     : \n");
1100   if (debug_info != nullptr) {
1101     DexFile::DecodeDebugPositionInfo(debug_info->GetDebugInfo(),
1102                                      [this](uint32_t idx) {
1103                                        return StringDataByIdx(idx, this->header_);
1104                                      },
1105                                      [&](const DexFile::PositionInfo& entry) {
1106                                        fprintf(out_file_,
1107                                                "        0x%04x line=%d\n",
1108                                                entry.address_,
1109                                                entry.line_);
1110                                         return false;
1111                                      });
1112   }
1113   fprintf(out_file_, "      locals        : \n");
1114   if (debug_info != nullptr) {
1115     std::vector<const char*> arg_descriptors;
1116     const dex_ir::TypeList* parameters = proto->Parameters();
1117     if (parameters != nullptr) {
1118       const dex_ir::TypeIdVector* parameter_type_vector = parameters->GetTypeList();
1119       if (parameter_type_vector != nullptr) {
1120         for (const dex_ir::TypeId* type_id : *parameter_type_vector) {
1121           arg_descriptors.push_back(type_id->GetStringId()->Data());
1122         }
1123       }
1124     }
1125     DexFile::DecodeDebugLocalInfo(debug_info->GetDebugInfo(),
1126                                   "DexLayout in-memory",
1127                                   declaring_class_descriptor,
1128                                   arg_descriptors,
1129                                   method_name,
1130                                   is_static,
1131                                   code->RegistersSize(),
1132                                   code->InsSize(),
1133                                   code->InsnsSize(),
1134                                   [this](uint32_t idx) {
1135                                     return StringDataByIdx(idx, this->header_);
1136                                   },
1137                                   [this](uint32_t idx) {
1138                                     return
1139                                         StringDataByTypeIdx(dchecked_integral_cast<uint16_t>(idx),
1140                                                             this->header_);
1141                                   },
1142                                   [&](const DexFile::LocalInfo& entry) {
1143                                     const char* signature =
1144                                         entry.signature_ != nullptr ? entry.signature_ : "";
1145                                     fprintf(out_file_,
1146                                             "        0x%04x - 0x%04x reg=%d %s %s %s\n",
1147                                             entry.start_address_,
1148                                             entry.end_address_,
1149                                             entry.reg_,
1150                                             entry.name_,
1151                                             entry.descriptor_,
1152                                             signature);
1153                                   });
1154   }
1155 }
1156 
1157 /*
1158  * Dumps a method.
1159  */
DumpMethod(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,const dex_ir::CodeItem * code,int i)1160 void DexLayout::DumpMethod(uint32_t idx,
1161                            uint32_t flags,
1162                            uint32_t hiddenapi_flags,
1163                            const dex_ir::CodeItem* code,
1164                            int i) {
1165   // Bail for anything private if export only requested.
1166   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1167     return;
1168   }
1169 
1170   dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1171   const char* name = method_id->Name()->Data();
1172   char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1173   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1174   char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1175 
1176   if (options_.output_format_ == kOutputPlain) {
1177     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1178     fprintf(out_file_, "      name          : '%s'\n", name);
1179     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1180     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1181     if (options_.show_section_headers_) {
1182       fprintf(out_file_, "      method_idx    : %d\n", method_id->GetIndex());
1183     }
1184     if (hiddenapi_flags != 0u) {
1185       fprintf(out_file_,
1186               "      hiddenapi     : 0x%04x (%s)\n",
1187               hiddenapi_flags,
1188               GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1189     }
1190     if (code == nullptr) {
1191       fprintf(out_file_, "      code          : (none)\n");
1192     } else {
1193       fprintf(out_file_, "      code          -\n");
1194       DumpCode(idx,
1195                code,
1196                code->GetOffset(),
1197                back_descriptor,
1198                name,
1199                (flags & kAccStatic) != 0,
1200                method_id->Proto());
1201     }
1202     if (options_.disassemble_) {
1203       fputc('\n', out_file_);
1204     }
1205   } else if (options_.output_format_ == kOutputXml) {
1206     const bool constructor = (name[0] == '<');
1207 
1208     // Method name and prototype.
1209     if (constructor) {
1210       std::string dot(DescriptorClassToName(back_descriptor));
1211       fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1212       dot = DescriptorToDot(back_descriptor);
1213       fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1214     } else {
1215       fprintf(out_file_, "<method name=\"%s\"\n", name);
1216       const char* return_type = strrchr(type_descriptor, ')');
1217       if (return_type == nullptr) {
1218         LOG(ERROR) << "bad method type descriptor '" << type_descriptor << "'";
1219         goto bail;
1220       }
1221       std::string dot(DescriptorToDot(return_type + 1));
1222       fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1223       fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1224       fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1225       fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1226           (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1227     }
1228 
1229     // Additional method flags.
1230     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1231     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1232     // The "deprecated=" not knowable w/o parsing annotations.
1233     fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1234 
1235     // Parameters.
1236     if (type_descriptor[0] != '(') {
1237       LOG(ERROR) << "ERROR: bad descriptor '" << type_descriptor << "'";
1238       goto bail;
1239     }
1240     char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1241     const char* base = type_descriptor + 1;
1242     int arg_num = 0;
1243     while (*base != ')') {
1244       char* cp = tmp_buf;
1245       while (*base == '[') {
1246         *cp++ = *base++;
1247       }
1248       if (*base == 'L') {
1249         // Copy through ';'.
1250         do {
1251           *cp = *base++;
1252         } while (*cp++ != ';');
1253       } else {
1254         // Primitive char, copy it.
1255         if (strchr("ZBCSIFJD", *base) == nullptr) {
1256           LOG(ERROR) << "ERROR: bad method signature '" << base << "'";
1257           break;  // while
1258         }
1259         *cp++ = *base++;
1260       }
1261       // Null terminate and display.
1262       *cp++ = '\0';
1263       std::string dot(DescriptorToDot(tmp_buf));
1264       fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1265                         "</parameter>\n", arg_num++, dot.c_str());
1266     }  // while
1267     free(tmp_buf);
1268     if (constructor) {
1269       fprintf(out_file_, "</constructor>\n");
1270     } else {
1271       fprintf(out_file_, "</method>\n");
1272     }
1273   }
1274 
1275  bail:
1276   free(type_descriptor);
1277   free(access_str);
1278 }
1279 
1280 /*
1281  * Dumps a static (class) field.
1282  */
DumpSField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i,dex_ir::EncodedValue * init)1283 void DexLayout::DumpSField(uint32_t idx,
1284                            uint32_t flags,
1285                            uint32_t hiddenapi_flags,
1286                            int i,
1287                            dex_ir::EncodedValue* init) {
1288   // Bail for anything private if export only requested.
1289   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1290     return;
1291   }
1292 
1293   dex_ir::FieldId* field_id = header_->FieldIds()[idx];
1294   const char* name = field_id->Name()->Data();
1295   const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1296   const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1297   char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1298 
1299   if (options_.output_format_ == kOutputPlain) {
1300     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1301     fprintf(out_file_, "      name          : '%s'\n", name);
1302     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1303     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1304     if (hiddenapi_flags != 0u) {
1305       fprintf(out_file_,
1306               "      hiddenapi     : 0x%04x (%s)\n",
1307               hiddenapi_flags,
1308               GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1309     }
1310     if (init != nullptr) {
1311       fputs("      value         : ", out_file_);
1312       DumpEncodedValue(init);
1313       fputs("\n", out_file_);
1314     }
1315   } else if (options_.output_format_ == kOutputXml) {
1316     fprintf(out_file_, "<field name=\"%s\"\n", name);
1317     std::string dot(DescriptorToDot(type_descriptor));
1318     fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1319     fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1320     fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1321     // The "value=" is not knowable w/o parsing annotations.
1322     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1323     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1324     // The "deprecated=" is not knowable w/o parsing annotations.
1325     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1326     if (init != nullptr) {
1327       fputs(" value=\"", out_file_);
1328       DumpEncodedValue(init);
1329       fputs("\"\n", out_file_);
1330     }
1331     fputs(">\n</field>\n", out_file_);
1332   }
1333 
1334   free(access_str);
1335 }
1336 
1337 /*
1338  * Dumps an instance field.
1339  */
DumpIField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i)1340 void DexLayout::DumpIField(uint32_t idx,
1341                            uint32_t flags,
1342                            uint32_t hiddenapi_flags,
1343                            int i) {
1344   DumpSField(idx, flags, hiddenapi_flags, i, nullptr);
1345 }
1346 
1347 /*
1348  * Dumps the class.
1349  *
1350  * Note "idx" is a DexClassDef index, not a DexTypeId index.
1351  *
1352  * If "*last_package" is nullptr or does not match the current class' package,
1353  * the value will be replaced with a newly-allocated string.
1354  */
DumpClass(int idx,char ** last_package)1355 void DexLayout::DumpClass(int idx, char** last_package) {
1356   dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
1357   // Omitting non-public class.
1358   if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1359     return;
1360   }
1361 
1362   if (options_.show_section_headers_) {
1363     DumpClassDef(idx);
1364   }
1365 
1366   if (options_.show_annotations_) {
1367     DumpClassAnnotations(idx);
1368   }
1369 
1370   // For the XML output, show the package name.  Ideally we'd gather
1371   // up the classes, sort them, and dump them alphabetically so the
1372   // package name wouldn't jump around, but that's not a great plan
1373   // for something that needs to run on the device.
1374   const char* class_descriptor = header_->ClassDefs()[idx]->ClassType()->GetStringId()->Data();
1375   if (!(class_descriptor[0] == 'L' &&
1376         class_descriptor[strlen(class_descriptor)-1] == ';')) {
1377     // Arrays and primitives should not be defined explicitly. Keep going?
1378     LOG(ERROR) << "Malformed class name '" << class_descriptor << "'";
1379   } else if (options_.output_format_ == kOutputXml) {
1380     char* mangle = strdup(class_descriptor + 1);
1381     mangle[strlen(mangle)-1] = '\0';
1382 
1383     // Reduce to just the package name.
1384     char* last_slash = strrchr(mangle, '/');
1385     if (last_slash != nullptr) {
1386       *last_slash = '\0';
1387     } else {
1388       *mangle = '\0';
1389     }
1390 
1391     for (char* cp = mangle; *cp != '\0'; cp++) {
1392       if (*cp == '/') {
1393         *cp = '.';
1394       }
1395     }  // for
1396 
1397     if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1398       // Start of a new package.
1399       if (*last_package != nullptr) {
1400         fprintf(out_file_, "</package>\n");
1401       }
1402       fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1403       free(*last_package);
1404       *last_package = mangle;
1405     } else {
1406       free(mangle);
1407     }
1408   }
1409 
1410   // General class information.
1411   char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1412   const char* superclass_descriptor = nullptr;
1413   if (class_def->Superclass() != nullptr) {
1414     superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1415   }
1416   if (options_.output_format_ == kOutputPlain) {
1417     fprintf(out_file_, "Class #%d            -\n", idx);
1418     fprintf(out_file_, "  Class descriptor  : '%s'\n", class_descriptor);
1419     fprintf(out_file_, "  Access flags      : 0x%04x (%s)\n",
1420             class_def->GetAccessFlags(), access_str);
1421     if (superclass_descriptor != nullptr) {
1422       fprintf(out_file_, "  Superclass        : '%s'\n", superclass_descriptor);
1423     }
1424     fprintf(out_file_, "  Interfaces        -\n");
1425   } else {
1426     std::string dot(DescriptorClassToName(class_descriptor));
1427     fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1428     if (superclass_descriptor != nullptr) {
1429       dot = DescriptorToDot(superclass_descriptor);
1430       fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1431     }
1432     fprintf(out_file_, " interface=%s\n",
1433             QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1434     fprintf(out_file_, " abstract=%s\n",
1435             QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1436     fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1437     fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1438     // The "deprecated=" not knowable w/o parsing annotations.
1439     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1440     fprintf(out_file_, ">\n");
1441   }
1442 
1443   // Interfaces.
1444   const dex_ir::TypeList* interfaces = class_def->Interfaces();
1445   if (interfaces != nullptr) {
1446     const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1447     for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1448       DumpInterface((*interfaces_vector)[i], i);
1449     }  // for
1450   }
1451 
1452   // Fields and methods.
1453   dex_ir::ClassData* class_data = class_def->GetClassData();
1454   // Prepare data for static fields.
1455   dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1456   dex_ir::EncodedValueVector* encoded_values =
1457       static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1458   const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1459 
1460   // Static fields.
1461   if (options_.output_format_ == kOutputPlain) {
1462     fprintf(out_file_, "  Static fields     -\n");
1463   }
1464   if (class_data != nullptr) {
1465     dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1466     if (static_fields != nullptr) {
1467       for (uint32_t i = 0; i < static_fields->size(); i++) {
1468         DumpSField((*static_fields)[i].GetFieldId()->GetIndex(),
1469                    (*static_fields)[i].GetAccessFlags(),
1470                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*static_fields)[i]),
1471                    i,
1472                    i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1473       }  // for
1474     }
1475   }
1476 
1477   // Instance fields.
1478   if (options_.output_format_ == kOutputPlain) {
1479     fprintf(out_file_, "  Instance fields   -\n");
1480   }
1481   if (class_data != nullptr) {
1482     dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1483     if (instance_fields != nullptr) {
1484       for (uint32_t i = 0; i < instance_fields->size(); i++) {
1485         DumpIField((*instance_fields)[i].GetFieldId()->GetIndex(),
1486                    (*instance_fields)[i].GetAccessFlags(),
1487                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*instance_fields)[i]),
1488                    i);
1489       }  // for
1490     }
1491   }
1492 
1493   // Direct methods.
1494   if (options_.output_format_ == kOutputPlain) {
1495     fprintf(out_file_, "  Direct methods    -\n");
1496   }
1497   if (class_data != nullptr) {
1498     dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1499     if (direct_methods != nullptr) {
1500       for (uint32_t i = 0; i < direct_methods->size(); i++) {
1501         DumpMethod((*direct_methods)[i].GetMethodId()->GetIndex(),
1502                    (*direct_methods)[i].GetAccessFlags(),
1503                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*direct_methods)[i]),
1504                    (*direct_methods)[i].GetCodeItem(),
1505                    i);
1506       }  // for
1507     }
1508   }
1509 
1510   // Virtual methods.
1511   if (options_.output_format_ == kOutputPlain) {
1512     fprintf(out_file_, "  Virtual methods   -\n");
1513   }
1514   if (class_data != nullptr) {
1515     dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1516     if (virtual_methods != nullptr) {
1517       for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1518         DumpMethod((*virtual_methods)[i].GetMethodId()->GetIndex(),
1519                    (*virtual_methods)[i].GetAccessFlags(),
1520                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*virtual_methods)[i]),
1521                    (*virtual_methods)[i].GetCodeItem(),
1522                    i);
1523       }  // for
1524     }
1525   }
1526 
1527   // End of class.
1528   if (options_.output_format_ == kOutputPlain) {
1529     const char* file_name = "unknown";
1530     if (class_def->SourceFile() != nullptr) {
1531       file_name = class_def->SourceFile()->Data();
1532     }
1533     const dex_ir::StringId* source_file = class_def->SourceFile();
1534     fprintf(out_file_, "  source_file_idx   : %d (%s)\n\n",
1535             source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1536   } else if (options_.output_format_ == kOutputXml) {
1537     fprintf(out_file_, "</class>\n");
1538   }
1539 
1540   free(access_str);
1541 }
1542 
DumpDexFile()1543 void DexLayout::DumpDexFile() {
1544   // Headers.
1545   if (options_.show_file_headers_) {
1546     DumpFileHeader();
1547   }
1548 
1549   // Open XML context.
1550   if (options_.output_format_ == kOutputXml) {
1551     fprintf(out_file_, "<api>\n");
1552   }
1553 
1554   // Iterate over all classes.
1555   char* package = nullptr;
1556   const uint32_t class_defs_size = header_->ClassDefs().Size();
1557   for (uint32_t i = 0; i < class_defs_size; i++) {
1558     DumpClass(i, &package);
1559   }  // for
1560 
1561   // Free the last package allocated.
1562   if (package != nullptr) {
1563     fprintf(out_file_, "</package>\n");
1564     free(package);
1565   }
1566 
1567   // Close XML context.
1568   if (options_.output_format_ == kOutputXml) {
1569     fprintf(out_file_, "</api>\n");
1570   }
1571 }
1572 
LayoutClassDefsAndClassData(const DexFile * dex_file)1573 void DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1574   std::vector<dex_ir::ClassDef*> new_class_def_order;
1575   for (auto& class_def : header_->ClassDefs()) {
1576     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1577     if (info_->ContainsClass(*dex_file, type_idx)) {
1578       new_class_def_order.push_back(class_def.get());
1579     }
1580   }
1581   for (auto& class_def : header_->ClassDefs()) {
1582     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1583     if (!info_->ContainsClass(*dex_file, type_idx)) {
1584       new_class_def_order.push_back(class_def.get());
1585     }
1586   }
1587   std::unordered_set<dex_ir::ClassData*> visited_class_data;
1588   size_t class_data_index = 0;
1589   auto& class_datas = header_->ClassDatas();
1590   for (dex_ir::ClassDef* class_def : new_class_def_order) {
1591     dex_ir::ClassData* class_data = class_def->GetClassData();
1592     if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1593       visited_class_data.insert(class_data);
1594       // Overwrite the existing vector with the new ordering, note that the sets of objects are
1595       // equivalent, but the order changes. This is why this is not a memory leak.
1596       // TODO: Consider cleaning this up with a shared_ptr.
1597       class_datas[class_data_index].release();  // NOLINT b/117926937
1598       class_datas[class_data_index].reset(class_data);
1599       ++class_data_index;
1600     }
1601   }
1602   CHECK_EQ(class_data_index, class_datas.Size());
1603 
1604   if (DexLayout::kChangeClassDefOrder) {
1605     // This currently produces dex files that violate the spec since the super class class_def is
1606     // supposed to occur before any subclasses.
1607     dex_ir::CollectionVector<dex_ir::ClassDef>& class_defs = header_->ClassDefs();
1608     CHECK_EQ(new_class_def_order.size(), class_defs.Size());
1609     for (size_t i = 0; i < class_defs.Size(); ++i) {
1610       // Overwrite the existing vector with the new ordering, note that the sets of objects are
1611       // equivalent, but the order changes. This is why this is not a memory leak.
1612       // TODO: Consider cleaning this up with a shared_ptr.
1613       class_defs[i].release();  // NOLINT b/117926937
1614       class_defs[i].reset(new_class_def_order[i]);
1615     }
1616   }
1617 }
1618 
LayoutStringData(const DexFile * dex_file)1619 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1620   const size_t num_strings = header_->StringIds().Size();
1621   std::vector<bool> is_shorty(num_strings, false);
1622   std::vector<bool> from_hot_method(num_strings, false);
1623   for (auto& class_def : header_->ClassDefs()) {
1624     // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1625     // as hot. Add its super class and interfaces as well, which can be used during initialization.
1626     const bool is_profile_class =
1627         info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1628     if (is_profile_class) {
1629       from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1630       const dex_ir::TypeId* superclass = class_def->Superclass();
1631       if (superclass != nullptr) {
1632         from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1633       }
1634       const dex_ir::TypeList* interfaces = class_def->Interfaces();
1635       if (interfaces != nullptr) {
1636         for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1637           from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1638         }
1639       }
1640     }
1641     dex_ir::ClassData* data = class_def->GetClassData();
1642     if (data == nullptr) {
1643       continue;
1644     }
1645     for (size_t i = 0; i < 2; ++i) {
1646       for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1647         const dex_ir::MethodId* method_id = method.GetMethodId();
1648         dex_ir::CodeItem* code_item = method.GetCodeItem();
1649         if (code_item == nullptr) {
1650           continue;
1651         }
1652         const bool is_clinit = is_profile_class &&
1653             (method.GetAccessFlags() & kAccConstructor) != 0 &&
1654             (method.GetAccessFlags() & kAccStatic) != 0;
1655         const bool method_executed = is_clinit ||
1656             info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1657         if (!method_executed) {
1658           continue;
1659         }
1660         is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1661         dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1662         if (fixups == nullptr) {
1663           continue;
1664         }
1665         // Add const-strings.
1666         for (dex_ir::StringId* id : fixups->StringIds()) {
1667           from_hot_method[id->GetIndex()] = true;
1668         }
1669         // Add field classes, names, and types.
1670         for (dex_ir::FieldId* id : fixups->FieldIds()) {
1671           // TODO: Only visit field ids from static getters and setters.
1672           from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1673           from_hot_method[id->Name()->GetIndex()] = true;
1674           from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1675         }
1676         // For clinits, add referenced method classes, names, and protos.
1677         if (is_clinit) {
1678           for (dex_ir::MethodId* id : fixups->MethodIds()) {
1679             from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1680             from_hot_method[id->Name()->GetIndex()] = true;
1681             is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1682           }
1683         }
1684       }
1685     }
1686   }
1687   // Sort string data by specified order.
1688   std::vector<dex_ir::StringId*> string_ids;
1689   for (auto& string_id : header_->StringIds()) {
1690     string_ids.push_back(string_id.get());
1691   }
1692   std::sort(string_ids.begin(),
1693             string_ids.end(),
1694             [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1695                                            const dex_ir::StringId* b) {
1696     const bool a_is_hot = from_hot_method[a->GetIndex()];
1697     const bool b_is_hot = from_hot_method[b->GetIndex()];
1698     if (a_is_hot != b_is_hot) {
1699       return a_is_hot < b_is_hot;
1700     }
1701     // After hot methods are partitioned, subpartition shorties.
1702     const bool a_is_shorty = is_shorty[a->GetIndex()];
1703     const bool b_is_shorty = is_shorty[b->GetIndex()];
1704     if (a_is_shorty != b_is_shorty) {
1705       return a_is_shorty < b_is_shorty;
1706     }
1707     // Order by index by default.
1708     return a->GetIndex() < b->GetIndex();
1709   });
1710   auto& string_datas = header_->StringDatas();
1711   // Now we know what order we want the string data, reorder them.
1712   size_t data_index = 0;
1713   for (dex_ir::StringId* string_id : string_ids) {
1714     string_datas[data_index].release();  // NOLINT b/117926937
1715     string_datas[data_index].reset(string_id->DataItem());
1716     ++data_index;
1717   }
1718   if (kIsDebugBuild) {
1719     std::unordered_set<dex_ir::StringData*> visited;
1720     for (const std::unique_ptr<dex_ir::StringData>& data : string_datas) {
1721       visited.insert(data.get());
1722     }
1723     for (auto& string_id : header_->StringIds()) {
1724       CHECK(visited.find(string_id->DataItem()) != visited.end());
1725     }
1726   }
1727   CHECK_EQ(data_index, string_datas.Size());
1728 }
1729 
1730 // Orders code items according to specified class data ordering.
LayoutCodeItems(const DexFile * dex_file)1731 void DexLayout::LayoutCodeItems(const DexFile* dex_file) {
1732   static constexpr InvokeType invoke_types[] = {
1733     kDirect,
1734     kVirtual
1735   };
1736 
1737   std::unordered_map<dex_ir::CodeItem*, LayoutType>& code_item_layout =
1738       layout_hotness_info_.code_item_layout_;
1739 
1740   // Assign hotness flags to all code items.
1741   for (InvokeType invoke_type : invoke_types) {
1742     for (auto& class_def : header_->ClassDefs()) {
1743       const bool is_profile_class =
1744           info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1745 
1746       // Skip classes that are not defined in this dex file.
1747       dex_ir::ClassData* class_data = class_def->GetClassData();
1748       if (class_data == nullptr) {
1749         continue;
1750       }
1751       for (auto& method : *(invoke_type == InvokeType::kDirect
1752                                 ? class_data->DirectMethods()
1753                                 : class_data->VirtualMethods())) {
1754         const dex_ir::MethodId *method_id = method.GetMethodId();
1755         dex_ir::CodeItem *code_item = method.GetCodeItem();
1756         if (code_item == nullptr) {
1757           continue;
1758         }
1759         // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1760         const bool is_clinit = (method.GetAccessFlags() & kAccConstructor) != 0 &&
1761             (method.GetAccessFlags() & kAccStatic) != 0;
1762         const bool is_startup_clinit = is_profile_class && is_clinit;
1763         using Hotness = ProfileCompilationInfo::MethodHotness;
1764         Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1765         LayoutType state = LayoutType::kLayoutTypeUnused;
1766         if (hotness.IsHot()) {
1767           // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1768           // now.
1769           state = LayoutType::kLayoutTypeHot;
1770         } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1771           // Startup clinit or a method that only has the startup flag.
1772           state = LayoutType::kLayoutTypeStartupOnly;
1773         } else if (is_clinit) {
1774           state = LayoutType::kLayoutTypeUsedOnce;
1775         } else if (hotness.IsInProfile()) {
1776           state = LayoutType::kLayoutTypeSometimesUsed;
1777         }
1778         auto it = code_item_layout.emplace(code_item, state);
1779         if (!it.second) {
1780           LayoutType& layout_type = it.first->second;
1781           // Already exists, merge the hotness.
1782           layout_type = MergeLayoutType(layout_type, state);
1783         }
1784       }
1785     }
1786   }
1787 
1788   const auto& code_items = header_->CodeItems();
1789   if (VLOG_IS_ON(dex)) {
1790     size_t layout_count[static_cast<size_t>(LayoutType::kLayoutTypeCount)] = {};
1791     for (const std::unique_ptr<dex_ir::CodeItem>& code_item : code_items) {
1792       auto it = code_item_layout.find(code_item.get());
1793       DCHECK(it != code_item_layout.end());
1794       ++layout_count[static_cast<size_t>(it->second)];
1795     }
1796     for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
1797       LOG(INFO) << "Code items in category " << i << " count=" << layout_count[i];
1798     }
1799   }
1800 
1801   // Sort the code items vector by new layout. The writing process will take care of calculating
1802   // all the offsets. Stable sort to preserve any existing locality that might be there.
1803   std::stable_sort(code_items.begin(),
1804                    code_items.end(),
1805                    [&](const std::unique_ptr<dex_ir::CodeItem>& a,
1806                        const std::unique_ptr<dex_ir::CodeItem>& b) {
1807     auto it_a = code_item_layout.find(a.get());
1808     auto it_b = code_item_layout.find(b.get());
1809     DCHECK(it_a != code_item_layout.end());
1810     DCHECK(it_b != code_item_layout.end());
1811     const LayoutType layout_type_a = it_a->second;
1812     const LayoutType layout_type_b = it_b->second;
1813     return layout_type_a < layout_type_b;
1814   });
1815 }
1816 
LayoutOutputFile(const DexFile * dex_file)1817 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1818   LayoutStringData(dex_file);
1819   LayoutClassDefsAndClassData(dex_file);
1820   LayoutCodeItems(dex_file);
1821 }
1822 
OutputDexFile(const DexFile * input_dex_file,bool compute_offsets,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1823 bool DexLayout::OutputDexFile(const DexFile* input_dex_file,
1824                               bool compute_offsets,
1825                               std::unique_ptr<DexContainer>* dex_container,
1826                               std::string* error_msg) {
1827   const std::string& dex_file_location = input_dex_file->GetLocation();
1828   std::unique_ptr<File> new_file;
1829   // If options_.output_dex_directory_ is non null, we are outputting to a file.
1830   if (options_.output_dex_directory_ != nullptr) {
1831     std::string output_location(options_.output_dex_directory_);
1832     const size_t last_slash = dex_file_location.rfind('/');
1833     std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1834     if (output_location == dex_file_directory) {
1835       output_location = dex_file_location + ".new";
1836     } else {
1837       if (!output_location.empty() && output_location.back() != '/') {
1838         output_location += "/";
1839       }
1840       const size_t separator = dex_file_location.rfind('!');
1841       if (separator != std::string::npos) {
1842         output_location += dex_file_location.substr(separator + 1);
1843       } else {
1844         output_location += "classes.dex";
1845       }
1846     }
1847     new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1848     if (new_file == nullptr) {
1849       LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1850       return false;
1851     }
1852   }
1853   if (!DexWriter::Output(this, dex_container, compute_offsets, error_msg)) {
1854     return false;
1855   }
1856   if (new_file != nullptr) {
1857     DexContainer* const container = dex_container->get();
1858     DexContainer::Section* const main_section = container->GetMainSection();
1859     if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
1860       LOG(ERROR) << "Failed to write main section for dex file " << dex_file_location;
1861       new_file->Erase();
1862       return false;
1863     }
1864     DexContainer::Section* const data_section = container->GetDataSection();
1865     if (!new_file->WriteFully(data_section->Begin(), data_section->Size())) {
1866       LOG(ERROR) << "Failed to write data section for dex file " << dex_file_location;
1867       new_file->Erase();
1868       return false;
1869     }
1870     UNUSED(new_file->FlushCloseOrErase());
1871   }
1872   return true;
1873 }
1874 
1875 /*
1876  * Dumps the requested sections of the file.
1877  */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1878 bool DexLayout::ProcessDexFile(const char* file_name,
1879                                const DexFile* dex_file,
1880                                size_t dex_file_index,
1881                                std::unique_ptr<DexContainer>* dex_container,
1882                                std::string* error_msg) {
1883   const bool has_output_container = dex_container != nullptr;
1884   const bool output = options_.output_dex_directory_ != nullptr || has_output_container;
1885 
1886   // Try to avoid eagerly assigning offsets to find bugs since Offset will abort if the offset
1887   // is unassigned.
1888   bool eagerly_assign_offsets = false;
1889   if (options_.visualize_pattern_ || options_.show_section_statistics_ || options_.dump_) {
1890     // These options required the offsets for dumping purposes.
1891     eagerly_assign_offsets = true;
1892   }
1893   std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file,
1894                                                                eagerly_assign_offsets,
1895                                                                GetOptions()));
1896   SetHeader(header.get());
1897 
1898   if (options_.verbose_) {
1899     fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1900             file_name, dex_file->GetHeader().magic_ + 4);
1901   }
1902 
1903   if (options_.visualize_pattern_) {
1904     VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1905     return true;
1906   }
1907 
1908   if (options_.show_section_statistics_) {
1909     ShowDexSectionStatistics(header_, dex_file_index);
1910     return true;
1911   }
1912 
1913   // Dump dex file.
1914   if (options_.dump_) {
1915     DumpDexFile();
1916   }
1917 
1918   // In case we are outputting to a file, keep it open so we can verify.
1919   if (output) {
1920     // Layout information about what strings and code items are hot. Used by the writing process
1921     // to generate the sections that are stored in the oat file.
1922     bool do_layout = info_ != nullptr;
1923     if (do_layout) {
1924       LayoutOutputFile(dex_file);
1925     }
1926     // The output needs a dex container, use a temporary one.
1927     std::unique_ptr<DexContainer> temp_container;
1928     if (dex_container == nullptr) {
1929       dex_container = &temp_container;
1930     }
1931     // If we didn't set the offsets eagerly, we definitely need to compute them here.
1932     if (!OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets, dex_container, error_msg)) {
1933       return false;
1934     }
1935 
1936     // Clear header before verifying to reduce peak RAM usage.
1937     const size_t file_size = header_->FileSize();
1938     header.reset();
1939 
1940     // Verify the output dex file's structure, only enabled by default for debug builds.
1941     if (options_.verify_output_ && has_output_container) {
1942       std::string location = "memory mapped file for " + std::string(file_name);
1943       // Dex file verifier cannot handle compact dex.
1944       bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
1945       const ArtDexFileLoader dex_file_loader;
1946       DexContainer::Section* const main_section = (*dex_container)->GetMainSection();
1947       DexContainer::Section* const data_section = (*dex_container)->GetDataSection();
1948       DCHECK_EQ(file_size, main_section->Size())
1949           << main_section->Size() << " " << data_section->Size();
1950       std::unique_ptr<const DexFile> output_dex_file(
1951           dex_file_loader.OpenWithDataSection(
1952               main_section->Begin(),
1953               main_section->Size(),
1954               data_section->Begin(),
1955               data_section->Size(),
1956               location,
1957               /* location_checksum= */ 0,
1958               /*oat_dex_file=*/ nullptr,
1959               verify,
1960               /*verify_checksum=*/ false,
1961               error_msg));
1962       CHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << *error_msg;
1963 
1964       // Do IR-level comparison between input and output. This check ignores potential differences
1965       // due to layout, so offsets are not checked. Instead, it checks the data contents of each
1966       // item.
1967       //
1968       // Regenerate output IR to catch any bugs that might happen during writing.
1969       std::unique_ptr<dex_ir::Header> output_header(
1970           dex_ir::DexIrBuilder(*output_dex_file,
1971                                /*eagerly_assign_offsets=*/ true,
1972                                GetOptions()));
1973       std::unique_ptr<dex_ir::Header> orig_header(
1974           dex_ir::DexIrBuilder(*dex_file,
1975                                /*eagerly_assign_offsets=*/ true,
1976                                GetOptions()));
1977       CHECK(VerifyOutputDexFile(output_header.get(), orig_header.get(), error_msg)) << *error_msg;
1978     }
1979   }
1980   return true;
1981 }
1982 
1983 /*
1984  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
1985  */
ProcessFile(const char * file_name)1986 int DexLayout::ProcessFile(const char* file_name) {
1987   if (options_.verbose_) {
1988     fprintf(out_file_, "Processing '%s'...\n", file_name);
1989   }
1990 
1991   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
1992   // all of which are Zip archives with "classes.dex" inside.
1993   const bool verify_checksum = !options_.ignore_bad_checksum_;
1994   std::string error_msg;
1995   const ArtDexFileLoader dex_file_loader;
1996   std::vector<std::unique_ptr<const DexFile>> dex_files;
1997   if (!dex_file_loader.Open(
1998         file_name, file_name, /* verify= */ true, verify_checksum, &error_msg, &dex_files)) {
1999     // Display returned error message to user. Note that this error behavior
2000     // differs from the error messages shown by the original Dalvik dexdump.
2001     LOG(ERROR) << error_msg;
2002     return -1;
2003   }
2004 
2005   // Success. Either report checksum verification or process
2006   // all dex files found in given file.
2007   if (options_.checksum_only_) {
2008     fprintf(out_file_, "Checksum verified\n");
2009   } else {
2010     for (size_t i = 0; i < dex_files.size(); i++) {
2011       // Pass in a null container to avoid output by default.
2012       if (!ProcessDexFile(file_name,
2013                           dex_files[i].get(),
2014                           i,
2015                           /*dex_container=*/ nullptr,
2016                           &error_msg)) {
2017         LOG(WARNING) << "Failed to run dex file " << i << " in " << file_name << " : " << error_msg;
2018       }
2019     }
2020   }
2021   return 0;
2022 }
2023 
2024 }  // namespace art
2025