1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  * Implementation file of the dexlayout utility.
17  *
18  * This is a tool to read dex files into an internal representation,
19  * reorganize the representation, and emit dex files with a better
20  * file layout.
21  */
22 
23 #include "dexlayout.h"
24 
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <sys/mman.h>  // For the PROT_* and MAP_* constants.
28 
29 #include <iostream>
30 #include <memory>
31 #include <sstream>
32 #include <vector>
33 
34 #include "android-base/stringprintf.h"
35 
36 #include "dex_ir_builder.h"
37 #include "dex_file-inl.h"
38 #include "dex_file_verifier.h"
39 #include "dex_instruction-inl.h"
40 #include "dex_verify.h"
41 #include "dex_visualize.h"
42 #include "dex_writer.h"
43 #include "jit/profile_compilation_info.h"
44 #include "mem_map.h"
45 #include "os.h"
46 #include "utils.h"
47 
48 namespace art {
49 
50 using android::base::StringPrintf;
51 
// Alignment value (4) for dex code items.
// NOTE(review): not referenced in the visible part of this file; presumably a
// byte alignment applied when code items are laid out - confirm at the use site.
static constexpr uint32_t kDexCodeItemAlignment = 4;
53 
/*
 * Selects which table of access-flag names CreateAccessFlagStr() uses.
 * The enumerator values are used directly as the first index into that
 * function's name table, and kAccessForMAX is the number of tables.
 */
enum AccessFor {
  kAccessForClass = 0, kAccessForMethod = 1, kAccessForField = 2, kAccessForMAX
};
// Number of names in each table, i.e. how many low-order access-flag bits
// CreateAccessFlagStr() can name.
const int kNumFlags = 18;
61 
/*
 * Reads a 16-bit value stored least-significant byte first.
 * "src" must point at two readable bytes.
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
68 
69 /*
70  * Converts a type descriptor to human-readable "dotted" form.  For
71  * example, "Ljava/lang/String;" becomes "java.lang.String", and
72  * "[I" becomes "int[]".  Also converts '$' to '.', which means this
73  * form can't be converted back to a descriptor.
74  */
DescriptorToDotWrapper(const char * descriptor)75 static std::string DescriptorToDotWrapper(const char* descriptor) {
76   std::string result = DescriptorToDot(descriptor);
77   size_t found = result.find('$');
78   while (found != std::string::npos) {
79     result[found] = '.';
80     found = result.find('$', found);
81   }
82   return result;
83 }
84 
/*
 * Converts the class name portion of a type descriptor to human-readable
 * "dotted" form. For example, "Ljava/lang/String;" becomes "String" and
 * "Ljava/util/Map$Entry;" becomes "Map.Entry".
 *
 * "str" must be a non-null, NUL-terminated string. The last character is
 * assumed to be the trailing ';' and is dropped. A descriptor too short to
 * contain a class name (including the empty string) yields an empty result.
 */
static std::string DescriptorClassToDot(const char* str) {
  const std::string descriptor(str);

  // The simple name starts just past the last '/', or past the leading 'L'
  // when the descriptor has no package part.
  const size_t last_slash = descriptor.rfind('/');
  const size_t name_start = (last_slash == std::string::npos) ? 1 : last_slash + 1;

  // Nothing to extract. Without this guard an empty descriptor would call
  // substr() with a start position past the end of the string.
  if (descriptor.size() <= name_start) {
    return std::string();
  }

  // Copy class name over, trimming trailing ';'.
  std::string result(descriptor.substr(name_start, descriptor.size() - 1 - name_start));

  // Replace '$' with '.'.
  for (char& ch : result) {
    if (ch == '$') {
      ch = '.';
    }
  }

  return result;
}
112 
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
119 
/*
 * Maps a boolean to the text "true" or "false" wrapped in double quotes.
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
126 
127 /*
128  * Returns a quoted string representing the access flags.
129  */
QuotedVisibility(uint32_t access_flags)130 static const char* QuotedVisibility(uint32_t access_flags) {
131   if (access_flags & kAccPublic) {
132     return "\"public\"";
133   } else if (access_flags & kAccProtected) {
134     return "\"protected\"";
135   } else if (access_flags & kAccPrivate) {
136     return "\"private\"";
137   } else {
138     return "\"package\"";
139   }
140 }
141 
/*
 * Counts the number of '1' bits in a word.
 *
 * The count is built up in parallel: first per 2-bit pair, then per 4-bit
 * nibble, then per byte, and finally the four byte counts are summed into
 * the top byte by the multiplication.
 */
static int CountOnes(uint32_t val) {
  const uint32_t pair_counts = val - ((val >> 1) & 0x55555555);
  const uint32_t nibble_counts = (pair_counts & 0x33333333) + ((pair_counts >> 2) & 0x33333333);
  const uint32_t byte_counts = (nibble_counts + (nibble_counts >> 4)) & 0x0F0F0F0F;
  return (byte_counts * 0x01010101) >> 24;
}
150 
151 /*
152  * Creates a new string with human-readable access flags.
153  *
154  * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
155  */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)156 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
157   static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
158     {
159       "PUBLIC",                /* 0x00001 */
160       "PRIVATE",               /* 0x00002 */
161       "PROTECTED",             /* 0x00004 */
162       "STATIC",                /* 0x00008 */
163       "FINAL",                 /* 0x00010 */
164       "?",                     /* 0x00020 */
165       "?",                     /* 0x00040 */
166       "?",                     /* 0x00080 */
167       "?",                     /* 0x00100 */
168       "INTERFACE",             /* 0x00200 */
169       "ABSTRACT",              /* 0x00400 */
170       "?",                     /* 0x00800 */
171       "SYNTHETIC",             /* 0x01000 */
172       "ANNOTATION",            /* 0x02000 */
173       "ENUM",                  /* 0x04000 */
174       "?",                     /* 0x08000 */
175       "VERIFIED",              /* 0x10000 */
176       "OPTIMIZED",             /* 0x20000 */
177     }, {
178       "PUBLIC",                /* 0x00001 */
179       "PRIVATE",               /* 0x00002 */
180       "PROTECTED",             /* 0x00004 */
181       "STATIC",                /* 0x00008 */
182       "FINAL",                 /* 0x00010 */
183       "SYNCHRONIZED",          /* 0x00020 */
184       "BRIDGE",                /* 0x00040 */
185       "VARARGS",               /* 0x00080 */
186       "NATIVE",                /* 0x00100 */
187       "?",                     /* 0x00200 */
188       "ABSTRACT",              /* 0x00400 */
189       "STRICT",                /* 0x00800 */
190       "SYNTHETIC",             /* 0x01000 */
191       "?",                     /* 0x02000 */
192       "?",                     /* 0x04000 */
193       "MIRANDA",               /* 0x08000 */
194       "CONSTRUCTOR",           /* 0x10000 */
195       "DECLARED_SYNCHRONIZED", /* 0x20000 */
196     }, {
197       "PUBLIC",                /* 0x00001 */
198       "PRIVATE",               /* 0x00002 */
199       "PROTECTED",             /* 0x00004 */
200       "STATIC",                /* 0x00008 */
201       "FINAL",                 /* 0x00010 */
202       "?",                     /* 0x00020 */
203       "VOLATILE",              /* 0x00040 */
204       "TRANSIENT",             /* 0x00080 */
205       "?",                     /* 0x00100 */
206       "?",                     /* 0x00200 */
207       "?",                     /* 0x00400 */
208       "?",                     /* 0x00800 */
209       "SYNTHETIC",             /* 0x01000 */
210       "?",                     /* 0x02000 */
211       "ENUM",                  /* 0x04000 */
212       "?",                     /* 0x08000 */
213       "?",                     /* 0x10000 */
214       "?",                     /* 0x20000 */
215     },
216   };
217 
218   // Allocate enough storage to hold the expected number of strings,
219   // plus a space between each.  We over-allocate, using the longest
220   // string above as the base metric.
221   const int kLongest = 21;  // The strlen of longest string above.
222   const int count = CountOnes(flags);
223   char* str;
224   char* cp;
225   cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
226 
227   for (int i = 0; i < kNumFlags; i++) {
228     if (flags & 0x01) {
229       const char* accessStr = kAccessStrings[for_what][i];
230       const int len = strlen(accessStr);
231       if (cp != str) {
232         *cp++ = ' ';
233       }
234       memcpy(cp, accessStr, len);
235       cp += len;
236     }
237     flags >>= 1;
238   }  // for
239 
240   *cp = '\0';
241   return str;
242 }
243 
GetSignatureForProtoId(const dex_ir::ProtoId * proto)244 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
245   if (proto == nullptr) {
246     return "<no signature>";
247   }
248 
249   std::string result("(");
250   const dex_ir::TypeList* type_list = proto->Parameters();
251   if (type_list != nullptr) {
252     for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
253       result += type_id->GetStringId()->Data();
254     }
255   }
256   result += ")";
257   result += proto->ReturnType()->GetStringId()->Data();
258   return result;
259 }
260 
/*
 * Copies "len" bytes from "data" to "out" as printable text.  A NUL byte
 * becomes the two characters "\0", a newline becomes "\n", any other byte
 * below 0x20 becomes '.', and any byte of 0x80 or above becomes '?'.
 * Everything else is copied unchanged.
 *
 * Since one input byte can expand to two characters, the output buffer
 * must be able to hold (2*len)+1 bytes.  The result is NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  const unsigned char* const data_end = data + len;
  for (; data != data_end; ++data) {
    const unsigned char byte = *data;
    if (byte == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (byte == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (byte < 0x20) {
      // Could do more here, but we don't need them yet.
      *out++ = '.';
    } else if (byte >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = byte;
    }
  }
  *out = '\0';
}
294 
/*
 * Writes "p" to "out_file" enclosed in double quotes.  Backslash, double
 * quote, tab, newline and carriage return are written as two-character
 * backslash escapes; every other character is written unchanged.
 */
static void DumpEscapedString(const char* p, FILE* out_file) {
  fputs("\"", out_file);
  while (*p != '\0') {
    const char ch = *p++;
    const char* escape = nullptr;
    switch (ch) {
      case '\\': escape = "\\\\"; break;
      case '\"': escape = "\\\""; break;
      case '\t': escape = "\\t";  break;
      case '\n': escape = "\\n";  break;
      case '\r': escape = "\\r";  break;
      default:                    break;
    }
    if (escape != nullptr) {
      fputs(escape, out_file);
    } else {
      putc(ch, out_file);
    }
  }
  fputs("\"", out_file);
}
323 
/*
 * Writes "p" to "out_file" as XML attribute text.  The characters
 * & < > " are replaced by named entities and tab, newline and carriage
 * return by numeric character references; everything else is written
 * unchanged.  No surrounding quotes are written.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  while (*p != '\0') {
    const char ch = *p++;
    const char* entity = nullptr;
    switch (ch) {
      case '&':  entity = "&amp;";  break;
      case '<':  entity = "&lt;";   break;
      case '>':  entity = "&gt;";   break;
      case '"':  entity = "&quot;"; break;
      case '\t': entity = "&#x9;";  break;
      case '\n': entity = "&#xA;";  break;
      case '\r': entity = "&#xD;";  break;
      default:                      break;
    }
    if (entity != nullptr) {
      fputs(entity, out_file);
    } else {
      putc(ch, out_file);
    }
  }
}
356 
357 /*
358  * Helper for dumpInstruction(), which builds the string
359  * representation for the index in the given instruction.
360  * Returns a pointer to a buffer of sufficient size.
361  */
IndexString(dex_ir::Header * header,const Instruction * dec_insn,size_t buf_size)362 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
363                                            const Instruction* dec_insn,
364                                            size_t buf_size) {
365   std::unique_ptr<char[]> buf(new char[buf_size]);
366   // Determine index and width of the string.
367   uint32_t index = 0;
368   uint32_t secondary_index = DexFile::kDexNoIndex;
369   uint32_t width = 4;
370   switch (Instruction::FormatOf(dec_insn->Opcode())) {
371     // SOME NOT SUPPORTED:
372     // case Instruction::k20bc:
373     case Instruction::k21c:
374     case Instruction::k35c:
375     // case Instruction::k35ms:
376     case Instruction::k3rc:
377     // case Instruction::k3rms:
378     // case Instruction::k35mi:
379     // case Instruction::k3rmi:
380       index = dec_insn->VRegB();
381       width = 4;
382       break;
383     case Instruction::k31c:
384       index = dec_insn->VRegB();
385       width = 8;
386       break;
387     case Instruction::k22c:
388     // case Instruction::k22cs:
389       index = dec_insn->VRegC();
390       width = 4;
391       break;
392     case Instruction::k45cc:
393     case Instruction::k4rcc:
394       index = dec_insn->VRegB();
395       secondary_index = dec_insn->VRegH();
396       width = 4;
397       break;
398     default:
399       break;
400   }  // switch
401 
402   // Determine index type.
403   size_t outSize = 0;
404   switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
405     case Instruction::kIndexUnknown:
406       // This function should never get called for this type, but do
407       // something sensible here, just to help with debugging.
408       outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
409       break;
410     case Instruction::kIndexNone:
411       // This function should never get called for this type, but do
412       // something sensible here, just to help with debugging.
413       outSize = snprintf(buf.get(), buf_size, "<no-index>");
414       break;
415     case Instruction::kIndexTypeRef:
416       if (index < header->GetCollections().TypeIdsSize()) {
417         const char* tp = header->GetCollections().GetTypeId(index)->GetStringId()->Data();
418         outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
419       } else {
420         outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
421       }
422       break;
423     case Instruction::kIndexStringRef:
424       if (index < header->GetCollections().StringIdsSize()) {
425         const char* st = header->GetCollections().GetStringId(index)->Data();
426         outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
427       } else {
428         outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
429       }
430       break;
431     case Instruction::kIndexMethodRef:
432       if (index < header->GetCollections().MethodIdsSize()) {
433         dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
434         const char* name = method_id->Name()->Data();
435         std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
436         const char* back_descriptor = method_id->Class()->GetStringId()->Data();
437         outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
438                            back_descriptor, name, type_descriptor.c_str(), width, index);
439       } else {
440         outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
441       }
442       break;
443     case Instruction::kIndexFieldRef:
444       if (index < header->GetCollections().FieldIdsSize()) {
445         dex_ir::FieldId* field_id = header->GetCollections().GetFieldId(index);
446         const char* name = field_id->Name()->Data();
447         const char* type_descriptor = field_id->Type()->GetStringId()->Data();
448         const char* back_descriptor = field_id->Class()->GetStringId()->Data();
449         outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
450                            back_descriptor, name, type_descriptor, width, index);
451       } else {
452         outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
453       }
454       break;
455     case Instruction::kIndexVtableOffset:
456       outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
457                          width, index, width, index);
458       break;
459     case Instruction::kIndexFieldOffset:
460       outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
461       break;
462     case Instruction::kIndexMethodAndProtoRef: {
463       std::string method("<method?>");
464       std::string proto("<proto?>");
465       if (index < header->GetCollections().MethodIdsSize()) {
466         dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
467         const char* name = method_id->Name()->Data();
468         std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
469         const char* back_descriptor = method_id->Class()->GetStringId()->Data();
470         method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
471       }
472       if (secondary_index < header->GetCollections().ProtoIdsSize()) {
473         dex_ir::ProtoId* proto_id = header->GetCollections().GetProtoId(secondary_index);
474         proto = GetSignatureForProtoId(proto_id);
475       }
476       outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
477                          method.c_str(), proto.c_str(), width, index, width, secondary_index);
478     }
479     break;
480     // SOME NOT SUPPORTED:
481     // case Instruction::kIndexVaries:
482     // case Instruction::kIndexInlineMethod:
483     default:
484       outSize = snprintf(buf.get(), buf_size, "<?>");
485       break;
486   }  // switch
487 
488   // Determine success of string construction.
489   if (outSize >= buf_size) {
490     // The buffer wasn't big enough; retry with computed size. Note: snprintf()
491     // doesn't count/ the '\0' as part of its returned size, so we add explicit
492     // space for it here.
493     return IndexString(header, dec_insn, outSize + 1);
494   }
495   return buf;
496 }
497 
498 /*
499  * Dumps encoded annotation.
500  */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)501 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
502   fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
503   // Display all name=value pairs.
504   for (auto& subannotation : *annotation->GetAnnotationElements()) {
505     fputc(' ', out_file_);
506     fputs(subannotation->GetName()->Data(), out_file_);
507     fputc('=', out_file_);
508     DumpEncodedValue(subannotation->GetValue());
509   }
510 }
511 /*
512  * Dumps encoded value.
513  */
DumpEncodedValue(const dex_ir::EncodedValue * data)514 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
515   switch (data->Type()) {
516     case DexFile::kDexAnnotationByte:
517       fprintf(out_file_, "%" PRId8, data->GetByte());
518       break;
519     case DexFile::kDexAnnotationShort:
520       fprintf(out_file_, "%" PRId16, data->GetShort());
521       break;
522     case DexFile::kDexAnnotationChar:
523       fprintf(out_file_, "%" PRIu16, data->GetChar());
524       break;
525     case DexFile::kDexAnnotationInt:
526       fprintf(out_file_, "%" PRId32, data->GetInt());
527       break;
528     case DexFile::kDexAnnotationLong:
529       fprintf(out_file_, "%" PRId64, data->GetLong());
530       break;
531     case DexFile::kDexAnnotationFloat: {
532       fprintf(out_file_, "%g", data->GetFloat());
533       break;
534     }
535     case DexFile::kDexAnnotationDouble: {
536       fprintf(out_file_, "%g", data->GetDouble());
537       break;
538     }
539     case DexFile::kDexAnnotationString: {
540       dex_ir::StringId* string_id = data->GetStringId();
541       if (options_.output_format_ == kOutputPlain) {
542         DumpEscapedString(string_id->Data(), out_file_);
543       } else {
544         DumpXmlAttribute(string_id->Data(), out_file_);
545       }
546       break;
547     }
548     case DexFile::kDexAnnotationType: {
549       dex_ir::TypeId* type_id = data->GetTypeId();
550       fputs(type_id->GetStringId()->Data(), out_file_);
551       break;
552     }
553     case DexFile::kDexAnnotationField:
554     case DexFile::kDexAnnotationEnum: {
555       dex_ir::FieldId* field_id = data->GetFieldId();
556       fputs(field_id->Name()->Data(), out_file_);
557       break;
558     }
559     case DexFile::kDexAnnotationMethod: {
560       dex_ir::MethodId* method_id = data->GetMethodId();
561       fputs(method_id->Name()->Data(), out_file_);
562       break;
563     }
564     case DexFile::kDexAnnotationArray: {
565       fputc('{', out_file_);
566       // Display all elements.
567       for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
568         fputc(' ', out_file_);
569         DumpEncodedValue(value.get());
570       }
571       fputs(" }", out_file_);
572       break;
573     }
574     case DexFile::kDexAnnotationAnnotation: {
575       DumpEncodedAnnotation(data->GetEncodedAnnotation());
576       break;
577     }
578     case DexFile::kDexAnnotationNull:
579       fputs("null", out_file_);
580       break;
581     case DexFile::kDexAnnotationBoolean:
582       fputs(StrBool(data->GetBoolean()), out_file_);
583       break;
584     default:
585       fputs("????", out_file_);
586       break;
587   }  // switch
588 }
589 
590 /*
591  * Dumps the file header.
592  */
DumpFileHeader()593 void DexLayout::DumpFileHeader() {
594   char sanitized[8 * 2 + 1];
595   dex_ir::Collections& collections = header_->GetCollections();
596   fprintf(out_file_, "DEX file header:\n");
597   Asciify(sanitized, header_->Magic(), 8);
598   fprintf(out_file_, "magic               : '%s'\n", sanitized);
599   fprintf(out_file_, "checksum            : %08x\n", header_->Checksum());
600   fprintf(out_file_, "signature           : %02x%02x...%02x%02x\n",
601           header_->Signature()[0], header_->Signature()[1],
602           header_->Signature()[DexFile::kSha1DigestSize - 2],
603           header_->Signature()[DexFile::kSha1DigestSize - 1]);
604   fprintf(out_file_, "file_size           : %d\n", header_->FileSize());
605   fprintf(out_file_, "header_size         : %d\n", header_->HeaderSize());
606   fprintf(out_file_, "link_size           : %d\n", header_->LinkSize());
607   fprintf(out_file_, "link_off            : %d (0x%06x)\n",
608           header_->LinkOffset(), header_->LinkOffset());
609   fprintf(out_file_, "string_ids_size     : %d\n", collections.StringIdsSize());
610   fprintf(out_file_, "string_ids_off      : %d (0x%06x)\n",
611           collections.StringIdsOffset(), collections.StringIdsOffset());
612   fprintf(out_file_, "type_ids_size       : %d\n", collections.TypeIdsSize());
613   fprintf(out_file_, "type_ids_off        : %d (0x%06x)\n",
614           collections.TypeIdsOffset(), collections.TypeIdsOffset());
615   fprintf(out_file_, "proto_ids_size      : %d\n", collections.ProtoIdsSize());
616   fprintf(out_file_, "proto_ids_off       : %d (0x%06x)\n",
617           collections.ProtoIdsOffset(), collections.ProtoIdsOffset());
618   fprintf(out_file_, "field_ids_size      : %d\n", collections.FieldIdsSize());
619   fprintf(out_file_, "field_ids_off       : %d (0x%06x)\n",
620           collections.FieldIdsOffset(), collections.FieldIdsOffset());
621   fprintf(out_file_, "method_ids_size     : %d\n", collections.MethodIdsSize());
622   fprintf(out_file_, "method_ids_off      : %d (0x%06x)\n",
623           collections.MethodIdsOffset(), collections.MethodIdsOffset());
624   fprintf(out_file_, "class_defs_size     : %d\n", collections.ClassDefsSize());
625   fprintf(out_file_, "class_defs_off      : %d (0x%06x)\n",
626           collections.ClassDefsOffset(), collections.ClassDefsOffset());
627   fprintf(out_file_, "data_size           : %d\n", header_->DataSize());
628   fprintf(out_file_, "data_off            : %d (0x%06x)\n\n",
629           header_->DataOffset(), header_->DataOffset());
630 }
631 
632 /*
633  * Dumps a class_def_item.
634  */
DumpClassDef(int idx)635 void DexLayout::DumpClassDef(int idx) {
636   // General class information.
637   dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
638   fprintf(out_file_, "Class #%d header:\n", idx);
639   fprintf(out_file_, "class_idx           : %d\n", class_def->ClassType()->GetIndex());
640   fprintf(out_file_, "access_flags        : %d (0x%04x)\n",
641           class_def->GetAccessFlags(), class_def->GetAccessFlags());
642   uint32_t superclass_idx =  class_def->Superclass() == nullptr ?
643       DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
644   fprintf(out_file_, "superclass_idx      : %d\n", superclass_idx);
645   fprintf(out_file_, "interfaces_off      : %d (0x%06x)\n",
646           class_def->InterfacesOffset(), class_def->InterfacesOffset());
647   uint32_t source_file_offset = 0xffffffffU;
648   if (class_def->SourceFile() != nullptr) {
649     source_file_offset = class_def->SourceFile()->GetIndex();
650   }
651   fprintf(out_file_, "source_file_idx     : %d\n", source_file_offset);
652   uint32_t annotations_offset = 0;
653   if (class_def->Annotations() != nullptr) {
654     annotations_offset = class_def->Annotations()->GetOffset();
655   }
656   fprintf(out_file_, "annotations_off     : %d (0x%06x)\n",
657           annotations_offset, annotations_offset);
658   if (class_def->GetClassData() == nullptr) {
659     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n", 0, 0);
660   } else {
661     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n",
662             class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
663   }
664 
665   // Fields and methods.
666   dex_ir::ClassData* class_data = class_def->GetClassData();
667   if (class_data != nullptr && class_data->StaticFields() != nullptr) {
668     fprintf(out_file_, "static_fields_size  : %zu\n", class_data->StaticFields()->size());
669   } else {
670     fprintf(out_file_, "static_fields_size  : 0\n");
671   }
672   if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
673     fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
674   } else {
675     fprintf(out_file_, "instance_fields_size: 0\n");
676   }
677   if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
678     fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
679   } else {
680     fprintf(out_file_, "direct_methods_size : 0\n");
681   }
682   if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
683     fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
684   } else {
685     fprintf(out_file_, "virtual_methods_size: 0\n");
686   }
687   fprintf(out_file_, "\n");
688 }
689 
690 /**
691  * Dumps an annotation set item.
692  */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)693 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
694   if (set_item == nullptr || set_item->GetItems()->size() == 0) {
695     fputs("  empty-annotation-set\n", out_file_);
696     return;
697   }
698   for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
699     if (annotation == nullptr) {
700       continue;
701     }
702     fputs("  ", out_file_);
703     switch (annotation->GetVisibility()) {
704       case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   out_file_); break;
705       case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
706       case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  out_file_); break;
707       default:                             fputs("VISIBILITY_UNKNOWN ", out_file_); break;
708     }  // switch
709     DumpEncodedAnnotation(annotation->GetAnnotation());
710     fputc('\n', out_file_);
711   }
712 }
713 
714 /*
715  * Dumps class annotations.
716  */
DumpClassAnnotations(int idx)717 void DexLayout::DumpClassAnnotations(int idx) {
718   dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
719   dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
720   if (annotations_directory == nullptr) {
721     return;  // none
722   }
723 
724   fprintf(out_file_, "Class #%d annotations:\n", idx);
725 
726   dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
727   dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
728   dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
729   dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
730 
731   // Annotations on the class itself.
732   if (class_set_item != nullptr) {
733     fprintf(out_file_, "Annotations on class\n");
734     DumpAnnotationSetItem(class_set_item);
735   }
736 
737   // Annotations on fields.
738   if (fields != nullptr) {
739     for (auto& field : *fields) {
740       const dex_ir::FieldId* field_id = field->GetFieldId();
741       const uint32_t field_idx = field_id->GetIndex();
742       const char* field_name = field_id->Name()->Data();
743       fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
744       DumpAnnotationSetItem(field->GetAnnotationSetItem());
745     }
746   }
747 
748   // Annotations on methods.
749   if (methods != nullptr) {
750     for (auto& method : *methods) {
751       const dex_ir::MethodId* method_id = method->GetMethodId();
752       const uint32_t method_idx = method_id->GetIndex();
753       const char* method_name = method_id->Name()->Data();
754       fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
755       DumpAnnotationSetItem(method->GetAnnotationSetItem());
756     }
757   }
758 
759   // Annotations on method parameters.
760   if (parameters != nullptr) {
761     for (auto& parameter : *parameters) {
762       const dex_ir::MethodId* method_id = parameter->GetMethodId();
763       const uint32_t method_idx = method_id->GetIndex();
764       const char* method_name = method_id->Name()->Data();
765       fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
766       uint32_t j = 0;
767       for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
768         fprintf(out_file_, "#%u\n", j);
769         DumpAnnotationSetItem(annotation);
770         ++j;
771       }
772     }
773   }
774 
775   fputc('\n', out_file_);
776 }
777 
778 /*
779  * Dumps an interface that a class declares to implement.
780  */
DumpInterface(const dex_ir::TypeId * type_item,int i)781 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
782   const char* interface_name = type_item->GetStringId()->Data();
783   if (options_.output_format_ == kOutputPlain) {
784     fprintf(out_file_, "    #%d              : '%s'\n", i, interface_name);
785   } else {
786     std::string dot(DescriptorToDotWrapper(interface_name));
787     fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
788   }
789 }
790 
791 /*
792  * Dumps the catches table associated with the code.
793  */
DumpCatches(const dex_ir::CodeItem * code)794 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
795   const uint16_t tries_size = code->TriesSize();
796 
797   // No catch table.
798   if (tries_size == 0) {
799     fprintf(out_file_, "      catches       : (none)\n");
800     return;
801   }
802 
803   // Dump all table entries.
804   fprintf(out_file_, "      catches       : %d\n", tries_size);
805   std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
806   for (uint32_t i = 0; i < tries_size; i++) {
807     const dex_ir::TryItem* try_item = (*tries)[i].get();
808     const uint32_t start = try_item->StartAddr();
809     const uint32_t end = start + try_item->InsnCount();
810     fprintf(out_file_, "        0x%04x - 0x%04x\n", start, end);
811     for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
812       const dex_ir::TypeId* type_id = handler->GetTypeId();
813       const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
814       fprintf(out_file_, "          %s -> 0x%04x\n", descriptor, handler->GetAddress());
815     }  // for
816   }  // for
817 }
818 
819 /*
820  * Dumps all positions table entries associated with the code.
821  */
DumpPositionInfo(const dex_ir::CodeItem * code)822 void DexLayout::DumpPositionInfo(const dex_ir::CodeItem* code) {
823   dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
824   if (debug_info == nullptr) {
825     return;
826   }
827   std::vector<std::unique_ptr<dex_ir::PositionInfo>>& positions = debug_info->GetPositionInfo();
828   for (size_t i = 0; i < positions.size(); ++i) {
829     fprintf(out_file_, "        0x%04x line=%d\n", positions[i]->address_, positions[i]->line_);
830   }
831 }
832 
833 /*
834  * Dumps all locals table entries associated with the code.
835  */
DumpLocalInfo(const dex_ir::CodeItem * code)836 void DexLayout::DumpLocalInfo(const dex_ir::CodeItem* code) {
837   dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
838   if (debug_info == nullptr) {
839     return;
840   }
841   std::vector<std::unique_ptr<dex_ir::LocalInfo>>& locals = debug_info->GetLocalInfo();
842   for (size_t i = 0; i < locals.size(); ++i) {
843     dex_ir::LocalInfo* entry = locals[i].get();
844     fprintf(out_file_, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
845             entry->start_address_, entry->end_address_, entry->reg_,
846             entry->name_.c_str(), entry->descriptor_.c_str(), entry->signature_.c_str());
847   }
848 }
849 
850 /*
851  * Dumps a single instruction.
852  */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)853 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
854                                 uint32_t code_offset,
855                                 uint32_t insn_idx,
856                                 uint32_t insn_width,
857                                 const Instruction* dec_insn) {
858   // Address of instruction (expressed as byte offset).
859   fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
860 
861   // Dump (part of) raw bytes.
862   const uint16_t* insns = code->Insns();
863   for (uint32_t i = 0; i < 8; i++) {
864     if (i < insn_width) {
865       if (i == 7) {
866         fprintf(out_file_, " ... ");
867       } else {
868         // Print 16-bit value in little-endian order.
869         const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
870         fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
871       }
872     } else {
873       fputs("     ", out_file_);
874     }
875   }  // for
876 
877   // Dump pseudo-instruction or opcode.
878   if (dec_insn->Opcode() == Instruction::NOP) {
879     const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
880     if (instr == Instruction::kPackedSwitchSignature) {
881       fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
882     } else if (instr == Instruction::kSparseSwitchSignature) {
883       fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
884     } else if (instr == Instruction::kArrayDataSignature) {
885       fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
886     } else {
887       fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
888     }
889   } else {
890     fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
891   }
892 
893   // Set up additional argument.
894   std::unique_ptr<char[]> index_buf;
895   if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
896     index_buf = IndexString(header_, dec_insn, 200);
897   }
898 
899   // Dump the instruction.
900   //
901   // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
902   //
903   switch (Instruction::FormatOf(dec_insn->Opcode())) {
904     case Instruction::k10x:        // op
905       break;
906     case Instruction::k12x:        // op vA, vB
907       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
908       break;
909     case Instruction::k11n:        // op vA, #+B
910       fprintf(out_file_, " v%d, #int %d // #%x",
911               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
912       break;
913     case Instruction::k11x:        // op vAA
914       fprintf(out_file_, " v%d", dec_insn->VRegA());
915       break;
916     case Instruction::k10t:        // op +AA
917     case Instruction::k20t: {      // op +AAAA
918       const int32_t targ = (int32_t) dec_insn->VRegA();
919       fprintf(out_file_, " %04x // %c%04x",
920               insn_idx + targ,
921               (targ < 0) ? '-' : '+',
922               (targ < 0) ? -targ : targ);
923       break;
924     }
925     case Instruction::k22x:        // op vAA, vBBBB
926       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
927       break;
928     case Instruction::k21t: {     // op vAA, +BBBB
929       const int32_t targ = (int32_t) dec_insn->VRegB();
930       fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
931               insn_idx + targ,
932               (targ < 0) ? '-' : '+',
933               (targ < 0) ? -targ : targ);
934       break;
935     }
936     case Instruction::k21s:        // op vAA, #+BBBB
937       fprintf(out_file_, " v%d, #int %d // #%x",
938               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
939       break;
940     case Instruction::k21h:        // op vAA, #+BBBB0000[00000000]
941       // The printed format varies a bit based on the actual opcode.
942       if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
943         const int32_t value = dec_insn->VRegB() << 16;
944         fprintf(out_file_, " v%d, #int %d // #%x",
945                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
946       } else {
947         const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
948         fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
949                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
950       }
951       break;
952     case Instruction::k21c:        // op vAA, thing@BBBB
953     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
954       fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
955       break;
956     case Instruction::k23x:        // op vAA, vBB, vCC
957       fprintf(out_file_, " v%d, v%d, v%d",
958               dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
959       break;
960     case Instruction::k22b:        // op vAA, vBB, #+CC
961       fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
962               dec_insn->VRegA(), dec_insn->VRegB(),
963               (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
964       break;
965     case Instruction::k22t: {      // op vA, vB, +CCCC
966       const int32_t targ = (int32_t) dec_insn->VRegC();
967       fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
968               dec_insn->VRegA(), dec_insn->VRegB(),
969               insn_idx + targ,
970               (targ < 0) ? '-' : '+',
971               (targ < 0) ? -targ : targ);
972       break;
973     }
974     case Instruction::k22s:        // op vA, vB, #+CCCC
975       fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
976               dec_insn->VRegA(), dec_insn->VRegB(),
977               (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
978       break;
979     case Instruction::k22c:        // op vA, vB, thing@CCCC
980     // NOT SUPPORTED:
981     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
982       fprintf(out_file_, " v%d, v%d, %s",
983               dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
984       break;
985     case Instruction::k30t:
986       fprintf(out_file_, " #%08x", dec_insn->VRegA());
987       break;
988     case Instruction::k31i: {     // op vAA, #+BBBBBBBB
989       // This is often, but not always, a float.
990       union {
991         float f;
992         uint32_t i;
993       } conv;
994       conv.i = dec_insn->VRegB();
995       fprintf(out_file_, " v%d, #float %g // #%08x",
996               dec_insn->VRegA(), conv.f, dec_insn->VRegB());
997       break;
998     }
999     case Instruction::k31t:       // op vAA, offset +BBBBBBBB
1000       fprintf(out_file_, " v%d, %08x // +%08x",
1001               dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
1002       break;
1003     case Instruction::k32x:        // op vAAAA, vBBBB
1004       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
1005       break;
1006     case Instruction::k35c:           // op {vC, vD, vE, vF, vG}, thing@BBBB
1007     case Instruction::k45cc: {        // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
1008     // NOT SUPPORTED:
1009     // case Instruction::k35ms:       // [opt] invoke-virtual+super
1010     // case Instruction::k35mi:       // [opt] inline invoke
1011       uint32_t arg[Instruction::kMaxVarArgRegs];
1012       dec_insn->GetVarArgs(arg);
1013       fputs(" {", out_file_);
1014       for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1015         if (i == 0) {
1016           fprintf(out_file_, "v%d", arg[i]);
1017         } else {
1018           fprintf(out_file_, ", v%d", arg[i]);
1019         }
1020       }  // for
1021       fprintf(out_file_, "}, %s", index_buf.get());
1022       break;
1023     }
1024     case Instruction::k3rc:           // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
1025     case Instruction::k4rcc:          // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
1026     // NOT SUPPORTED:
1027     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
1028     // case Instruction::k3rmi:       // [opt] execute-inline/range
1029       {
1030         // This doesn't match the "dx" output when some of the args are
1031         // 64-bit values -- dx only shows the first register.
1032         fputs(" {", out_file_);
1033         for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1034           if (i == 0) {
1035             fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
1036           } else {
1037             fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
1038           }
1039         }  // for
1040         fprintf(out_file_, "}, %s", index_buf.get());
1041       }
1042       break;
1043     case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
1044       // This is often, but not always, a double.
1045       union {
1046         double d;
1047         uint64_t j;
1048       } conv;
1049       conv.j = dec_insn->WideVRegB();
1050       fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1051               dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1052       break;
1053     }
1054     // NOT SUPPORTED:
1055     // case Instruction::k00x:        // unknown op or breakpoint
1056     //    break;
1057     default:
1058       fprintf(out_file_, " ???");
1059       break;
1060   }  // switch
1061 
1062   fputc('\n', out_file_);
1063 }
1064 
1065 /*
1066  * Dumps a bytecode disassembly.
1067  */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1068 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1069   dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1070   const char* name = method_id->Name()->Data();
1071   std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1072   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1073 
1074   // Generate header.
1075   std::string dot(DescriptorToDotWrapper(back_descriptor));
1076   fprintf(out_file_, "%06x:                                        |[%06x] %s.%s:%s\n",
1077           code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1078 
1079   // Iterate over all instructions.
1080   const uint16_t* insns = code->Insns();
1081   for (uint32_t insn_idx = 0; insn_idx < code->InsnsSize();) {
1082     const Instruction* instruction = Instruction::At(&insns[insn_idx]);
1083     const uint32_t insn_width = instruction->SizeInCodeUnits();
1084     if (insn_width == 0) {
1085       fprintf(stderr, "GLITCH: zero-width instruction at idx=0x%04x\n", insn_idx);
1086       break;
1087     }
1088     DumpInstruction(code, code_offset, insn_idx, insn_width, instruction);
1089     insn_idx += insn_width;
1090   }  // for
1091 }
1092 
1093 /*
1094  * Dumps code of a method.
1095  */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1096 void DexLayout::DumpCode(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1097   fprintf(out_file_, "      registers     : %d\n", code->RegistersSize());
1098   fprintf(out_file_, "      ins           : %d\n", code->InsSize());
1099   fprintf(out_file_, "      outs          : %d\n", code->OutsSize());
1100   fprintf(out_file_, "      insns size    : %d 16-bit code units\n",
1101           code->InsnsSize());
1102 
1103   // Bytecode disassembly, if requested.
1104   if (options_.disassemble_) {
1105     DumpBytecodes(idx, code, code_offset);
1106   }
1107 
1108   // Try-catch blocks.
1109   DumpCatches(code);
1110 
1111   // Positions and locals table in the debug info.
1112   fprintf(out_file_, "      positions     : \n");
1113   DumpPositionInfo(code);
1114   fprintf(out_file_, "      locals        : \n");
1115   DumpLocalInfo(code);
1116 }
1117 
1118 /*
1119  * Dumps a method.
1120  */
DumpMethod(uint32_t idx,uint32_t flags,const dex_ir::CodeItem * code,int i)1121 void DexLayout::DumpMethod(uint32_t idx, uint32_t flags, const dex_ir::CodeItem* code, int i) {
1122   // Bail for anything private if export only requested.
1123   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1124     return;
1125   }
1126 
1127   dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1128   const char* name = method_id->Name()->Data();
1129   char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1130   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1131   char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1132 
1133   if (options_.output_format_ == kOutputPlain) {
1134     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1135     fprintf(out_file_, "      name          : '%s'\n", name);
1136     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1137     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1138     if (code == nullptr) {
1139       fprintf(out_file_, "      code          : (none)\n");
1140     } else {
1141       fprintf(out_file_, "      code          -\n");
1142       DumpCode(idx, code, code->GetOffset());
1143     }
1144     if (options_.disassemble_) {
1145       fputc('\n', out_file_);
1146     }
1147   } else if (options_.output_format_ == kOutputXml) {
1148     const bool constructor = (name[0] == '<');
1149 
1150     // Method name and prototype.
1151     if (constructor) {
1152       std::string dot(DescriptorClassToDot(back_descriptor));
1153       fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1154       dot = DescriptorToDotWrapper(back_descriptor);
1155       fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1156     } else {
1157       fprintf(out_file_, "<method name=\"%s\"\n", name);
1158       const char* return_type = strrchr(type_descriptor, ')');
1159       if (return_type == nullptr) {
1160         fprintf(stderr, "bad method type descriptor '%s'\n", type_descriptor);
1161         goto bail;
1162       }
1163       std::string dot(DescriptorToDotWrapper(return_type + 1));
1164       fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1165       fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1166       fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1167       fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1168           (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1169     }
1170 
1171     // Additional method flags.
1172     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1173     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1174     // The "deprecated=" not knowable w/o parsing annotations.
1175     fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1176 
1177     // Parameters.
1178     if (type_descriptor[0] != '(') {
1179       fprintf(stderr, "ERROR: bad descriptor '%s'\n", type_descriptor);
1180       goto bail;
1181     }
1182     char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1183     const char* base = type_descriptor + 1;
1184     int arg_num = 0;
1185     while (*base != ')') {
1186       char* cp = tmp_buf;
1187       while (*base == '[') {
1188         *cp++ = *base++;
1189       }
1190       if (*base == 'L') {
1191         // Copy through ';'.
1192         do {
1193           *cp = *base++;
1194         } while (*cp++ != ';');
1195       } else {
1196         // Primitive char, copy it.
1197         if (strchr("ZBCSIFJD", *base) == nullptr) {
1198           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
1199           break;  // while
1200         }
1201         *cp++ = *base++;
1202       }
1203       // Null terminate and display.
1204       *cp++ = '\0';
1205       std::string dot(DescriptorToDotWrapper(tmp_buf));
1206       fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1207                         "</parameter>\n", arg_num++, dot.c_str());
1208     }  // while
1209     free(tmp_buf);
1210     if (constructor) {
1211       fprintf(out_file_, "</constructor>\n");
1212     } else {
1213       fprintf(out_file_, "</method>\n");
1214     }
1215   }
1216 
1217  bail:
1218   free(type_descriptor);
1219   free(access_str);
1220 }
1221 
1222 /*
1223  * Dumps a static (class) field.
1224  */
DumpSField(uint32_t idx,uint32_t flags,int i,dex_ir::EncodedValue * init)1225 void DexLayout::DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init) {
1226   // Bail for anything private if export only requested.
1227   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1228     return;
1229   }
1230 
1231   dex_ir::FieldId* field_id = header_->GetCollections().GetFieldId(idx);
1232   const char* name = field_id->Name()->Data();
1233   const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1234   const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1235   char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1236 
1237   if (options_.output_format_ == kOutputPlain) {
1238     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1239     fprintf(out_file_, "      name          : '%s'\n", name);
1240     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1241     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1242     if (init != nullptr) {
1243       fputs("      value         : ", out_file_);
1244       DumpEncodedValue(init);
1245       fputs("\n", out_file_);
1246     }
1247   } else if (options_.output_format_ == kOutputXml) {
1248     fprintf(out_file_, "<field name=\"%s\"\n", name);
1249     std::string dot(DescriptorToDotWrapper(type_descriptor));
1250     fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1251     fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1252     fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1253     // The "value=" is not knowable w/o parsing annotations.
1254     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1255     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1256     // The "deprecated=" is not knowable w/o parsing annotations.
1257     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1258     if (init != nullptr) {
1259       fputs(" value=\"", out_file_);
1260       DumpEncodedValue(init);
1261       fputs("\"\n", out_file_);
1262     }
1263     fputs(">\n</field>\n", out_file_);
1264   }
1265 
1266   free(access_str);
1267 }
1268 
1269 /*
1270  * Dumps an instance field.
1271  */
DumpIField(uint32_t idx,uint32_t flags,int i)1272 void DexLayout::DumpIField(uint32_t idx, uint32_t flags, int i) {
1273   DumpSField(idx, flags, i, nullptr);
1274 }
1275 
1276 /*
1277  * Dumps the class.
1278  *
1279  * Note "idx" is a DexClassDef index, not a DexTypeId index.
1280  *
1281  * If "*last_package" is nullptr or does not match the current class' package,
1282  * the value will be replaced with a newly-allocated string.
1283  */
DumpClass(int idx,char ** last_package)1284 void DexLayout::DumpClass(int idx, char** last_package) {
1285   dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
1286   // Omitting non-public class.
1287   if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1288     return;
1289   }
1290 
1291   if (options_.show_section_headers_) {
1292     DumpClassDef(idx);
1293   }
1294 
1295   if (options_.show_annotations_) {
1296     DumpClassAnnotations(idx);
1297   }
1298 
1299   // For the XML output, show the package name.  Ideally we'd gather
1300   // up the classes, sort them, and dump them alphabetically so the
1301   // package name wouldn't jump around, but that's not a great plan
1302   // for something that needs to run on the device.
1303   const char* class_descriptor =
1304       header_->GetCollections().GetClassDef(idx)->ClassType()->GetStringId()->Data();
1305   if (!(class_descriptor[0] == 'L' &&
1306         class_descriptor[strlen(class_descriptor)-1] == ';')) {
1307     // Arrays and primitives should not be defined explicitly. Keep going?
1308     fprintf(stderr, "Malformed class name '%s'\n", class_descriptor);
1309   } else if (options_.output_format_ == kOutputXml) {
1310     char* mangle = strdup(class_descriptor + 1);
1311     mangle[strlen(mangle)-1] = '\0';
1312 
1313     // Reduce to just the package name.
1314     char* last_slash = strrchr(mangle, '/');
1315     if (last_slash != nullptr) {
1316       *last_slash = '\0';
1317     } else {
1318       *mangle = '\0';
1319     }
1320 
1321     for (char* cp = mangle; *cp != '\0'; cp++) {
1322       if (*cp == '/') {
1323         *cp = '.';
1324       }
1325     }  // for
1326 
1327     if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1328       // Start of a new package.
1329       if (*last_package != nullptr) {
1330         fprintf(out_file_, "</package>\n");
1331       }
1332       fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1333       free(*last_package);
1334       *last_package = mangle;
1335     } else {
1336       free(mangle);
1337     }
1338   }
1339 
1340   // General class information.
1341   char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1342   const char* superclass_descriptor = nullptr;
1343   if (class_def->Superclass() != nullptr) {
1344     superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1345   }
1346   if (options_.output_format_ == kOutputPlain) {
1347     fprintf(out_file_, "Class #%d            -\n", idx);
1348     fprintf(out_file_, "  Class descriptor  : '%s'\n", class_descriptor);
1349     fprintf(out_file_, "  Access flags      : 0x%04x (%s)\n",
1350             class_def->GetAccessFlags(), access_str);
1351     if (superclass_descriptor != nullptr) {
1352       fprintf(out_file_, "  Superclass        : '%s'\n", superclass_descriptor);
1353     }
1354     fprintf(out_file_, "  Interfaces        -\n");
1355   } else {
1356     std::string dot(DescriptorClassToDot(class_descriptor));
1357     fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1358     if (superclass_descriptor != nullptr) {
1359       dot = DescriptorToDotWrapper(superclass_descriptor);
1360       fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1361     }
1362     fprintf(out_file_, " interface=%s\n",
1363             QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1364     fprintf(out_file_, " abstract=%s\n",
1365             QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1366     fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1367     fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1368     // The "deprecated=" not knowable w/o parsing annotations.
1369     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1370     fprintf(out_file_, ">\n");
1371   }
1372 
1373   // Interfaces.
1374   const dex_ir::TypeList* interfaces = class_def->Interfaces();
1375   if (interfaces != nullptr) {
1376     const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1377     for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1378       DumpInterface((*interfaces_vector)[i], i);
1379     }  // for
1380   }
1381 
1382   // Fields and methods.
1383   dex_ir::ClassData* class_data = class_def->GetClassData();
1384   // Prepare data for static fields.
1385   dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1386   dex_ir::EncodedValueVector* encoded_values =
1387       static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1388   const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1389 
1390   // Static fields.
1391   if (options_.output_format_ == kOutputPlain) {
1392     fprintf(out_file_, "  Static fields     -\n");
1393   }
1394   if (class_data != nullptr) {
1395     dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1396     if (static_fields != nullptr) {
1397       for (uint32_t i = 0; i < static_fields->size(); i++) {
1398         DumpSField((*static_fields)[i]->GetFieldId()->GetIndex(),
1399                    (*static_fields)[i]->GetAccessFlags(),
1400                    i,
1401                    i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1402       }  // for
1403     }
1404   }
1405 
1406   // Instance fields.
1407   if (options_.output_format_ == kOutputPlain) {
1408     fprintf(out_file_, "  Instance fields   -\n");
1409   }
1410   if (class_data != nullptr) {
1411     dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1412     if (instance_fields != nullptr) {
1413       for (uint32_t i = 0; i < instance_fields->size(); i++) {
1414         DumpIField((*instance_fields)[i]->GetFieldId()->GetIndex(),
1415                    (*instance_fields)[i]->GetAccessFlags(),
1416                    i);
1417       }  // for
1418     }
1419   }
1420 
1421   // Direct methods.
1422   if (options_.output_format_ == kOutputPlain) {
1423     fprintf(out_file_, "  Direct methods    -\n");
1424   }
1425   if (class_data != nullptr) {
1426     dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1427     if (direct_methods != nullptr) {
1428       for (uint32_t i = 0; i < direct_methods->size(); i++) {
1429         DumpMethod((*direct_methods)[i]->GetMethodId()->GetIndex(),
1430                    (*direct_methods)[i]->GetAccessFlags(),
1431                    (*direct_methods)[i]->GetCodeItem(),
1432                  i);
1433       }  // for
1434     }
1435   }
1436 
1437   // Virtual methods.
1438   if (options_.output_format_ == kOutputPlain) {
1439     fprintf(out_file_, "  Virtual methods   -\n");
1440   }
1441   if (class_data != nullptr) {
1442     dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1443     if (virtual_methods != nullptr) {
1444       for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1445         DumpMethod((*virtual_methods)[i]->GetMethodId()->GetIndex(),
1446                    (*virtual_methods)[i]->GetAccessFlags(),
1447                    (*virtual_methods)[i]->GetCodeItem(),
1448                    i);
1449       }  // for
1450     }
1451   }
1452 
1453   // End of class.
1454   if (options_.output_format_ == kOutputPlain) {
1455     const char* file_name = "unknown";
1456     if (class_def->SourceFile() != nullptr) {
1457       file_name = class_def->SourceFile()->Data();
1458     }
1459     const dex_ir::StringId* source_file = class_def->SourceFile();
1460     fprintf(out_file_, "  source_file_idx   : %d (%s)\n\n",
1461             source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1462   } else if (options_.output_format_ == kOutputXml) {
1463     fprintf(out_file_, "</class>\n");
1464   }
1465 
1466   free(access_str);
1467 }
1468 
DumpDexFile()1469 void DexLayout::DumpDexFile() {
1470   // Headers.
1471   if (options_.show_file_headers_) {
1472     DumpFileHeader();
1473   }
1474 
1475   // Open XML context.
1476   if (options_.output_format_ == kOutputXml) {
1477     fprintf(out_file_, "<api>\n");
1478   }
1479 
1480   // Iterate over all classes.
1481   char* package = nullptr;
1482   const uint32_t class_defs_size = header_->GetCollections().ClassDefsSize();
1483   for (uint32_t i = 0; i < class_defs_size; i++) {
1484     DumpClass(i, &package);
1485   }  // for
1486 
1487   // Free the last package allocated.
1488   if (package != nullptr) {
1489     fprintf(out_file_, "</package>\n");
1490     free(package);
1491   }
1492 
1493   // Close XML context.
1494   if (options_.output_format_ == kOutputXml) {
1495     fprintf(out_file_, "</api>\n");
1496   }
1497 }
1498 
LayoutClassDefsAndClassData(const DexFile * dex_file)1499 std::vector<dex_ir::ClassData*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1500   std::vector<dex_ir::ClassDef*> new_class_def_order;
1501   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1502     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1503     if (info_->ContainsClass(*dex_file, type_idx)) {
1504       new_class_def_order.push_back(class_def.get());
1505     }
1506   }
1507   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1508     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1509     if (!info_->ContainsClass(*dex_file, type_idx)) {
1510       new_class_def_order.push_back(class_def.get());
1511     }
1512   }
1513   uint32_t class_defs_offset = header_->GetCollections().ClassDefsOffset();
1514   uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
1515   std::unordered_set<dex_ir::ClassData*> visited_class_data;
1516   std::vector<dex_ir::ClassData*> new_class_data_order;
1517   for (uint32_t i = 0; i < new_class_def_order.size(); ++i) {
1518     dex_ir::ClassDef* class_def = new_class_def_order[i];
1519     class_def->SetIndex(i);
1520     class_def->SetOffset(class_defs_offset);
1521     class_defs_offset += dex_ir::ClassDef::ItemSize();
1522     dex_ir::ClassData* class_data = class_def->GetClassData();
1523     if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1524       class_data->SetOffset(class_data_offset);
1525       class_data_offset += class_data->GetSize();
1526       visited_class_data.insert(class_data);
1527       new_class_data_order.push_back(class_data);
1528     }
1529   }
1530   return new_class_data_order;
1531 }
1532 
LayoutStringData(const DexFile * dex_file)1533 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1534   const size_t num_strings = header_->GetCollections().StringIds().size();
1535   std::vector<bool> is_shorty(num_strings, false);
1536   std::vector<bool> from_hot_method(num_strings, false);
1537   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1538     // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1539     // as hot.
1540     const bool is_profile_class =
1541         info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1542     if (is_profile_class) {
1543       from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1544     }
1545     dex_ir::ClassData* data = class_def->GetClassData();
1546     if (data == nullptr) {
1547       continue;
1548     }
1549     for (size_t i = 0; i < 2; ++i) {
1550       for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1551         const dex_ir::MethodId* method_id = method->GetMethodId();
1552         dex_ir::CodeItem* code_item = method->GetCodeItem();
1553         if (code_item == nullptr) {
1554           continue;
1555         }
1556         const bool is_clinit = is_profile_class &&
1557             (method->GetAccessFlags() & kAccConstructor) != 0 &&
1558             (method->GetAccessFlags() & kAccStatic) != 0;
1559         const bool method_executed = is_clinit ||
1560             info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
1561         if (!method_executed) {
1562           continue;
1563         }
1564         is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1565         dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1566         if (fixups == nullptr) {
1567           continue;
1568         }
1569         if (fixups->StringIds() != nullptr) {
1570           // Add const-strings.
1571           for (dex_ir::StringId* id : *fixups->StringIds()) {
1572             from_hot_method[id->GetIndex()] = true;
1573           }
1574         }
1575         // TODO: Only visit field ids from static getters and setters.
1576         for (dex_ir::FieldId* id : *fixups->FieldIds()) {
1577           // Add the field names and types from getters and setters.
1578           from_hot_method[id->Name()->GetIndex()] = true;
1579           from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1580         }
1581       }
1582     }
1583   }
1584   // Sort string data by specified order.
1585   std::vector<dex_ir::StringId*> string_ids;
1586   size_t min_offset = std::numeric_limits<size_t>::max();
1587   size_t max_offset = 0;
1588   size_t hot_bytes = 0;
1589   for (auto& string_id : header_->GetCollections().StringIds()) {
1590     string_ids.push_back(string_id.get());
1591     const size_t cur_offset = string_id->DataItem()->GetOffset();
1592     CHECK_NE(cur_offset, 0u);
1593     min_offset = std::min(min_offset, cur_offset);
1594     dex_ir::StringData* data = string_id->DataItem();
1595     const size_t element_size = data->GetSize() + 1;  // Add one extra for null.
1596     size_t end_offset = cur_offset + element_size;
1597     if (is_shorty[string_id->GetIndex()] || from_hot_method[string_id->GetIndex()]) {
1598       hot_bytes += element_size;
1599     }
1600     max_offset = std::max(max_offset, end_offset);
1601   }
1602   VLOG(compiler) << "Hot string data bytes " << hot_bytes << "/" << max_offset - min_offset;
1603   std::sort(string_ids.begin(),
1604             string_ids.end(),
1605             [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1606                                            const dex_ir::StringId* b) {
1607     const bool a_is_hot = from_hot_method[a->GetIndex()];
1608     const bool b_is_hot = from_hot_method[b->GetIndex()];
1609     if (a_is_hot != b_is_hot) {
1610       return a_is_hot < b_is_hot;
1611     }
1612     // After hot methods are partitioned, subpartition shorties.
1613     const bool a_is_shorty = is_shorty[a->GetIndex()];
1614     const bool b_is_shorty = is_shorty[b->GetIndex()];
1615     if (a_is_shorty != b_is_shorty) {
1616       return a_is_shorty < b_is_shorty;
1617     }
1618     // Preserve order.
1619     return a->DataItem()->GetOffset() < b->DataItem()->GetOffset();
1620   });
1621   // Now we know what order we want the string data, reorder the offsets.
1622   size_t offset = min_offset;
1623   for (dex_ir::StringId* string_id : string_ids) {
1624     dex_ir::StringData* data = string_id->DataItem();
1625     data->SetOffset(offset);
1626     offset += data->GetSize() + 1;  // Add one extra for null.
1627   }
1628   if (offset > max_offset) {
1629     const uint32_t diff = offset - max_offset;
1630     // If we expanded the string data section, we need to update the offsets or else we will
1631     // corrupt the next section when writing out.
1632     FixupSections(header_->GetCollections().StringDatasOffset(), diff);
1633     // Update file size.
1634     header_->SetFileSize(header_->FileSize() + diff);
1635   }
1636 }
1637 
1638 // Orders code items according to specified class data ordering.
1639 // NOTE: If the section following the code items is byte aligned, the last code item is left in
1640 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
LayoutCodeItems(const DexFile * dex_file,std::vector<dex_ir::ClassData * > new_class_data_order)1641 int32_t DexLayout::LayoutCodeItems(const DexFile* dex_file,
1642                                    std::vector<dex_ir::ClassData*> new_class_data_order) {
1643   // Do not move code items if class data section precedes code item section.
1644   // ULEB encoding is variable length, causing problems determining the offset of the code items.
1645   // TODO: We should swap the order of these sections in the future to avoid this issue.
1646   uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
1647   uint32_t code_item_offset = header_->GetCollections().CodeItemsOffset();
1648   if (class_data_offset < code_item_offset) {
1649     return 0;
1650   }
1651 
1652   // Find the last code item so we can leave it in place if the next section is not 4 byte aligned.
1653   dex_ir::CodeItem* last_code_item = nullptr;
1654   std::unordered_set<dex_ir::CodeItem*> visited_code_items;
1655   bool is_code_item_aligned = IsNextSectionCodeItemAligned(code_item_offset);
1656   if (!is_code_item_aligned) {
1657     for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
1658       std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
1659       if (last_code_item == nullptr
1660           || last_code_item->GetOffset() < code_item->GetOffset()) {
1661         last_code_item = code_item.get();
1662       }
1663     }
1664   }
1665 
1666   enum CodeItemKind {
1667     kMethodNotExecuted = 0,
1668     kMethodExecuted = 1,
1669     kSize = 2,
1670   };
1671 
1672   static constexpr InvokeType invoke_types[] = {
1673       kDirect,
1674       kVirtual
1675   };
1676 
1677   std::unordered_set<dex_ir::CodeItem*> code_items[CodeItemKind::kSize];
1678   for (InvokeType invoke_type : invoke_types) {
1679     for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1680       const bool is_profile_class =
1681           info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1682 
1683       // Skip classes that are not defined in this dex file.
1684       dex_ir::ClassData* class_data = class_def->GetClassData();
1685       if (class_data == nullptr) {
1686         continue;
1687       }
1688       for (auto& method : *(invoke_type == InvokeType::kDirect
1689                                 ? class_data->DirectMethods()
1690                                 : class_data->VirtualMethods())) {
1691         const dex_ir::MethodId *method_id = method->GetMethodId();
1692         dex_ir::CodeItem *code_item = method->GetCodeItem();
1693         if (code_item == last_code_item || code_item == nullptr) {
1694           continue;
1695         }
1696         // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1697         // TODO: clinits are executed only once, consider separating them further.
1698         const bool is_clinit = is_profile_class &&
1699             (method->GetAccessFlags() & kAccConstructor) != 0 &&
1700             (method->GetAccessFlags() & kAccStatic) != 0;
1701         const bool is_method_executed = is_clinit ||
1702             info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
1703         code_items[is_method_executed
1704                        ? CodeItemKind::kMethodExecuted
1705                        : CodeItemKind::kMethodNotExecuted]
1706             .insert(code_item);
1707       }
1708     }
1709   }
1710 
1711   // total_diff includes diffs generated by both executed and non-executed methods.
1712   int32_t total_diff = 0;
1713   // The relative placement has no effect on correctness; it is used to ensure
1714   // the layout is deterministic
1715   for (std::unordered_set<dex_ir::CodeItem*>& code_items_set : code_items) {
1716     // diff is reset for executed and non-executed methods.
1717     int32_t diff = 0;
1718     for (dex_ir::ClassData* data : new_class_data_order) {
1719       data->SetOffset(data->GetOffset() + diff);
1720       for (InvokeType invoke_type : invoke_types) {
1721         for (auto &method : *(invoke_type == InvokeType::kDirect
1722                                   ? data->DirectMethods()
1723                                   : data->VirtualMethods())) {
1724           dex_ir::CodeItem* code_item = method->GetCodeItem();
1725           if (code_item != nullptr &&
1726               code_items_set.find(code_item) != code_items_set.end()) {
1727             diff += UnsignedLeb128Size(code_item_offset)
1728                 - UnsignedLeb128Size(code_item->GetOffset());
1729             code_item->SetOffset(code_item_offset);
1730             code_item_offset +=
1731                 RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
1732           }
1733         }
1734       }
1735     }
1736     total_diff += diff;
1737   }
1738   // Adjust diff to be 4-byte aligned.
1739   return RoundUp(total_diff, kDexCodeItemAlignment);
1740 }
1741 
IsNextSectionCodeItemAligned(uint32_t offset)1742 bool DexLayout::IsNextSectionCodeItemAligned(uint32_t offset) {
1743   dex_ir::Collections& collections = header_->GetCollections();
1744   std::set<uint32_t> section_offsets;
1745   section_offsets.insert(collections.MapListOffset());
1746   section_offsets.insert(collections.TypeListsOffset());
1747   section_offsets.insert(collections.AnnotationSetRefListsOffset());
1748   section_offsets.insert(collections.AnnotationSetItemsOffset());
1749   section_offsets.insert(collections.ClassDatasOffset());
1750   section_offsets.insert(collections.CodeItemsOffset());
1751   section_offsets.insert(collections.StringDatasOffset());
1752   section_offsets.insert(collections.DebugInfoItemsOffset());
1753   section_offsets.insert(collections.AnnotationItemsOffset());
1754   section_offsets.insert(collections.EncodedArrayItemsOffset());
1755   section_offsets.insert(collections.AnnotationsDirectoryItemsOffset());
1756 
1757   auto found = section_offsets.find(offset);
1758   if (found != section_offsets.end()) {
1759     found++;
1760     if (found != section_offsets.end()) {
1761       return *found % kDexCodeItemAlignment == 0;
1762     }
1763   }
1764   return false;
1765 }
1766 
1767 // Adjust offsets of every item in the specified section by diff bytes.
FixupSection(std::map<uint32_t,std::unique_ptr<T>> & map,uint32_t diff)1768 template<class T> void DexLayout::FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map,
1769                                                uint32_t diff) {
1770   for (auto& pair : map) {
1771     std::unique_ptr<T>& item = pair.second;
1772     item->SetOffset(item->GetOffset() + diff);
1773   }
1774 }
1775 
1776 // Adjust offsets of all sections with an address after the specified offset by diff bytes.
FixupSections(uint32_t offset,uint32_t diff)1777 void DexLayout::FixupSections(uint32_t offset, uint32_t diff) {
1778   dex_ir::Collections& collections = header_->GetCollections();
1779   uint32_t map_list_offset = collections.MapListOffset();
1780   if (map_list_offset > offset) {
1781     collections.SetMapListOffset(map_list_offset + diff);
1782   }
1783 
1784   uint32_t type_lists_offset = collections.TypeListsOffset();
1785   if (type_lists_offset > offset) {
1786     collections.SetTypeListsOffset(type_lists_offset + diff);
1787     FixupSection(collections.TypeLists(), diff);
1788   }
1789 
1790   uint32_t annotation_set_ref_lists_offset = collections.AnnotationSetRefListsOffset();
1791   if (annotation_set_ref_lists_offset > offset) {
1792     collections.SetAnnotationSetRefListsOffset(annotation_set_ref_lists_offset + diff);
1793     FixupSection(collections.AnnotationSetRefLists(), diff);
1794   }
1795 
1796   uint32_t annotation_set_items_offset = collections.AnnotationSetItemsOffset();
1797   if (annotation_set_items_offset > offset) {
1798     collections.SetAnnotationSetItemsOffset(annotation_set_items_offset + diff);
1799     FixupSection(collections.AnnotationSetItems(), diff);
1800   }
1801 
1802   uint32_t class_datas_offset = collections.ClassDatasOffset();
1803   if (class_datas_offset > offset) {
1804     collections.SetClassDatasOffset(class_datas_offset + diff);
1805     FixupSection(collections.ClassDatas(), diff);
1806   }
1807 
1808   uint32_t code_items_offset = collections.CodeItemsOffset();
1809   if (code_items_offset > offset) {
1810     collections.SetCodeItemsOffset(code_items_offset + diff);
1811     FixupSection(collections.CodeItems(), diff);
1812   }
1813 
1814   uint32_t string_datas_offset = collections.StringDatasOffset();
1815   if (string_datas_offset > offset) {
1816     collections.SetStringDatasOffset(string_datas_offset + diff);
1817     FixupSection(collections.StringDatas(), diff);
1818   }
1819 
1820   uint32_t debug_info_items_offset = collections.DebugInfoItemsOffset();
1821   if (debug_info_items_offset > offset) {
1822     collections.SetDebugInfoItemsOffset(debug_info_items_offset + diff);
1823     FixupSection(collections.DebugInfoItems(), diff);
1824   }
1825 
1826   uint32_t annotation_items_offset = collections.AnnotationItemsOffset();
1827   if (annotation_items_offset > offset) {
1828     collections.SetAnnotationItemsOffset(annotation_items_offset + diff);
1829     FixupSection(collections.AnnotationItems(), diff);
1830   }
1831 
1832   uint32_t encoded_array_items_offset = collections.EncodedArrayItemsOffset();
1833   if (encoded_array_items_offset > offset) {
1834     collections.SetEncodedArrayItemsOffset(encoded_array_items_offset + diff);
1835     FixupSection(collections.EncodedArrayItems(), diff);
1836   }
1837 
1838   uint32_t annotations_directory_items_offset = collections.AnnotationsDirectoryItemsOffset();
1839   if (annotations_directory_items_offset > offset) {
1840     collections.SetAnnotationsDirectoryItemsOffset(annotations_directory_items_offset + diff);
1841     FixupSection(collections.AnnotationsDirectoryItems(), diff);
1842   }
1843 }
1844 
LayoutOutputFile(const DexFile * dex_file)1845 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1846   LayoutStringData(dex_file);
1847   std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
1848   int32_t diff = LayoutCodeItems(dex_file, new_class_data_order);
1849   // Move sections after ClassData by diff bytes.
1850   FixupSections(header_->GetCollections().ClassDatasOffset(), diff);
1851   // Update file size.
1852   header_->SetFileSize(header_->FileSize() + diff);
1853 }
1854 
OutputDexFile(const DexFile * dex_file)1855 void DexLayout::OutputDexFile(const DexFile* dex_file) {
1856   const std::string& dex_file_location = dex_file->GetLocation();
1857   std::string error_msg;
1858   std::unique_ptr<File> new_file;
1859   if (!options_.output_to_memmap_) {
1860     std::string output_location(options_.output_dex_directory_);
1861     size_t last_slash = dex_file_location.rfind('/');
1862     std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1863     if (output_location == dex_file_directory) {
1864       output_location = dex_file_location + ".new";
1865     } else if (last_slash != std::string::npos) {
1866       output_location += dex_file_location.substr(last_slash);
1867     } else {
1868       output_location += "/" + dex_file_location + ".new";
1869     }
1870     new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1871     if (new_file == nullptr) {
1872       LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1873       return;
1874     }
1875     if (ftruncate(new_file->Fd(), header_->FileSize()) != 0) {
1876       LOG(ERROR) << "Could not grow dex writer output file: " << output_location;;
1877       new_file->Erase();
1878       return;
1879     }
1880     mem_map_.reset(MemMap::MapFile(header_->FileSize(), PROT_READ | PROT_WRITE, MAP_SHARED,
1881         new_file->Fd(), 0, /*low_4gb*/ false, output_location.c_str(), &error_msg));
1882   } else {
1883     mem_map_.reset(MemMap::MapAnonymous("layout dex", nullptr, header_->FileSize(),
1884         PROT_READ | PROT_WRITE, /* low_4gb */ false, /* reuse */ false, &error_msg));
1885   }
1886   if (mem_map_ == nullptr) {
1887     LOG(ERROR) << "Could not create mem map for dex writer output: " << error_msg;
1888     if (new_file != nullptr) {
1889       new_file->Erase();
1890     }
1891     return;
1892   }
1893   DexWriter::Output(header_, mem_map_.get());
1894   if (new_file != nullptr) {
1895     UNUSED(new_file->FlushCloseOrErase());
1896   }
1897   // Verify the output dex file's structure for debug builds.
1898   if (kIsDebugBuild) {
1899     std::string location = "memory mapped file for " + dex_file_location;
1900     std::unique_ptr<const DexFile> output_dex_file(DexFile::Open(mem_map_->Begin(),
1901                                                                  mem_map_->Size(),
1902                                                                  location,
1903                                                                  header_->Checksum(),
1904                                                                  /*oat_dex_file*/ nullptr,
1905                                                                  /*verify*/ true,
1906                                                                  /*verify_checksum*/ false,
1907                                                                  &error_msg));
1908     DCHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << error_msg;
1909   }
1910   // Do IR-level comparison between input and output. This check ignores potential differences
1911   // due to layout, so offsets are not checked. Instead, it checks the data contents of each item.
1912   if (options_.verify_output_) {
1913     std::unique_ptr<dex_ir::Header> orig_header(dex_ir::DexIrBuilder(*dex_file));
1914     CHECK(VerifyOutputDexFile(orig_header.get(), header_, &error_msg)) << error_msg;
1915   }
1916 }
1917 
1918 /*
1919  * Dumps the requested sections of the file.
1920  */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index)1921 void DexLayout::ProcessDexFile(const char* file_name,
1922                                const DexFile* dex_file,
1923                                size_t dex_file_index) {
1924   std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file));
1925   SetHeader(header.get());
1926 
1927   if (options_.verbose_) {
1928     fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1929             file_name, dex_file->GetHeader().magic_ + 4);
1930   }
1931 
1932   if (options_.visualize_pattern_) {
1933     VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1934     return;
1935   }
1936 
1937   if (options_.show_section_statistics_) {
1938     ShowDexSectionStatistics(header_, dex_file_index);
1939     return;
1940   }
1941 
1942   // Dump dex file.
1943   if (options_.dump_) {
1944     DumpDexFile();
1945   }
1946 
1947   // Output dex file as file or memmap.
1948   if (options_.output_dex_directory_ != nullptr || options_.output_to_memmap_) {
1949     if (info_ != nullptr) {
1950       LayoutOutputFile(dex_file);
1951     }
1952     OutputDexFile(dex_file);
1953   }
1954 }
1955 
1956 /*
1957  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
1958  */
ProcessFile(const char * file_name)1959 int DexLayout::ProcessFile(const char* file_name) {
1960   if (options_.verbose_) {
1961     fprintf(out_file_, "Processing '%s'...\n", file_name);
1962   }
1963 
1964   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
1965   // all of which are Zip archives with "classes.dex" inside.
1966   const bool verify_checksum = !options_.ignore_bad_checksum_;
1967   std::string error_msg;
1968   std::vector<std::unique_ptr<const DexFile>> dex_files;
1969   if (!DexFile::Open(file_name, file_name, verify_checksum, &error_msg, &dex_files)) {
1970     // Display returned error message to user. Note that this error behavior
1971     // differs from the error messages shown by the original Dalvik dexdump.
1972     fputs(error_msg.c_str(), stderr);
1973     fputc('\n', stderr);
1974     return -1;
1975   }
1976 
1977   // Success. Either report checksum verification or process
1978   // all dex files found in given file.
1979   if (options_.checksum_only_) {
1980     fprintf(out_file_, "Checksum verified\n");
1981   } else {
1982     for (size_t i = 0; i < dex_files.size(); i++) {
1983       ProcessDexFile(file_name, dex_files[i].get(), i);
1984     }
1985   }
1986   return 0;
1987 }
1988 
1989 }  // namespace art
1990