1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "slicer/reader.h"
18 #include "slicer/dex_bytecode.h"
19 #include "slicer/chronometer.h"
20 #include "slicer/dex_leb128.h"
21 
22 #include <assert.h>
23 #include <string.h>
24 #include <type_traits>
25 #include <cstdlib>
26 
27 namespace dex {
28 
Reader(const dex::u1 * image,size_t size)29 Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
30   // init the header reference
31   header_ = ptr<dex::Header>(0);
32   ValidateHeader();
33 
34   // start with an "empty" .dex IR
35   dex_ir_ = std::make_shared<ir::DexFile>();
36   dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
37 }
38 
ClassDefs() const39 slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
40   return section<dex::ClassDef>(header_->class_defs_off,
41                                 header_->class_defs_size);
42 }
43 
StringIds() const44 slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
45   return section<dex::StringId>(header_->string_ids_off,
46                                 header_->string_ids_size);
47 }
48 
TypeIds() const49 slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
50   return section<dex::TypeId>(header_->type_ids_off,
51                               header_->type_ids_size);
52 }
53 
FieldIds() const54 slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
55   return section<dex::FieldId>(header_->field_ids_off,
56                                header_->field_ids_size);
57 }
58 
MethodIds() const59 slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
60   return section<dex::MethodId>(header_->method_ids_off,
61                                 header_->method_ids_size);
62 }
63 
ProtoIds() const64 slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
65   return section<dex::ProtoId>(header_->proto_ids_off,
66                                header_->proto_ids_size);
67 }
68 
DexMapList() const69 const dex::MapList* Reader::DexMapList() const {
70   return dataPtr<dex::MapList>(header_->map_off);
71 }
72 
GetStringMUTF8(dex::u4 index) const73 const char* Reader::GetStringMUTF8(dex::u4 index) const {
74   if (index == dex::kNoIndex) {
75     return "<no_string>";
76   }
77   const dex::u1* strData = GetStringData(index);
78   dex::ReadULeb128(&strData);
79   return reinterpret_cast<const char*>(strData);
80 }
81 
CreateFullIr()82 void Reader::CreateFullIr() {
83   size_t classCount = ClassDefs().size();
84   for (size_t i = 0; i < classCount; ++i) {
85     CreateClassIr(i);
86   }
87 }
88 
CreateClassIr(dex::u4 index)89 void Reader::CreateClassIr(dex::u4 index) {
90   auto ir_class = GetClass(index);
91   SLICER_CHECK(ir_class != nullptr);
92 }
93 
94 // Returns the index of the class with the specified
95 // descriptor, or kNoIndex if not found
FindClassIndex(const char * class_descriptor) const96 dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
97   auto classes = ClassDefs();
98   auto types = TypeIds();
99   for (dex::u4 i = 0; i < classes.size(); ++i) {
100     auto typeId = types[classes[i].class_idx];
101     const char* descriptor = GetStringMUTF8(typeId.descriptor_idx);
102     if (strcmp(class_descriptor, descriptor) == 0) {
103       return i;
104     }
105   }
106   return dex::kNoIndex;
107 }
108 
109 // map a .dex index to corresponding .dex IR node
110 //
111 // NOTES:
112 //  1. the mapping beween an index and the indexed
113 //     .dex IR nodes is 1:1
114 //  2. we do a single index lookup for both existing
115 //     nodes as well as new nodes
116 //  3. dummy is an invalid, but non-null pointer value
117 //     used to check that the mapping loookup/update is atomic
118 //  4. there should be no recursion with the same index
119 //     (we use the dummy value to guard against this too)
120 //
GetClass(dex::u4 index)121 ir::Class* Reader::GetClass(dex::u4 index) {
122   SLICER_CHECK(index != dex::kNoIndex);
123   auto& p = dex_ir_->classes_map[index];
124   auto dummy = reinterpret_cast<ir::Class*>(1);
125   if (p == nullptr) {
126     p = dummy;
127     auto newClass = ParseClass(index);
128     SLICER_CHECK(p == dummy);
129     p = newClass;
130     dex_ir_->classes_indexes.MarkUsedIndex(index);
131   }
132   SLICER_CHECK(p != dummy);
133   return p;
134 }
135 
136 // map a .dex index to corresponding .dex IR node
137 // (see the Reader::GetClass() comments)
GetType(dex::u4 index)138 ir::Type* Reader::GetType(dex::u4 index) {
139   SLICER_CHECK(index != dex::kNoIndex);
140   auto& p = dex_ir_->types_map[index];
141   auto dummy = reinterpret_cast<ir::Type*>(1);
142   if (p == nullptr) {
143     p = dummy;
144     auto newType = ParseType(index);
145     SLICER_CHECK(p == dummy);
146     p = newType;
147     dex_ir_->types_indexes.MarkUsedIndex(index);
148   }
149   SLICER_CHECK(p != dummy);
150   return p;
151 }
152 
153 // map a .dex index to corresponding .dex IR node
154 // (see the Reader::GetClass() comments)
GetFieldDecl(dex::u4 index)155 ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
156   SLICER_CHECK(index != dex::kNoIndex);
157   auto& p = dex_ir_->fields_map[index];
158   auto dummy = reinterpret_cast<ir::FieldDecl*>(1);
159   if (p == nullptr) {
160     p = dummy;
161     auto newField = ParseFieldDecl(index);
162     SLICER_CHECK(p == dummy);
163     p = newField;
164     dex_ir_->fields_indexes.MarkUsedIndex(index);
165   }
166   SLICER_CHECK(p != dummy);
167   return p;
168 }
169 
170 // map a .dex index to corresponding .dex IR node
171 // (see the Reader::GetClass() comments)
GetMethodDecl(dex::u4 index)172 ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
173   SLICER_CHECK(index != dex::kNoIndex);
174   auto& p = dex_ir_->methods_map[index];
175   auto dummy = reinterpret_cast<ir::MethodDecl*>(1);
176   if (p == nullptr) {
177     p = dummy;
178     auto newMethod = ParseMethodDecl(index);
179     SLICER_CHECK(p == dummy);
180     p = newMethod;
181     dex_ir_->methods_indexes.MarkUsedIndex(index);
182   }
183   SLICER_CHECK(p != dummy);
184   return p;
185 }
186 
187 // map a .dex index to corresponding .dex IR node
188 // (see the Reader::GetClass() comments)
GetProto(dex::u4 index)189 ir::Proto* Reader::GetProto(dex::u4 index) {
190   SLICER_CHECK(index != dex::kNoIndex);
191   auto& p = dex_ir_->protos_map[index];
192   auto dummy = reinterpret_cast<ir::Proto*>(1);
193   if (p == nullptr) {
194     p = dummy;
195     auto newProto = ParseProto(index);
196     SLICER_CHECK(p == dummy);
197     p = newProto;
198     dex_ir_->protos_indexes.MarkUsedIndex(index);
199   }
200   SLICER_CHECK(p != dummy);
201   return p;
202 }
203 
204 // map a .dex index to corresponding .dex IR node
205 // (see the Reader::GetClass() comments)
GetString(dex::u4 index)206 ir::String* Reader::GetString(dex::u4 index) {
207   SLICER_CHECK(index != dex::kNoIndex);
208   auto& p = dex_ir_->strings_map[index];
209   auto dummy = reinterpret_cast<ir::String*>(1);
210   if (p == nullptr) {
211     p = dummy;
212     auto newString = ParseString(index);
213     SLICER_CHECK(p == dummy);
214     p = newString;
215     dex_ir_->strings_indexes.MarkUsedIndex(index);
216   }
217   SLICER_CHECK(p != dummy);
218   return p;
219 }
220 
ParseClass(dex::u4 index)221 ir::Class* Reader::ParseClass(dex::u4 index) {
222   auto& dex_class_def = ClassDefs()[index];
223   auto ir_class = dex_ir_->Alloc<ir::Class>();
224 
225   ir_class->type = GetType(dex_class_def.class_idx);
226   assert(ir_class->type->class_def == nullptr);
227   ir_class->type->class_def = ir_class;
228 
229   ir_class->access_flags = dex_class_def.access_flags;
230   ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off);
231 
232   if (dex_class_def.superclass_idx != dex::kNoIndex) {
233     ir_class->super_class = GetType(dex_class_def.superclass_idx);
234   }
235 
236   if (dex_class_def.source_file_idx != dex::kNoIndex) {
237     ir_class->source_file = GetString(dex_class_def.source_file_idx);
238   }
239 
240   if (dex_class_def.class_data_off != 0) {
241     const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off);
242 
243     dex::u4 static_fields_count = dex::ReadULeb128(&class_data);
244     dex::u4 instance_fields_count = dex::ReadULeb128(&class_data);
245     dex::u4 direct_methods_count = dex::ReadULeb128(&class_data);
246     dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data);
247 
248     dex::u4 base_index = dex::kNoIndex;
249     for (dex::u4 i = 0; i < static_fields_count; ++i) {
250       auto field = ParseEncodedField(&class_data, &base_index);
251       ir_class->static_fields.push_back(field);
252     }
253 
254     base_index = dex::kNoIndex;
255     for (dex::u4 i = 0; i < instance_fields_count; ++i) {
256       auto field = ParseEncodedField(&class_data, &base_index);
257       ir_class->instance_fields.push_back(field);
258     }
259 
260     base_index = dex::kNoIndex;
261     for (dex::u4 i = 0; i < direct_methods_count; ++i) {
262       auto method = ParseEncodedMethod(&class_data, &base_index);
263       ir_class->direct_methods.push_back(method);
264     }
265 
266     base_index = dex::kNoIndex;
267     for (dex::u4 i = 0; i < virtual_methods_count; ++i) {
268       auto method = ParseEncodedMethod(&class_data, &base_index);
269       ir_class->virtual_methods.push_back(method);
270     }
271   }
272 
273   ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off);
274   ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off);
275   ir_class->orig_index = index;
276 
277   return ir_class;
278 }
279 
ExtractAnnotations(dex::u4 offset)280 ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
281   if (offset == 0) {
282     return nullptr;
283   }
284 
285   SLICER_CHECK(offset % 4 == 0);
286 
287   // first check if we already extracted the same "annotations_directory_item"
288   auto& ir_annotations = annotations_directories_[offset];
289   if (ir_annotations == nullptr) {
290     ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>();
291 
292     auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset);
293 
294     ir_annotations->class_annotation =
295         ExtractAnnotationSet(dex_annotations->class_annotations_off);
296 
297     const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1);
298 
299     for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) {
300       ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr));
301     }
302 
303     for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) {
304       ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr));
305     }
306 
307     for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) {
308       ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr));
309     }
310   }
311   return ir_annotations;
312 }
313 
ExtractAnnotationItem(dex::u4 offset)314 ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
315   SLICER_CHECK(offset != 0);
316 
317   // first check if we already extracted the same "annotation_item"
318   auto& ir_annotation = annotations_[offset];
319   if (ir_annotation == nullptr) {
320     auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset);
321     const dex::u1* ptr = dexAnnotationItem->annotation;
322     ir_annotation = ParseAnnotation(&ptr);
323     ir_annotation->visibility = dexAnnotationItem->visibility;
324   }
325   return ir_annotation;
326 }
327 
ExtractAnnotationSet(dex::u4 offset)328 ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
329   if (offset == 0) {
330     return nullptr;
331   }
332 
333   SLICER_CHECK(offset % 4 == 0);
334 
335   // first check if we already extracted the same "annotation_set_item"
336   auto& ir_annotation_set = annotation_sets_[offset];
337   if (ir_annotation_set == nullptr) {
338     ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>();
339 
340     auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset);
341     for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) {
342       auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]);
343       assert(ir_annotation != nullptr);
344       ir_annotation_set->annotations.push_back(ir_annotation);
345     }
346   }
347   return ir_annotation_set;
348 }
349 
ExtractAnnotationSetRefList(dex::u4 offset)350 ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
351   SLICER_CHECK(offset % 4 == 0);
352 
353   auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset);
354   auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>();
355 
356   for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) {
357     dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off;
358     if (entry_offset != 0) {
359       auto ir_annotation_set = ExtractAnnotationSet(entry_offset);
360       SLICER_CHECK(ir_annotation_set != nullptr);
361       ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set);
362     }
363   }
364 
365   return ir_annotation_set_ref_list;
366 }
367 
ParseFieldAnnotation(const dex::u1 ** pptr)368 ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
369   auto dex_field_annotation = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
370   auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();
371 
372   ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx);
373 
374   ir_field_annotation->annotations =
375       ExtractAnnotationSet(dex_field_annotation->annotations_off);
376   SLICER_CHECK(ir_field_annotation->annotations != nullptr);
377 
378   *pptr += sizeof(dex::FieldAnnotationsItem);
379   return ir_field_annotation;
380 }
381 
ParseMethodAnnotation(const dex::u1 ** pptr)382 ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
383   auto dex_method_annotation =
384       reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
385   auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();
386 
387   ir_method_annotation->method_decl = GetMethodDecl(dex_method_annotation->method_idx);
388 
389   ir_method_annotation->annotations =
390       ExtractAnnotationSet(dex_method_annotation->annotations_off);
391   SLICER_CHECK(ir_method_annotation->annotations != nullptr);
392 
393   *pptr += sizeof(dex::MethodAnnotationsItem);
394   return ir_method_annotation;
395 }
396 
ParseParamAnnotation(const dex::u1 ** pptr)397 ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
398   auto dex_param_annotation =
399       reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
400   auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();
401 
402   ir_param_annotation->method_decl = GetMethodDecl(dex_param_annotation->method_idx);
403 
404   ir_param_annotation->annotations =
405       ExtractAnnotationSetRefList(dex_param_annotation->annotations_off);
406   SLICER_CHECK(ir_param_annotation->annotations != nullptr);
407 
408   *pptr += sizeof(dex::ParameterAnnotationsItem);
409   return ir_param_annotation;
410 }
411 
ParseEncodedField(const dex::u1 ** pptr,dex::u4 * base_index)412 ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) {
413   auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>();
414 
415   auto field_index = dex::ReadULeb128(pptr);
416   SLICER_CHECK(field_index != dex::kNoIndex);
417   if (*base_index != dex::kNoIndex) {
418     SLICER_CHECK(field_index != 0);
419     field_index += *base_index;
420   }
421   *base_index = field_index;
422 
423   ir_encoded_field->decl = GetFieldDecl(field_index);
424   ir_encoded_field->access_flags = dex::ReadULeb128(pptr);
425 
426   return ir_encoded_field;
427 }
428 
429 // Parse an encoded variable-length integer value
430 // (sign-extend signed types, zero-extend unsigned types)
431 template <class T>
ParseIntValue(const dex::u1 ** pptr,size_t size)432 static T ParseIntValue(const dex::u1** pptr, size_t size) {
433   static_assert(std::is_integral<T>::value, "must be an integral type");
434 
435   SLICER_CHECK(size > 0);
436   SLICER_CHECK(size <= sizeof(T));
437 
438   T value = 0;
439   for (int i = 0; i < size; ++i) {
440     value |= T(*(*pptr)++) << (i * 8);
441   }
442 
443   // sign-extend?
444   if (std::is_signed<T>::value) {
445     size_t shift = (sizeof(T) - size) * 8;
446     value = T(value << shift) >> shift;
447   }
448 
449   return value;
450 }
451 
452 // Parse an encoded variable-length floating point value
453 // (zero-extend to the right)
454 template <class T>
ParseFloatValue(const dex::u1 ** pptr,size_t size)455 static T ParseFloatValue(const dex::u1** pptr, size_t size) {
456   SLICER_CHECK(size > 0);
457   SLICER_CHECK(size <= sizeof(T));
458 
459   T value = 0;
460   int start_byte = sizeof(T) - size;
461   for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0;
462        --size) {
463     *p++ = *(*pptr)++;
464   }
465   return value;
466 }
467 
ParseEncodedValue(const dex::u1 ** pptr)468 ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
469   auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>();
470 
471   SLICER_EXTRA(auto base_ptr = *pptr);
472 
473   dex::u1 header = *(*pptr)++;
474   dex::u1 type = header & dex::kEncodedValueTypeMask;
475   dex::u1 arg = header >> dex::kEncodedValueArgShift;
476 
477   ir_encoded_value->type = type;
478 
479   switch (type) {
480     case dex::kEncodedByte:
481       ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1);
482       break;
483 
484     case dex::kEncodedShort:
485       ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1);
486       break;
487 
488     case dex::kEncodedChar:
489       ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1);
490       break;
491 
492     case dex::kEncodedInt:
493       ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1);
494       break;
495 
496     case dex::kEncodedLong:
497       ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1);
498       break;
499 
500     case dex::kEncodedFloat:
501       ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1);
502       break;
503 
504     case dex::kEncodedDouble:
505       ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1);
506       break;
507 
508     case dex::kEncodedString: {
509       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
510       ir_encoded_value->u.string_value = GetString(index);
511     } break;
512 
513     case dex::kEncodedType: {
514       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
515       ir_encoded_value->u.type_value = GetType(index);
516     } break;
517 
518     case dex::kEncodedField: {
519       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
520       ir_encoded_value->u.field_value = GetFieldDecl(index);
521     } break;
522 
523     case dex::kEncodedMethod: {
524       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
525       ir_encoded_value->u.method_value = GetMethodDecl(index);
526     } break;
527 
528     case dex::kEncodedEnum: {
529       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
530       ir_encoded_value->u.enum_value = GetFieldDecl(index);
531     } break;
532 
533     case dex::kEncodedArray:
534       SLICER_CHECK(arg == 0);
535       ir_encoded_value->u.array_value = ParseEncodedArray(pptr);
536       break;
537 
538     case dex::kEncodedAnnotation:
539       SLICER_CHECK(arg == 0);
540       ir_encoded_value->u.annotation_value = ParseAnnotation(pptr);
541       break;
542 
543     case dex::kEncodedNull:
544       SLICER_CHECK(arg == 0);
545       break;
546 
547     case dex::kEncodedBoolean:
548       SLICER_CHECK(arg < 2);
549       ir_encoded_value->u.bool_value = (arg == 1);
550       break;
551 
552     default:
553       SLICER_CHECK(!"unexpected value type");
554   }
555 
556   SLICER_EXTRA(ir_encoded_value->original = slicer::MemView(base_ptr, *pptr - base_ptr));
557 
558   return ir_encoded_value;
559 }
560 
ParseAnnotation(const dex::u1 ** pptr)561 ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
562   auto ir_annotation = dex_ir_->Alloc<ir::Annotation>();
563 
564   dex::u4 type_index = dex::ReadULeb128(pptr);
565   dex::u4 elements_count = dex::ReadULeb128(pptr);
566 
567   ir_annotation->type = GetType(type_index);
568   ir_annotation->visibility = dex::kVisibilityEncoded;
569 
570   for (dex::u4 i = 0; i < elements_count; ++i) {
571     auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>();
572 
573     ir_element->name = GetString(dex::ReadULeb128(pptr));
574     ir_element->value = ParseEncodedValue(pptr);
575 
576     ir_annotation->elements.push_back(ir_element);
577   }
578 
579   return ir_annotation;
580 }
581 
ParseEncodedArray(const dex::u1 ** pptr)582 ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
583   auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>();
584 
585   dex::u4 count = dex::ReadULeb128(pptr);
586   for (dex::u4 i = 0; i < count; ++i) {
587     ir_encoded_array->values.push_back(ParseEncodedValue(pptr));
588   }
589 
590   return ir_encoded_array;
591 }
592 
ExtractEncodedArray(dex::u4 offset)593 ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
594   if (offset == 0) {
595     return nullptr;
596   }
597 
598   // first check if we already extracted the same "annotation_item"
599   auto& ir_encoded_array = encoded_arrays_[offset];
600   if (ir_encoded_array == nullptr) {
601     auto ptr = dataPtr<dex::u1>(offset);
602     ir_encoded_array = ParseEncodedArray(&ptr);
603   }
604   return ir_encoded_array;
605 }
606 
ExtractDebugInfo(dex::u4 offset)607 ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
608   if (offset == 0) {
609     return nullptr;
610   }
611 
612   auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>();
613   const dex::u1* ptr = dataPtr<dex::u1>(offset);
614 
615   ir_debug_info->line_start = dex::ReadULeb128(&ptr);
616 
617   // TODO: implicit this param for non-static methods?
618   dex::u4 param_count = dex::ReadULeb128(&ptr);
619   for (dex::u4 i = 0; i < param_count; ++i) {
620     dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
621     auto ir_string =
622         (name_index == dex::kNoIndex) ? nullptr : GetString(name_index);
623     ir_debug_info->param_names.push_back(ir_string);
624   }
625 
626   // parse the debug info opcodes and note the
627   // references to strings and types (to make sure the IR
628   // is the full closure of all referenced items)
629   //
630   // TODO: design a generic debug info iterator?
631   //
632   auto base_ptr = ptr;
633   dex::u1 opcode = 0;
634   while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
635     switch (opcode) {
636       case dex::DBG_ADVANCE_PC:
637         // addr_diff
638         dex::ReadULeb128(&ptr);
639         break;
640 
641       case dex::DBG_ADVANCE_LINE:
642         // line_diff
643         dex::ReadSLeb128(&ptr);
644         break;
645 
646       case dex::DBG_START_LOCAL: {
647         // register_num
648         dex::ReadULeb128(&ptr);
649 
650         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
651         if (name_index != dex::kNoIndex) {
652           GetString(name_index);
653         }
654 
655         dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
656         if (type_index != dex::kNoIndex) {
657           GetType(type_index);
658         }
659       } break;
660 
661       case dex::DBG_START_LOCAL_EXTENDED: {
662         // register_num
663         dex::ReadULeb128(&ptr);
664 
665         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
666         if (name_index != dex::kNoIndex) {
667           GetString(name_index);
668         }
669 
670         dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
671         if (type_index != dex::kNoIndex) {
672           GetType(type_index);
673         }
674 
675         dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
676         if (sig_index != dex::kNoIndex) {
677           GetString(sig_index);
678         }
679       } break;
680 
681       case dex::DBG_END_LOCAL:
682       case dex::DBG_RESTART_LOCAL:
683         // register_num
684         dex::ReadULeb128(&ptr);
685         break;
686 
687       case dex::DBG_SET_FILE: {
688         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
689         if (name_index != dex::kNoIndex) {
690           GetString(name_index);
691         }
692       } break;
693     }
694   }
695 
696   ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr);
697 
698   return ir_debug_info;
699 }
700 
ExtractCode(dex::u4 offset)701 ir::Code* Reader::ExtractCode(dex::u4 offset) {
702   if (offset == 0) {
703     return nullptr;
704   }
705 
706   SLICER_CHECK(offset % 4 == 0);
707 
708   auto dex_code = dataPtr<dex::Code>(offset);
709   auto ir_code = dex_ir_->Alloc<ir::Code>();
710 
711   ir_code->registers = dex_code->registers_size;
712   ir_code->ins_count = dex_code->ins_size;
713   ir_code->outs_count = dex_code->outs_size;
714 
715   // instructions array
716   ir_code->instructions =
717       slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size);
718 
719   // parse the instructions to discover references to other
720   // IR nodes (see debug info stream parsing too)
721   ParseInstructions(ir_code->instructions);
722 
723   // try blocks & handlers
724   //
725   // TODO: a generic try/catch blocks iterator?
726   //
727   if (dex_code->tries_size != 0) {
728     dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2;
729     auto tries =
730         reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count);
731     auto handlers_list =
732         reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size);
733 
734     ir_code->try_blocks =
735         slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size);
736 
737     // parse the handlers list (and discover embedded references)
738     auto ptr = handlers_list;
739 
740     dex::u4 handlers_count = dex::ReadULeb128(&ptr);
741     SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size);
742 
743     for (dex::u4 handler_index = 0; handler_index < handlers_count; ++handler_index) {
744       int catch_count = dex::ReadSLeb128(&ptr);
745 
746       for (int catch_index = 0; catch_index < std::abs(catch_count); ++catch_index) {
747         dex::u4 type_index = dex::ReadULeb128(&ptr);
748         GetType(type_index);
749 
750         // address
751         dex::ReadULeb128(&ptr);
752       }
753 
754       if (catch_count < 1) {
755         // catch_all_addr
756         dex::ReadULeb128(&ptr);
757       }
758     }
759 
760     ir_code->catch_handlers = slicer::MemView(handlers_list, ptr - handlers_list);
761   }
762 
763   ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off);
764 
765   return ir_code;
766 }
767 
ParseEncodedMethod(const dex::u1 ** pptr,dex::u4 * base_index)768 ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) {
769   auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();
770 
771   auto method_index = dex::ReadULeb128(pptr);
772   SLICER_CHECK(method_index != dex::kNoIndex);
773   if (*base_index != dex::kNoIndex) {
774     SLICER_CHECK(method_index != 0);
775     method_index += *base_index;
776   }
777   *base_index = method_index;
778 
779   ir_encoded_method->decl = GetMethodDecl(method_index);
780   ir_encoded_method->access_flags = dex::ReadULeb128(pptr);
781 
782   dex::u4 code_offset = dex::ReadULeb128(pptr);
783   ir_encoded_method->code = ExtractCode(code_offset);
784 
785   // update the methods lookup table
786   dex_ir_->methods_lookup.Insert(ir_encoded_method);
787 
788   return ir_encoded_method;
789 }
790 
ParseType(dex::u4 index)791 ir::Type* Reader::ParseType(dex::u4 index) {
792   auto& dex_type = TypeIds()[index];
793   auto ir_type = dex_ir_->Alloc<ir::Type>();
794 
795   ir_type->descriptor = GetString(dex_type.descriptor_idx);
796   ir_type->orig_index = index;
797 
798   return ir_type;
799 }
800 
ParseFieldDecl(dex::u4 index)801 ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
802   auto& dex_field = FieldIds()[index];
803   auto ir_field = dex_ir_->Alloc<ir::FieldDecl>();
804 
805   ir_field->name = GetString(dex_field.name_idx);
806   ir_field->type = GetType(dex_field.type_idx);
807   ir_field->parent = GetType(dex_field.class_idx);
808   ir_field->orig_index = index;
809 
810   return ir_field;
811 }
812 
ParseMethodDecl(dex::u4 index)813 ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
814   auto& dex_method = MethodIds()[index];
815   auto ir_method = dex_ir_->Alloc<ir::MethodDecl>();
816 
817   ir_method->name = GetString(dex_method.name_idx);
818   ir_method->prototype = GetProto(dex_method.proto_idx);
819   ir_method->parent = GetType(dex_method.class_idx);
820   ir_method->orig_index = index;
821 
822   return ir_method;
823 }
824 
ExtractTypeList(dex::u4 offset)825 ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
826   if (offset == 0) {
827     return nullptr;
828   }
829 
830   // first check to see if we already extracted the same "type_list"
831   auto& ir_type_list = type_lists_[offset];
832   if (ir_type_list == nullptr) {
833     ir_type_list = dex_ir_->Alloc<ir::TypeList>();
834 
835     auto dex_type_list = dataPtr<dex::TypeList>(offset);
836     SLICER_WEAK_CHECK(dex_type_list->size > 0);
837 
838     for (dex::u4 i = 0; i < dex_type_list->size; ++i) {
839       ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx));
840     }
841   }
842 
843   return ir_type_list;
844 }
845 
ParseProto(dex::u4 index)846 ir::Proto* Reader::ParseProto(dex::u4 index) {
847   auto& dex_proto = ProtoIds()[index];
848   auto ir_proto = dex_ir_->Alloc<ir::Proto>();
849 
850   ir_proto->shorty = GetString(dex_proto.shorty_idx);
851   ir_proto->return_type = GetType(dex_proto.return_type_idx);
852   ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off);
853   ir_proto->orig_index = index;
854 
855   // update the prototypes lookup table
856   dex_ir_->prototypes_lookup.Insert(ir_proto);
857 
858   return ir_proto;
859 }
860 
ParseString(dex::u4 index)861 ir::String* Reader::ParseString(dex::u4 index) {
862   auto ir_string = dex_ir_->Alloc<ir::String>();
863 
864   auto data = GetStringData(index);
865   auto cstr = data;
866   dex::ReadULeb128(&cstr);
867   size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1;
868 
869   ir_string->data = slicer::MemView(data, size);
870   ir_string->orig_index = index;
871 
872   // update the strings lookup table
873   dex_ir_->strings_lookup.Insert(ir_string);
874 
875   return ir_string;
876 }
877 
ParseInstructions(slicer::ArrayView<const dex::u2> code)878 void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
879   const dex::u2* ptr = code.begin();
880   while (ptr < code.end()) {
881     auto dex_instr = dex::DecodeInstruction(ptr);
882 
883     dex::u4 index = dex::kNoIndex;
884     switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
885       case dex::k20bc:
886       case dex::k21c:
887       case dex::k31c:
888       case dex::k35c:
889       case dex::k3rc:
890         index = dex_instr.vB;
891         break;
892 
893       case dex::k22c:
894         index = dex_instr.vC;
895         break;
896 
897       default:
898         break;
899     }
900 
901     switch (GetIndexTypeFromOpcode(dex_instr.opcode)) {
902       case dex::kIndexStringRef:
903         GetString(index);
904         break;
905 
906       case dex::kIndexTypeRef:
907         GetType(index);
908         break;
909 
910       case dex::kIndexFieldRef:
911         GetFieldDecl(index);
912         break;
913 
914       case dex::kIndexMethodRef:
915         GetMethodDecl(index);
916         break;
917 
918       default:
919         break;
920     }
921 
922     auto isize = dex::GetWidthFromBytecode(ptr);
923     SLICER_CHECK(isize > 0);
924     ptr += isize;
925   }
926   SLICER_CHECK(ptr == code.end());
927 }
928 
929 // Basic .dex header structural checks
ValidateHeader()930 void Reader::ValidateHeader() {
931   SLICER_CHECK(size_ > sizeof(dex::Header));
932 
933   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
934   // estimate. b/72402467
935   SLICER_CHECK(header_->file_size <= size_);
936   SLICER_CHECK(header_->header_size == sizeof(dex::Header));
937   SLICER_CHECK(header_->endian_tag == dex::kEndianConstant);
938   SLICER_CHECK(header_->data_size % 4 == 0);
939 
940   // Known issue: The fields might be slighly corrupted b/65452964
941   // SLICER_CHECK(header_->data_off + header_->data_size <= size_);
942 
943   SLICER_CHECK(header_->string_ids_off % 4 == 0);
944   SLICER_CHECK(header_->type_ids_size < 65536);
945   SLICER_CHECK(header_->type_ids_off % 4 == 0);
946   SLICER_CHECK(header_->proto_ids_size < 65536);
947   SLICER_CHECK(header_->proto_ids_off % 4 == 0);
948   SLICER_CHECK(header_->field_ids_off % 4 == 0);
949   SLICER_CHECK(header_->method_ids_off % 4 == 0);
950   SLICER_CHECK(header_->class_defs_off % 4 == 0);
951   SLICER_CHECK(header_->map_off >= header_->data_off && header_->map_off < size_);
952   SLICER_CHECK(header_->link_size == 0);
953   SLICER_CHECK(header_->link_off == 0);
954   SLICER_CHECK(header_->data_off % 4 == 0);
955   SLICER_CHECK(header_->map_off % 4 == 0);
956 
957   // we seem to have .dex files with extra bytes at the end ...
958   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
959   // estimate. b/72402467
960   SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_);
961 
962   // but we should still have the whole data section
963 
964   // Known issue: The fields might be slighly corrupted b/65452964
965   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
966   // estimate. b/72402467
967   // SLICER_CHECK(header_->data_off + header_->data_size <= size_);
968 
969   // validate the map
970   // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])
971   auto map_list = ptr<dex::MapList>(header_->map_off);
972   SLICER_CHECK(map_list->size > 0);
973   auto map_section_size =
974       sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size;
975   SLICER_CHECK(header_->map_off + map_section_size <= size_);
976 }
977 
978 }  // namespace dex
979