1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "slicer/reader.h"
18 #include "slicer/dex_bytecode.h"
19 #include "slicer/chronometer.h"
20 #include "slicer/dex_leb128.h"
21
22 #include <assert.h>
23 #include <string.h>
24 #include <type_traits>
25 #include <cstdlib>
26
27 namespace dex {
28
Reader(const dex::u1 * image,size_t size)29 Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
30 // init the header reference
31 header_ = ptr<dex::Header>(0);
32 ValidateHeader();
33
34 // start with an "empty" .dex IR
35 dex_ir_ = std::make_shared<ir::DexFile>();
36 dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
37 }
38
ClassDefs() const39 slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
40 return section<dex::ClassDef>(header_->class_defs_off,
41 header_->class_defs_size);
42 }
43
StringIds() const44 slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
45 return section<dex::StringId>(header_->string_ids_off,
46 header_->string_ids_size);
47 }
48
TypeIds() const49 slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
50 return section<dex::TypeId>(header_->type_ids_off,
51 header_->type_ids_size);
52 }
53
FieldIds() const54 slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
55 return section<dex::FieldId>(header_->field_ids_off,
56 header_->field_ids_size);
57 }
58
MethodIds() const59 slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
60 return section<dex::MethodId>(header_->method_ids_off,
61 header_->method_ids_size);
62 }
63
ProtoIds() const64 slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
65 return section<dex::ProtoId>(header_->proto_ids_off,
66 header_->proto_ids_size);
67 }
68
DexMapList() const69 const dex::MapList* Reader::DexMapList() const {
70 return dataPtr<dex::MapList>(header_->map_off);
71 }
72
GetStringMUTF8(dex::u4 index) const73 const char* Reader::GetStringMUTF8(dex::u4 index) const {
74 if (index == dex::kNoIndex) {
75 return "<no_string>";
76 }
77 const dex::u1* strData = GetStringData(index);
78 dex::ReadULeb128(&strData);
79 return reinterpret_cast<const char*>(strData);
80 }
81
CreateFullIr()82 void Reader::CreateFullIr() {
83 size_t classCount = ClassDefs().size();
84 for (size_t i = 0; i < classCount; ++i) {
85 CreateClassIr(i);
86 }
87 }
88
CreateClassIr(dex::u4 index)89 void Reader::CreateClassIr(dex::u4 index) {
90 auto ir_class = GetClass(index);
91 SLICER_CHECK(ir_class != nullptr);
92 }
93
94 // Returns the index of the class with the specified
95 // descriptor, or kNoIndex if not found
FindClassIndex(const char * class_descriptor) const96 dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
97 auto classes = ClassDefs();
98 auto types = TypeIds();
99 for (dex::u4 i = 0; i < classes.size(); ++i) {
100 auto typeId = types[classes[i].class_idx];
101 const char* descriptor = GetStringMUTF8(typeId.descriptor_idx);
102 if (strcmp(class_descriptor, descriptor) == 0) {
103 return i;
104 }
105 }
106 return dex::kNoIndex;
107 }
108
109 // map a .dex index to corresponding .dex IR node
110 //
111 // NOTES:
112 // 1. the mapping beween an index and the indexed
113 // .dex IR nodes is 1:1
114 // 2. we do a single index lookup for both existing
115 // nodes as well as new nodes
116 // 3. dummy is an invalid, but non-null pointer value
117 // used to check that the mapping loookup/update is atomic
118 // 4. there should be no recursion with the same index
119 // (we use the dummy value to guard against this too)
120 //
GetClass(dex::u4 index)121 ir::Class* Reader::GetClass(dex::u4 index) {
122 SLICER_CHECK(index != dex::kNoIndex);
123 auto& p = dex_ir_->classes_map[index];
124 auto dummy = reinterpret_cast<ir::Class*>(1);
125 if (p == nullptr) {
126 p = dummy;
127 auto newClass = ParseClass(index);
128 SLICER_CHECK(p == dummy);
129 p = newClass;
130 dex_ir_->classes_indexes.MarkUsedIndex(index);
131 }
132 SLICER_CHECK(p != dummy);
133 return p;
134 }
135
136 // map a .dex index to corresponding .dex IR node
137 // (see the Reader::GetClass() comments)
GetType(dex::u4 index)138 ir::Type* Reader::GetType(dex::u4 index) {
139 SLICER_CHECK(index != dex::kNoIndex);
140 auto& p = dex_ir_->types_map[index];
141 auto dummy = reinterpret_cast<ir::Type*>(1);
142 if (p == nullptr) {
143 p = dummy;
144 auto newType = ParseType(index);
145 SLICER_CHECK(p == dummy);
146 p = newType;
147 dex_ir_->types_indexes.MarkUsedIndex(index);
148 }
149 SLICER_CHECK(p != dummy);
150 return p;
151 }
152
153 // map a .dex index to corresponding .dex IR node
154 // (see the Reader::GetClass() comments)
GetFieldDecl(dex::u4 index)155 ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
156 SLICER_CHECK(index != dex::kNoIndex);
157 auto& p = dex_ir_->fields_map[index];
158 auto dummy = reinterpret_cast<ir::FieldDecl*>(1);
159 if (p == nullptr) {
160 p = dummy;
161 auto newField = ParseFieldDecl(index);
162 SLICER_CHECK(p == dummy);
163 p = newField;
164 dex_ir_->fields_indexes.MarkUsedIndex(index);
165 }
166 SLICER_CHECK(p != dummy);
167 return p;
168 }
169
170 // map a .dex index to corresponding .dex IR node
171 // (see the Reader::GetClass() comments)
GetMethodDecl(dex::u4 index)172 ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
173 SLICER_CHECK(index != dex::kNoIndex);
174 auto& p = dex_ir_->methods_map[index];
175 auto dummy = reinterpret_cast<ir::MethodDecl*>(1);
176 if (p == nullptr) {
177 p = dummy;
178 auto newMethod = ParseMethodDecl(index);
179 SLICER_CHECK(p == dummy);
180 p = newMethod;
181 dex_ir_->methods_indexes.MarkUsedIndex(index);
182 }
183 SLICER_CHECK(p != dummy);
184 return p;
185 }
186
187 // map a .dex index to corresponding .dex IR node
188 // (see the Reader::GetClass() comments)
GetProto(dex::u4 index)189 ir::Proto* Reader::GetProto(dex::u4 index) {
190 SLICER_CHECK(index != dex::kNoIndex);
191 auto& p = dex_ir_->protos_map[index];
192 auto dummy = reinterpret_cast<ir::Proto*>(1);
193 if (p == nullptr) {
194 p = dummy;
195 auto newProto = ParseProto(index);
196 SLICER_CHECK(p == dummy);
197 p = newProto;
198 dex_ir_->protos_indexes.MarkUsedIndex(index);
199 }
200 SLICER_CHECK(p != dummy);
201 return p;
202 }
203
204 // map a .dex index to corresponding .dex IR node
205 // (see the Reader::GetClass() comments)
GetString(dex::u4 index)206 ir::String* Reader::GetString(dex::u4 index) {
207 SLICER_CHECK(index != dex::kNoIndex);
208 auto& p = dex_ir_->strings_map[index];
209 auto dummy = reinterpret_cast<ir::String*>(1);
210 if (p == nullptr) {
211 p = dummy;
212 auto newString = ParseString(index);
213 SLICER_CHECK(p == dummy);
214 p = newString;
215 dex_ir_->strings_indexes.MarkUsedIndex(index);
216 }
217 SLICER_CHECK(p != dummy);
218 return p;
219 }
220
ParseClass(dex::u4 index)221 ir::Class* Reader::ParseClass(dex::u4 index) {
222 auto& dex_class_def = ClassDefs()[index];
223 auto ir_class = dex_ir_->Alloc<ir::Class>();
224
225 ir_class->type = GetType(dex_class_def.class_idx);
226 assert(ir_class->type->class_def == nullptr);
227 ir_class->type->class_def = ir_class;
228
229 ir_class->access_flags = dex_class_def.access_flags;
230 ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off);
231
232 if (dex_class_def.superclass_idx != dex::kNoIndex) {
233 ir_class->super_class = GetType(dex_class_def.superclass_idx);
234 }
235
236 if (dex_class_def.source_file_idx != dex::kNoIndex) {
237 ir_class->source_file = GetString(dex_class_def.source_file_idx);
238 }
239
240 if (dex_class_def.class_data_off != 0) {
241 const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off);
242
243 dex::u4 static_fields_count = dex::ReadULeb128(&class_data);
244 dex::u4 instance_fields_count = dex::ReadULeb128(&class_data);
245 dex::u4 direct_methods_count = dex::ReadULeb128(&class_data);
246 dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data);
247
248 dex::u4 base_index = dex::kNoIndex;
249 for (dex::u4 i = 0; i < static_fields_count; ++i) {
250 auto field = ParseEncodedField(&class_data, &base_index);
251 ir_class->static_fields.push_back(field);
252 }
253
254 base_index = dex::kNoIndex;
255 for (dex::u4 i = 0; i < instance_fields_count; ++i) {
256 auto field = ParseEncodedField(&class_data, &base_index);
257 ir_class->instance_fields.push_back(field);
258 }
259
260 base_index = dex::kNoIndex;
261 for (dex::u4 i = 0; i < direct_methods_count; ++i) {
262 auto method = ParseEncodedMethod(&class_data, &base_index);
263 ir_class->direct_methods.push_back(method);
264 }
265
266 base_index = dex::kNoIndex;
267 for (dex::u4 i = 0; i < virtual_methods_count; ++i) {
268 auto method = ParseEncodedMethod(&class_data, &base_index);
269 ir_class->virtual_methods.push_back(method);
270 }
271 }
272
273 ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off);
274 ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off);
275 ir_class->orig_index = index;
276
277 return ir_class;
278 }
279
ExtractAnnotations(dex::u4 offset)280 ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
281 if (offset == 0) {
282 return nullptr;
283 }
284
285 SLICER_CHECK(offset % 4 == 0);
286
287 // first check if we already extracted the same "annotations_directory_item"
288 auto& ir_annotations = annotations_directories_[offset];
289 if (ir_annotations == nullptr) {
290 ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>();
291
292 auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset);
293
294 ir_annotations->class_annotation =
295 ExtractAnnotationSet(dex_annotations->class_annotations_off);
296
297 const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1);
298
299 for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) {
300 ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr));
301 }
302
303 for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) {
304 ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr));
305 }
306
307 for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) {
308 ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr));
309 }
310 }
311 return ir_annotations;
312 }
313
ExtractAnnotationItem(dex::u4 offset)314 ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
315 SLICER_CHECK(offset != 0);
316
317 // first check if we already extracted the same "annotation_item"
318 auto& ir_annotation = annotations_[offset];
319 if (ir_annotation == nullptr) {
320 auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset);
321 const dex::u1* ptr = dexAnnotationItem->annotation;
322 ir_annotation = ParseAnnotation(&ptr);
323 ir_annotation->visibility = dexAnnotationItem->visibility;
324 }
325 return ir_annotation;
326 }
327
ExtractAnnotationSet(dex::u4 offset)328 ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
329 if (offset == 0) {
330 return nullptr;
331 }
332
333 SLICER_CHECK(offset % 4 == 0);
334
335 // first check if we already extracted the same "annotation_set_item"
336 auto& ir_annotation_set = annotation_sets_[offset];
337 if (ir_annotation_set == nullptr) {
338 ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>();
339
340 auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset);
341 for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) {
342 auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]);
343 assert(ir_annotation != nullptr);
344 ir_annotation_set->annotations.push_back(ir_annotation);
345 }
346 }
347 return ir_annotation_set;
348 }
349
ExtractAnnotationSetRefList(dex::u4 offset)350 ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
351 SLICER_CHECK(offset % 4 == 0);
352
353 auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset);
354 auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>();
355
356 for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) {
357 dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off;
358 if (entry_offset != 0) {
359 auto ir_annotation_set = ExtractAnnotationSet(entry_offset);
360 SLICER_CHECK(ir_annotation_set != nullptr);
361 ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set);
362 }
363 }
364
365 return ir_annotation_set_ref_list;
366 }
367
ParseFieldAnnotation(const dex::u1 ** pptr)368 ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
369 auto dex_field_annotation = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
370 auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();
371
372 ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx);
373
374 ir_field_annotation->annotations =
375 ExtractAnnotationSet(dex_field_annotation->annotations_off);
376 SLICER_CHECK(ir_field_annotation->annotations != nullptr);
377
378 *pptr += sizeof(dex::FieldAnnotationsItem);
379 return ir_field_annotation;
380 }
381
ParseMethodAnnotation(const dex::u1 ** pptr)382 ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
383 auto dex_method_annotation =
384 reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
385 auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();
386
387 ir_method_annotation->method_decl = GetMethodDecl(dex_method_annotation->method_idx);
388
389 ir_method_annotation->annotations =
390 ExtractAnnotationSet(dex_method_annotation->annotations_off);
391 SLICER_CHECK(ir_method_annotation->annotations != nullptr);
392
393 *pptr += sizeof(dex::MethodAnnotationsItem);
394 return ir_method_annotation;
395 }
396
ParseParamAnnotation(const dex::u1 ** pptr)397 ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
398 auto dex_param_annotation =
399 reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
400 auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();
401
402 ir_param_annotation->method_decl = GetMethodDecl(dex_param_annotation->method_idx);
403
404 ir_param_annotation->annotations =
405 ExtractAnnotationSetRefList(dex_param_annotation->annotations_off);
406 SLICER_CHECK(ir_param_annotation->annotations != nullptr);
407
408 *pptr += sizeof(dex::ParameterAnnotationsItem);
409 return ir_param_annotation;
410 }
411
ParseEncodedField(const dex::u1 ** pptr,dex::u4 * base_index)412 ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) {
413 auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>();
414
415 auto field_index = dex::ReadULeb128(pptr);
416 SLICER_CHECK(field_index != dex::kNoIndex);
417 if (*base_index != dex::kNoIndex) {
418 SLICER_CHECK(field_index != 0);
419 field_index += *base_index;
420 }
421 *base_index = field_index;
422
423 ir_encoded_field->decl = GetFieldDecl(field_index);
424 ir_encoded_field->access_flags = dex::ReadULeb128(pptr);
425
426 return ir_encoded_field;
427 }
428
429 // Parse an encoded variable-length integer value
430 // (sign-extend signed types, zero-extend unsigned types)
431 template <class T>
ParseIntValue(const dex::u1 ** pptr,size_t size)432 static T ParseIntValue(const dex::u1** pptr, size_t size) {
433 static_assert(std::is_integral<T>::value, "must be an integral type");
434
435 SLICER_CHECK(size > 0);
436 SLICER_CHECK(size <= sizeof(T));
437
438 T value = 0;
439 for (int i = 0; i < size; ++i) {
440 value |= T(*(*pptr)++) << (i * 8);
441 }
442
443 // sign-extend?
444 if (std::is_signed<T>::value) {
445 size_t shift = (sizeof(T) - size) * 8;
446 value = T(value << shift) >> shift;
447 }
448
449 return value;
450 }
451
452 // Parse an encoded variable-length floating point value
453 // (zero-extend to the right)
454 template <class T>
ParseFloatValue(const dex::u1 ** pptr,size_t size)455 static T ParseFloatValue(const dex::u1** pptr, size_t size) {
456 SLICER_CHECK(size > 0);
457 SLICER_CHECK(size <= sizeof(T));
458
459 T value = 0;
460 int start_byte = sizeof(T) - size;
461 for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0;
462 --size) {
463 *p++ = *(*pptr)++;
464 }
465 return value;
466 }
467
ParseEncodedValue(const dex::u1 ** pptr)468 ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
469 auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>();
470
471 SLICER_EXTRA(auto base_ptr = *pptr);
472
473 dex::u1 header = *(*pptr)++;
474 dex::u1 type = header & dex::kEncodedValueTypeMask;
475 dex::u1 arg = header >> dex::kEncodedValueArgShift;
476
477 ir_encoded_value->type = type;
478
479 switch (type) {
480 case dex::kEncodedByte:
481 ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1);
482 break;
483
484 case dex::kEncodedShort:
485 ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1);
486 break;
487
488 case dex::kEncodedChar:
489 ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1);
490 break;
491
492 case dex::kEncodedInt:
493 ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1);
494 break;
495
496 case dex::kEncodedLong:
497 ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1);
498 break;
499
500 case dex::kEncodedFloat:
501 ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1);
502 break;
503
504 case dex::kEncodedDouble:
505 ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1);
506 break;
507
508 case dex::kEncodedString: {
509 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
510 ir_encoded_value->u.string_value = GetString(index);
511 } break;
512
513 case dex::kEncodedType: {
514 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
515 ir_encoded_value->u.type_value = GetType(index);
516 } break;
517
518 case dex::kEncodedField: {
519 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
520 ir_encoded_value->u.field_value = GetFieldDecl(index);
521 } break;
522
523 case dex::kEncodedMethod: {
524 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
525 ir_encoded_value->u.method_value = GetMethodDecl(index);
526 } break;
527
528 case dex::kEncodedEnum: {
529 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
530 ir_encoded_value->u.enum_value = GetFieldDecl(index);
531 } break;
532
533 case dex::kEncodedArray:
534 SLICER_CHECK(arg == 0);
535 ir_encoded_value->u.array_value = ParseEncodedArray(pptr);
536 break;
537
538 case dex::kEncodedAnnotation:
539 SLICER_CHECK(arg == 0);
540 ir_encoded_value->u.annotation_value = ParseAnnotation(pptr);
541 break;
542
543 case dex::kEncodedNull:
544 SLICER_CHECK(arg == 0);
545 break;
546
547 case dex::kEncodedBoolean:
548 SLICER_CHECK(arg < 2);
549 ir_encoded_value->u.bool_value = (arg == 1);
550 break;
551
552 default:
553 SLICER_CHECK(!"unexpected value type");
554 }
555
556 SLICER_EXTRA(ir_encoded_value->original = slicer::MemView(base_ptr, *pptr - base_ptr));
557
558 return ir_encoded_value;
559 }
560
ParseAnnotation(const dex::u1 ** pptr)561 ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
562 auto ir_annotation = dex_ir_->Alloc<ir::Annotation>();
563
564 dex::u4 type_index = dex::ReadULeb128(pptr);
565 dex::u4 elements_count = dex::ReadULeb128(pptr);
566
567 ir_annotation->type = GetType(type_index);
568 ir_annotation->visibility = dex::kVisibilityEncoded;
569
570 for (dex::u4 i = 0; i < elements_count; ++i) {
571 auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>();
572
573 ir_element->name = GetString(dex::ReadULeb128(pptr));
574 ir_element->value = ParseEncodedValue(pptr);
575
576 ir_annotation->elements.push_back(ir_element);
577 }
578
579 return ir_annotation;
580 }
581
ParseEncodedArray(const dex::u1 ** pptr)582 ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
583 auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>();
584
585 dex::u4 count = dex::ReadULeb128(pptr);
586 for (dex::u4 i = 0; i < count; ++i) {
587 ir_encoded_array->values.push_back(ParseEncodedValue(pptr));
588 }
589
590 return ir_encoded_array;
591 }
592
ExtractEncodedArray(dex::u4 offset)593 ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
594 if (offset == 0) {
595 return nullptr;
596 }
597
598 // first check if we already extracted the same "annotation_item"
599 auto& ir_encoded_array = encoded_arrays_[offset];
600 if (ir_encoded_array == nullptr) {
601 auto ptr = dataPtr<dex::u1>(offset);
602 ir_encoded_array = ParseEncodedArray(&ptr);
603 }
604 return ir_encoded_array;
605 }
606
ExtractDebugInfo(dex::u4 offset)607 ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
608 if (offset == 0) {
609 return nullptr;
610 }
611
612 auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>();
613 const dex::u1* ptr = dataPtr<dex::u1>(offset);
614
615 ir_debug_info->line_start = dex::ReadULeb128(&ptr);
616
617 // TODO: implicit this param for non-static methods?
618 dex::u4 param_count = dex::ReadULeb128(&ptr);
619 for (dex::u4 i = 0; i < param_count; ++i) {
620 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
621 auto ir_string =
622 (name_index == dex::kNoIndex) ? nullptr : GetString(name_index);
623 ir_debug_info->param_names.push_back(ir_string);
624 }
625
626 // parse the debug info opcodes and note the
627 // references to strings and types (to make sure the IR
628 // is the full closure of all referenced items)
629 //
630 // TODO: design a generic debug info iterator?
631 //
632 auto base_ptr = ptr;
633 dex::u1 opcode = 0;
634 while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
635 switch (opcode) {
636 case dex::DBG_ADVANCE_PC:
637 // addr_diff
638 dex::ReadULeb128(&ptr);
639 break;
640
641 case dex::DBG_ADVANCE_LINE:
642 // line_diff
643 dex::ReadSLeb128(&ptr);
644 break;
645
646 case dex::DBG_START_LOCAL: {
647 // register_num
648 dex::ReadULeb128(&ptr);
649
650 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
651 if (name_index != dex::kNoIndex) {
652 GetString(name_index);
653 }
654
655 dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
656 if (type_index != dex::kNoIndex) {
657 GetType(type_index);
658 }
659 } break;
660
661 case dex::DBG_START_LOCAL_EXTENDED: {
662 // register_num
663 dex::ReadULeb128(&ptr);
664
665 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
666 if (name_index != dex::kNoIndex) {
667 GetString(name_index);
668 }
669
670 dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
671 if (type_index != dex::kNoIndex) {
672 GetType(type_index);
673 }
674
675 dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
676 if (sig_index != dex::kNoIndex) {
677 GetString(sig_index);
678 }
679 } break;
680
681 case dex::DBG_END_LOCAL:
682 case dex::DBG_RESTART_LOCAL:
683 // register_num
684 dex::ReadULeb128(&ptr);
685 break;
686
687 case dex::DBG_SET_FILE: {
688 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
689 if (name_index != dex::kNoIndex) {
690 GetString(name_index);
691 }
692 } break;
693 }
694 }
695
696 ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr);
697
698 return ir_debug_info;
699 }
700
ExtractCode(dex::u4 offset)701 ir::Code* Reader::ExtractCode(dex::u4 offset) {
702 if (offset == 0) {
703 return nullptr;
704 }
705
706 SLICER_CHECK(offset % 4 == 0);
707
708 auto dex_code = dataPtr<dex::Code>(offset);
709 auto ir_code = dex_ir_->Alloc<ir::Code>();
710
711 ir_code->registers = dex_code->registers_size;
712 ir_code->ins_count = dex_code->ins_size;
713 ir_code->outs_count = dex_code->outs_size;
714
715 // instructions array
716 ir_code->instructions =
717 slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size);
718
719 // parse the instructions to discover references to other
720 // IR nodes (see debug info stream parsing too)
721 ParseInstructions(ir_code->instructions);
722
723 // try blocks & handlers
724 //
725 // TODO: a generic try/catch blocks iterator?
726 //
727 if (dex_code->tries_size != 0) {
728 dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2;
729 auto tries =
730 reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count);
731 auto handlers_list =
732 reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size);
733
734 ir_code->try_blocks =
735 slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size);
736
737 // parse the handlers list (and discover embedded references)
738 auto ptr = handlers_list;
739
740 dex::u4 handlers_count = dex::ReadULeb128(&ptr);
741 SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size);
742
743 for (dex::u4 handler_index = 0; handler_index < handlers_count; ++handler_index) {
744 int catch_count = dex::ReadSLeb128(&ptr);
745
746 for (int catch_index = 0; catch_index < std::abs(catch_count); ++catch_index) {
747 dex::u4 type_index = dex::ReadULeb128(&ptr);
748 GetType(type_index);
749
750 // address
751 dex::ReadULeb128(&ptr);
752 }
753
754 if (catch_count < 1) {
755 // catch_all_addr
756 dex::ReadULeb128(&ptr);
757 }
758 }
759
760 ir_code->catch_handlers = slicer::MemView(handlers_list, ptr - handlers_list);
761 }
762
763 ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off);
764
765 return ir_code;
766 }
767
ParseEncodedMethod(const dex::u1 ** pptr,dex::u4 * base_index)768 ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) {
769 auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();
770
771 auto method_index = dex::ReadULeb128(pptr);
772 SLICER_CHECK(method_index != dex::kNoIndex);
773 if (*base_index != dex::kNoIndex) {
774 SLICER_CHECK(method_index != 0);
775 method_index += *base_index;
776 }
777 *base_index = method_index;
778
779 ir_encoded_method->decl = GetMethodDecl(method_index);
780 ir_encoded_method->access_flags = dex::ReadULeb128(pptr);
781
782 dex::u4 code_offset = dex::ReadULeb128(pptr);
783 ir_encoded_method->code = ExtractCode(code_offset);
784
785 // update the methods lookup table
786 dex_ir_->methods_lookup.Insert(ir_encoded_method);
787
788 return ir_encoded_method;
789 }
790
ParseType(dex::u4 index)791 ir::Type* Reader::ParseType(dex::u4 index) {
792 auto& dex_type = TypeIds()[index];
793 auto ir_type = dex_ir_->Alloc<ir::Type>();
794
795 ir_type->descriptor = GetString(dex_type.descriptor_idx);
796 ir_type->orig_index = index;
797
798 return ir_type;
799 }
800
ParseFieldDecl(dex::u4 index)801 ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
802 auto& dex_field = FieldIds()[index];
803 auto ir_field = dex_ir_->Alloc<ir::FieldDecl>();
804
805 ir_field->name = GetString(dex_field.name_idx);
806 ir_field->type = GetType(dex_field.type_idx);
807 ir_field->parent = GetType(dex_field.class_idx);
808 ir_field->orig_index = index;
809
810 return ir_field;
811 }
812
ParseMethodDecl(dex::u4 index)813 ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
814 auto& dex_method = MethodIds()[index];
815 auto ir_method = dex_ir_->Alloc<ir::MethodDecl>();
816
817 ir_method->name = GetString(dex_method.name_idx);
818 ir_method->prototype = GetProto(dex_method.proto_idx);
819 ir_method->parent = GetType(dex_method.class_idx);
820 ir_method->orig_index = index;
821
822 return ir_method;
823 }
824
ExtractTypeList(dex::u4 offset)825 ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
826 if (offset == 0) {
827 return nullptr;
828 }
829
830 // first check to see if we already extracted the same "type_list"
831 auto& ir_type_list = type_lists_[offset];
832 if (ir_type_list == nullptr) {
833 ir_type_list = dex_ir_->Alloc<ir::TypeList>();
834
835 auto dex_type_list = dataPtr<dex::TypeList>(offset);
836 SLICER_WEAK_CHECK(dex_type_list->size > 0);
837
838 for (dex::u4 i = 0; i < dex_type_list->size; ++i) {
839 ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx));
840 }
841 }
842
843 return ir_type_list;
844 }
845
ParseProto(dex::u4 index)846 ir::Proto* Reader::ParseProto(dex::u4 index) {
847 auto& dex_proto = ProtoIds()[index];
848 auto ir_proto = dex_ir_->Alloc<ir::Proto>();
849
850 ir_proto->shorty = GetString(dex_proto.shorty_idx);
851 ir_proto->return_type = GetType(dex_proto.return_type_idx);
852 ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off);
853 ir_proto->orig_index = index;
854
855 // update the prototypes lookup table
856 dex_ir_->prototypes_lookup.Insert(ir_proto);
857
858 return ir_proto;
859 }
860
ParseString(dex::u4 index)861 ir::String* Reader::ParseString(dex::u4 index) {
862 auto ir_string = dex_ir_->Alloc<ir::String>();
863
864 auto data = GetStringData(index);
865 auto cstr = data;
866 dex::ReadULeb128(&cstr);
867 size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1;
868
869 ir_string->data = slicer::MemView(data, size);
870 ir_string->orig_index = index;
871
872 // update the strings lookup table
873 dex_ir_->strings_lookup.Insert(ir_string);
874
875 return ir_string;
876 }
877
ParseInstructions(slicer::ArrayView<const dex::u2> code)878 void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
879 const dex::u2* ptr = code.begin();
880 while (ptr < code.end()) {
881 auto dex_instr = dex::DecodeInstruction(ptr);
882
883 dex::u4 index = dex::kNoIndex;
884 switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
885 case dex::kFmt20bc:
886 case dex::kFmt21c:
887 case dex::kFmt31c:
888 case dex::kFmt35c:
889 case dex::kFmt3rc:
890 index = dex_instr.vB;
891 break;
892
893 case dex::kFmt22c:
894 index = dex_instr.vC;
895 break;
896
897 default:
898 break;
899 }
900
901 switch (GetIndexTypeFromOpcode(dex_instr.opcode)) {
902 case dex::kIndexStringRef:
903 GetString(index);
904 break;
905
906 case dex::kIndexTypeRef:
907 GetType(index);
908 break;
909
910 case dex::kIndexFieldRef:
911 GetFieldDecl(index);
912 break;
913
914 case dex::kIndexMethodRef:
915 GetMethodDecl(index);
916 break;
917
918 default:
919 break;
920 }
921
922 auto isize = dex::GetWidthFromBytecode(ptr);
923 SLICER_CHECK(isize > 0);
924 ptr += isize;
925 }
926 SLICER_CHECK(ptr == code.end());
927 }
928
929 // Basic .dex header structural checks
ValidateHeader()930 void Reader::ValidateHeader() {
931 SLICER_CHECK(size_ > sizeof(dex::Header));
932
933 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
934 // estimate. b/72402467
935 SLICER_CHECK(header_->file_size <= size_);
936 SLICER_CHECK(header_->header_size == sizeof(dex::Header));
937 SLICER_CHECK(header_->endian_tag == dex::kEndianConstant);
938 SLICER_CHECK(header_->data_size % 4 == 0);
939
940 // Known issue: The fields might be slighly corrupted b/65452964
941 // SLICER_CHECK(header_->data_off + header_->data_size <= size_);
942
943 SLICER_CHECK(header_->string_ids_off % 4 == 0);
944 SLICER_CHECK(header_->type_ids_size < 65536);
945 SLICER_CHECK(header_->type_ids_off % 4 == 0);
946 SLICER_CHECK(header_->proto_ids_size < 65536);
947 SLICER_CHECK(header_->proto_ids_off % 4 == 0);
948 SLICER_CHECK(header_->field_ids_off % 4 == 0);
949 SLICER_CHECK(header_->method_ids_off % 4 == 0);
950 SLICER_CHECK(header_->class_defs_off % 4 == 0);
951 SLICER_CHECK(header_->map_off >= header_->data_off && header_->map_off < size_);
952 SLICER_CHECK(header_->link_size == 0);
953 SLICER_CHECK(header_->link_off == 0);
954 SLICER_CHECK(header_->data_off % 4 == 0);
955 SLICER_CHECK(header_->map_off % 4 == 0);
956
957 // we seem to have .dex files with extra bytes at the end ...
958 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
959 // estimate. b/72402467
960 SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_);
961
962 // but we should still have the whole data section
963
964 // Known issue: The fields might be slighly corrupted b/65452964
965 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
966 // estimate. b/72402467
967 // SLICER_CHECK(header_->data_off + header_->data_size <= size_);
968
969 // validate the map
970 // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])
971 auto map_list = ptr<dex::MapList>(header_->map_off);
972 SLICER_CHECK(map_list->size > 0);
973 auto map_section_size =
974 sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size;
975 SLICER_CHECK(header_->map_off + map_section_size <= size_);
976 }
977
978 } // namespace dex
979