1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "slicer/dex_ir.h" 18 #include "slicer/chronometer.h" 19 #include "slicer/dex_utf8.h" 20 #include "slicer/dex_format.h" 21 22 #include <algorithm> 23 #include <cstdint> 24 #include <map> 25 #include <memory> 26 #include <vector> 27 #include <sstream> 28 #include <functional> 29 30 namespace ir { 31 32 // DBJ2a string hash 33 static uint32_t HashString(const char* cstr) { 34 uint32_t hash = 5381; // DBJ2 magic prime value 35 while (*cstr) { 36 hash = ((hash << 5) + hash) ^ *cstr++; 37 } 38 return hash; 39 } 40 41 uint32_t StringsHasher::Hash(const char* string_key) const { 42 return HashString(string_key); 43 } 44 45 bool StringsHasher::Compare(const char* string_key, const String* string) const { 46 return dex::Utf8Cmp(string_key, string->c_str()) == 0; 47 } 48 49 uint32_t ProtosHasher::Hash(const std::string& proto_key) const { 50 return HashString(proto_key.c_str()); 51 } 52 53 bool ProtosHasher::Compare(const std::string& proto_key, const Proto* proto) const { 54 return proto_key == proto->Signature(); 55 } 56 57 MethodKey MethodsHasher::GetKey(const EncodedMethod* method) const { 58 MethodKey method_key; 59 method_key.class_descriptor = method->decl->parent->descriptor; 60 method_key.method_name = method->decl->name; 61 method_key.prototype = method->decl->prototype; 62 return method_key; 63 } 64 65 uint32_t MethodsHasher::Hash(const MethodKey& method_key) const { 66 return static_cast<uint32_t>(std::hash<void*>{}(method_key.class_descriptor) ^ 67 std::hash<void*>{}(method_key.method_name) ^ 68 std::hash<void*>{}(method_key.prototype)); 69 } 70 71 bool MethodsHasher::Compare(const MethodKey& method_key, const EncodedMethod* method) const { 72 return method_key.class_descriptor == method->decl->parent->descriptor && 73 method_key.method_name == method->decl->name && 74 method_key.prototype == method->decl->prototype; 75 } 76 77 // Human-readable type declaration 78 std::string Type::Decl() const { 79 return dex::DescriptorToDecl(descriptor->c_str()); 80 } 81 82 Type::Category Type::GetCategory() const { 83 switch (*descriptor->c_str()) { 84 case 'L': 85 case '[': 86 return Category::Reference; 87 case 'V': 88 return Category::Void; 89 case 'D': 90 case 'J': 91 return Category::WideScalar; 92 default: 93 return Category::Scalar; 94 } 95 } 96 97 // Create the corresponding JNI signature: 98 // https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/types.html#type_signatures 99 std::string Proto::Signature() const { 100 std::stringstream ss; 101 ss << "("; 102 if (param_types != nullptr) { 103 for (const auto& type : param_types->types) { 104 ss << type->descriptor->c_str(); 105 } 106 } 107 ss << ")"; 108 ss << return_type->descriptor->c_str(); 109 return ss.str(); 110 } 111 112 // Helper for IR normalization 113 // (it sorts items and update the numeric idexes to match) 114 template <class T, class C> 115 static void IndexItems(std::vector<T>& items, C comp) { 116 std::sort(items.begin(), items.end(), comp); 117 for (size_t i = 0; i < items.size(); ++i) { 118 items[i]->index = i; 119 } 120 } 121 122 // Helper for IR normalization (DFS for topological sort) 123 // 124 // NOTE: this recursive version is clean and simple and we know 125 // that the max depth is bounded (exactly 1 for JVMTI and a small 126 // max for general case - the largest .dex file in AOSP has 5000 classes 127 // total) 128 // 129 void DexFile::TopSortClassIndex(Class* irClass, dex::u4* nextIndex) { 130 if (irClass->index == dex::u4(-1)) { 131 if (irClass->super_class && irClass->super_class->class_def) { 132 TopSortClassIndex(irClass->super_class->class_def, nextIndex); 133 } 134 135 if (irClass->interfaces) { 136 for (Type* interfaceType : irClass->interfaces->types) { 137 if (interfaceType->class_def) { 138 TopSortClassIndex(interfaceType->class_def, nextIndex); 139 } 140 } 141 } 142 143 SLICER_CHECK(*nextIndex < classes.size()); 144 irClass->index = (*nextIndex)++; 145 } 146 } 147 148 // Helper for IR normalization 149 // (topological sort the classes) 150 void DexFile::SortClassIndexes() { 151 for (auto& irClass : classes) { 152 irClass->index = dex::u4(-1); 153 } 154 155 dex::u4 nextIndex = 0; 156 for (auto& irClass : classes) { 157 TopSortClassIndex(irClass.get(), &nextIndex); 158 } 159 } 160 161 // Helper for NormalizeClass() 162 static void SortEncodedFields(std::vector<EncodedField*>* fields) { 163 std::sort(fields->begin(), fields->end(), 164 [](const EncodedField* a, const EncodedField* b) { 165 SLICER_CHECK(a->decl->index != b->decl->index || a == b); 166 return a->decl->index < b->decl->index; 167 }); 168 } 169 170 // Helper for NormalizeClass() 171 static void SortEncodedMethods(std::vector<EncodedMethod*>* methods) { 172 std::sort(methods->begin(), methods->end(), 173 [](const EncodedMethod* a, const EncodedMethod* b) { 174 SLICER_CHECK(a->decl->index != b->decl->index || a == b); 175 return a->decl->index < b->decl->index; 176 }); 177 } 178 179 // Helper for IR normalization 180 // (sort the field & method arrays) 181 static void NormalizeClass(Class* irClass) { 182 SortEncodedFields(&irClass->static_fields); 183 SortEncodedFields(&irClass->instance_fields); 184 SortEncodedMethods(&irClass->direct_methods); 185 SortEncodedMethods(&irClass->virtual_methods); 186 } 187 188 // Prepare the IR for generating a .dex image 189 // (the .dex format requires a specific sort order for some of the arrays, etc...) 190 // 191 // TODO: not a great solution - move this logic to the writer! 192 // 193 // TODO: the comparison predicate can be better expressed by using std::tie() 194 // Ex. FieldDecl has a method comp() returning tie(parent->index, name->index, type->index) 195 // 196 void DexFile::Normalize() { 197 // sort build the .dex indexes 198 IndexItems(strings, [](const own<String>& a, const own<String>& b) { 199 // this list must be sorted by std::string contents, using UTF-16 code point values 200 // (not in a locale-sensitive manner) 201 return dex::Utf8Cmp(a->c_str(), b->c_str()) < 0; 202 }); 203 204 IndexItems(types, [](const own<Type>& a, const own<Type>& b) { 205 // this list must be sorted by string_id index 206 return a->descriptor->index < b->descriptor->index; 207 }); 208 209 IndexItems(protos, [](const own<Proto>& a, const own<Proto>& b) { 210 // this list must be sorted in return-type (by type_id index) major order, 211 // and then by argument list (lexicographic ordering, individual arguments 212 // ordered by type_id index) 213 if (a->return_type->index != b->return_type->index) { 214 return a->return_type->index < b->return_type->index; 215 } else { 216 std::vector<Type*> empty; 217 const auto& aParamTypes = a->param_types ? a->param_types->types : empty; 218 const auto& bParamTypes = b->param_types ? b->param_types->types : empty; 219 return std::lexicographical_compare( 220 aParamTypes.begin(), aParamTypes.end(), bParamTypes.begin(), 221 bParamTypes.end(), 222 [](const Type* t1, const Type* t2) { return t1->index < t2->index; }); 223 } 224 }); 225 226 IndexItems(fields, [](const own<FieldDecl>& a, const own<FieldDecl>& b) { 227 // this list must be sorted, where the defining type (by type_id index) is 228 // the major order, field name (by string_id index) is the intermediate 229 // order, and type (by type_id index) is the minor order 230 return (a->parent->index != b->parent->index) 231 ? a->parent->index < b->parent->index 232 : (a->name->index != b->name->index) 233 ? a->name->index < b->name->index 234 : a->type->index < b->type->index; 235 }); 236 237 IndexItems(methods, [](const own<MethodDecl>& a, const own<MethodDecl>& b) { 238 // this list must be sorted, where the defining type (by type_id index) is 239 // the major order, method name (by string_id index) is the intermediate 240 // order, and method prototype (by proto_id index) is the minor order 241 return (a->parent->index != b->parent->index) 242 ? a->parent->index < b->parent->index 243 : (a->name->index != b->name->index) 244 ? a->name->index < b->name->index 245 : a->prototype->index < b->prototype->index; 246 }); 247 248 // reverse topological sort 249 // 250 // the classes must be ordered such that a given class's superclass and 251 // implemented interfaces appear in the list earlier than the referring 252 // class 253 // 254 // CONSIDER: for the BCI-only scenario we can avoid this 255 // 256 SortClassIndexes(); 257 258 IndexItems(classes, [&](const own<Class>& a, const own<Class>& b) { 259 SLICER_CHECK(a->index < classes.size()); 260 SLICER_CHECK(b->index < classes.size()); 261 SLICER_CHECK(a->index != b->index || a == b); 262 return a->index < b->index; 263 }); 264 265 // normalize class data 266 for (const auto& irClass : classes) { 267 NormalizeClass(irClass.get()); 268 } 269 270 // normalize annotations 271 for (const auto& irAnnotation : annotations) { 272 // elements must be sorted in increasing order by string_id index 273 auto& elements = irAnnotation->elements; 274 std::sort(elements.begin(), elements.end(), 275 [](const AnnotationElement* a, const AnnotationElement* b) { 276 return a->name->index < b->name->index; 277 }); 278 } 279 280 // normalize "annotation_set_item" 281 for (const auto& irAnnotationSet : annotation_sets) { 282 // The elements must be sorted in increasing order, by type_idx 283 auto& annotations = irAnnotationSet->annotations; 284 std::sort(annotations.begin(), annotations.end(), 285 [](const Annotation* a, const Annotation* b) { 286 return a->type->index < b->type->index; 287 }); 288 } 289 290 // normalize "annotations_directory_item" 291 for (const auto& irAnnotationDirectory : annotations_directories) { 292 // field_annotations: The elements of the list must be 293 // sorted in increasing order, by field_idx 294 auto& field_annotations = irAnnotationDirectory->field_annotations; 295 std::sort(field_annotations.begin(), field_annotations.end(), 296 [](const FieldAnnotation* a, const FieldAnnotation* b) { 297 return a->field_decl->index < b->field_decl->index; 298 }); 299 300 // method_annotations: The elements of the list must be 301 // sorted in increasing order, by method_idx 302 auto& method_annotations = irAnnotationDirectory->method_annotations; 303 std::sort(method_annotations.begin(), method_annotations.end(), 304 [](const MethodAnnotation* a, const MethodAnnotation* b) { 305 return a->method_decl->index < b->method_decl->index; 306 }); 307 308 // parameter_annotations: The elements of the list must be 309 // sorted in increasing order, by method_idx 310 auto& param_annotations = irAnnotationDirectory->param_annotations; 311 std::sort(param_annotations.begin(), param_annotations.end(), 312 [](const ParamAnnotation* a, const ParamAnnotation* b) { 313 return a->method_decl->index < b->method_decl->index; 314 }); 315 } 316 } 317 318 } // namespace ir 319 320