/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "experimental.h"
#include "slicer/code_ir.h"
#include "slicer/control_flow_graph.h"
#include "slicer/dex_ir.h"
#include "slicer/dex_ir_builder.h"
#include "slicer/instrumentation.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <vector>

namespace experimental {

// Rewrites every method by lifting it to code IR, then assembling it
// back to bytecode (this also stresses the CFG construction)
void FullRewrite(std::shared_ptr<ir::DexFile> dex_ir) {
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code != nullptr) {
      lir::CodeIr code_ir(ir_method.get(), dex_ir);
      lir::ControlFlowGraph cfg_compact(&code_ir, false);
      lir::ControlFlowGraph cfg_verbose(&code_ir, true);
      code_ir.Assemble();
    }
  }
}

// For every method body in the .dex image, replace invoke-virtual[/range]
// instances with an invoke-static[/range] to a fictitious
// Tracer.WrapInvoke(<args...>). WrapInvoke() is a static method which takes
// the same arguments as the original method plus an explicit "this" argument,
// and returns the same type as the original method.
void StressWrapInvoke(std::shared_ptr<ir::DexFile> dex_ir) {
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code == nullptr) {
      continue;
    }

    lir::CodeIr code_ir(ir_method.get(), dex_ir);
    ir::Builder builder(dex_ir);

    // search for invoke-virtual[/range] bytecodes
    //
    // NOTE: we may be removing the current bytecode
    //   from the instructions list so we must use a
    //   different iteration style (it++ is done before
    //   handling *it, not after as in a normal iteration)
    //
    auto it = code_ir.instructions.begin();
    while (it != code_ir.instructions.end()) {
      auto instr = *it++;
      auto bytecode = dynamic_cast<lir::Bytecode*>(instr);
      if (bytecode == nullptr) {
        continue;
      }

      dex::Opcode new_call_opcode = dex::OP_NOP;
      switch (bytecode->opcode) {
        case dex::OP_INVOKE_VIRTUAL:
          new_call_opcode = dex::OP_INVOKE_STATIC;
          break;
        case dex::OP_INVOKE_VIRTUAL_RANGE:
          new_call_opcode = dex::OP_INVOKE_STATIC_RANGE;
          break;
        default:
          // skip instruction ...
          continue;
      }
      assert(new_call_opcode != dex::OP_NOP);

      auto orig_method = bytecode->CastOperand<lir::Method>(1)->ir_method;

      // construct the wrapper method declaration
      std::vector<ir::Type*> param_types;
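      // NOTE: the original receiver ("this") becomes the first explicit parameter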
      param_types.push_back(orig_method->parent);
      if (orig_method->prototype->param_types != nullptr) {
        const auto& orig_param_types = orig_method->prototype->param_types->types;
        param_types.insert(param_types.end(), orig_param_types.begin(), orig_param_types.end());
      }

      auto ir_proto = builder.GetProto(orig_method->prototype->return_type,
                                       builder.GetTypeList(param_types));

      auto ir_method_decl = builder.GetMethodDecl(builder.GetAsciiString("WrapInvoke"),
                                                  ir_proto,
                                                  builder.GetType("LTracer;"));

      auto wrapper_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

      // new call bytecode
      auto new_call = code_ir.Alloc<lir::Bytecode>();
      new_call->opcode = new_call_opcode;
      new_call->operands.push_back(bytecode->operands[0]);
      new_call->operands.push_back(wrapper_method);
      code_ir.instructions.InsertBefore(bytecode, new_call);

      // remove the old call bytecode
      //
      // NOTE: we can mutate the original bytecode directly
      //  since the instructions can't have multiple references
      //  in the code IR, but for testing purposes we'll do it
      //  the hard way here
      //
      code_ir.instructions.Remove(bytecode);
    }

    code_ir.Assemble();
  }
}

// For every method in the .dex image, insert an "entry hook" call
// to a fictitious method: Tracer.OnEntry(<args...>). OnEntry() has the
// same argument types as the instrumented method plus an explicit
// "this" for non-static methods. On entry to the instrumented method
// we'll call OnEntry() with the values of the incoming arguments.
//
// NOTE: the entry hook will forward all the incoming arguments
//   so we need to define a Tracer.OnEntry overload for every method
//   signature. This means that for very large .dex images, approaching
//   the 64k method limit, we might not be able to allocate new method declarations.
//   (which is ok, and a good test case, since this is a stress scenario)
//
void StressEntryHook(std::shared_ptr<ir::DexFile> dex_ir) {
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code == nullptr) {
      continue;
    }

    lir::CodeIr code_ir(ir_method.get(), dex_ir);
    ir::Builder builder(dex_ir);

    // 1. construct call target
    std::vector<ir::Type*> param_types;
    if ((ir_method->access_flags & dex::kAccStatic) == 0) {
      param_types.push_back(ir_method->decl->parent);
    }
    if (ir_method->decl->prototype->param_types != nullptr) {
      const auto& orig_param_types = ir_method->decl->prototype->param_types->types;
      param_types.insert(param_types.end(), orig_param_types.begin(), orig_param_types.end());
    }

    auto ir_proto = builder.GetProto(builder.GetType("V"),
                                     builder.GetTypeList(param_types));

    auto ir_method_decl = builder.GetMethodDecl(builder.GetAsciiString("OnEntry"),
                                                ir_proto,
                                                builder.GetType("LTracer;"));

    auto target_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

    // 2. argument registers
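    //
    // NOTE: per the Dalvik calling convention, the incoming arguments
    //   occupy the last ins_count registers of the frame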
    auto regs = ir_method->code->registers;
    auto args_count = ir_method->code->ins_count;
    auto args = code_ir.Alloc<lir::VRegRange>(regs - args_count, args_count);

    // 3. call bytecode
    auto call = code_ir.Alloc<lir::Bytecode>();
    call->opcode = dex::OP_INVOKE_STATIC_RANGE;
    call->operands.push_back(args);
    call->operands.push_back(target_method);

    // 4. insert the hook before the first bytecode
    for (auto instr : code_ir.instructions) {
      auto bytecode = dynamic_cast<lir::Bytecode*>(instr);
      if (bytecode == nullptr) {
        continue;
      }
      code_ir.instructions.InsertBefore(bytecode, call);
      break;
    }

    code_ir.Assemble();
  }
}

// For every method in the .dex image, insert an "exit hook" call
// to a fictitious method: Tracer.OnExit(<return value...>).
// OnExit() is called right before returning from the instrumented
// method (on the non-exceptional path) and it will be passed the return
// value, if any. For non-void return types, the return value from OnExit()
// will also be used as the return value of the instrumented method.
void StressExitHook(std::shared_ptr<ir::DexFile> dex_ir) {
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code == nullptr) {
      continue;
    }

    lir::CodeIr code_ir(ir_method.get(), dex_ir);
    ir::Builder builder(dex_ir);

    // do we have a void-return method?
    bool return_void =
        ::strcmp(ir_method->decl->prototype->return_type->descriptor->c_str(), "V") == 0;

    // 1. construct call target
    std::vector<ir::Type*> param_types;
    if (!return_void) {
      param_types.push_back(ir_method->decl->prototype->return_type);
    }

    auto ir_proto = builder.GetProto(ir_method->decl->prototype->return_type,
                                     builder.GetTypeList(param_types));

    auto ir_method_decl = builder.GetMethodDecl(builder.GetAsciiString("OnExit"),
                                                ir_proto,
                                                builder.GetType("LTracer;"));

    auto target_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

    // 2. find and instrument the return instructions
    for (auto instr : code_ir.instructions) {
      auto bytecode = dynamic_cast<lir::Bytecode*>(instr);
      if (bytecode == nullptr) {
        continue;
      }

      dex::Opcode move_result_opcode = dex::OP_NOP;
      dex::u4 reg = 0;
      int reg_count = 0;
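      // NOTE: reg_count will be 2 for wide (64-bit) return values,
      //   which occupy a register pair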

      switch (bytecode->opcode) {
        case dex::OP_RETURN_VOID:
          SLICER_CHECK(return_void);
          break;
        case dex::OP_RETURN:
          SLICER_CHECK(!return_void);
          move_result_opcode = dex::OP_MOVE_RESULT;
          reg = bytecode->CastOperand<lir::VReg>(0)->reg;
          reg_count = 1;
          break;
        case dex::OP_RETURN_OBJECT:
          SLICER_CHECK(!return_void);
          move_result_opcode = dex::OP_MOVE_RESULT_OBJECT;
          reg = bytecode->CastOperand<lir::VReg>(0)->reg;
          reg_count = 1;
          break;
        case dex::OP_RETURN_WIDE:
          SLICER_CHECK(!return_void);
          move_result_opcode = dex::OP_MOVE_RESULT_WIDE;
          reg = bytecode->CastOperand<lir::VRegPair>(0)->base_reg;
          reg_count = 2;
          break;
        default:
          // skip the bytecode...
          continue;
      }

      // the call bytecode
      auto args = code_ir.Alloc<lir::VRegRange>(reg, reg_count);
      auto call = code_ir.Alloc<lir::Bytecode>();
      call->opcode = dex::OP_INVOKE_STATIC_RANGE;
      call->operands.push_back(args);
      call->operands.push_back(target_method);
      code_ir.instructions.InsertBefore(bytecode, call);

      // move result back to the right register
      //
      // NOTE: we're reusing the original return's operand,
      //   which is valid and more efficient than allocating
      //   a new LIR node, but it's also fragile: we need to be
      //   very careful about mutating shared nodes.
      //
      if (move_result_opcode != dex::OP_NOP) {
        auto move_result = code_ir.Alloc<lir::Bytecode>();
        move_result->opcode = move_result_opcode;
        move_result->operands.push_back(bytecode->operands[0]);
        code_ir.instructions.InsertBefore(bytecode, move_result);
      }
    }

    code_ir.Assemble();
  }
}

// Test slicer::MethodInstrumenter
void TestMethodInstrumenter(std::shared_ptr<ir::DexFile> dex_ir) {
  slicer::MethodInstrumenter mi(dex_ir);
  mi.AddTransformation<slicer::EntryHook>(ir::MethodId("LTracer;", "onFooEntry"), true);
  mi.AddTransformation<slicer::EntryHook>(ir::MethodId("LTracer;", "onFooEntry"), false);
  mi.AddTransformation<slicer::ExitHook>(ir::MethodId("LTracer;", "onFooExit"));
  mi.AddTransformation<slicer::DetourVirtualInvoke>(
      ir::MethodId("LBase;", "foo", "(ILjava/lang/String;)I"),
      ir::MethodId("LTracer;", "wrapFoo"));

  auto method1 = ir::MethodId("LTarget;", "foo", "(ILjava/lang/String;)I");
  SLICER_CHECK(mi.InstrumentMethod(method1));

  auto method2 = ir::MethodId("LTarget;", "foo", "(I[[Ljava/lang/String;)Ljava/lang/Integer;");
  SLICER_CHECK(mi.InstrumentMethod(method2));
}

// Stress scratch register allocation
void StressScratchRegs(std::shared_ptr<ir::DexFile> dex_ir) {
  slicer::MethodInstrumenter mi(dex_ir);

  // queue multiple allocations to stress corner cases (various counts and alignments)
  auto t1 = mi.AddTransformation<slicer::AllocateScratchRegs>(1, false);
  auto t2 = mi.AddTransformation<slicer::AllocateScratchRegs>(1, false);
  auto t3 = mi.AddTransformation<slicer::AllocateScratchRegs>(1);
  auto t4 = mi.AddTransformation<slicer::AllocateScratchRegs>(20);

  // apply the transformations to every single method
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code != nullptr) {
      SLICER_CHECK(mi.InstrumentMethod(ir_method.get()));
      SLICER_CHECK(t1->ScratchRegs().size() == 1);
      SLICER_CHECK(t2->ScratchRegs().size() == 1);
      SLICER_CHECK(t3->ScratchRegs().size() == 1);
      SLICER_CHECK(t4->ScratchRegs().size() == 20);
    }
  }
}

// Sample code coverage instrumentation: on the entry of every
// basic block, inject a call to a tracing method:
//
//   CodeCoverage.TraceBasicBlock(block_id)
//
void CodeCoverage(std::shared_ptr<ir::DexFile> dex_ir) {
  ir::Builder builder(dex_ir);
  slicer::AllocateScratchRegs alloc_regs(1);
  int basic_block_id = 1;
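  // NOTE: basic block ids are assigned globally across all
  //   instrumented methods, starting from 1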

  constexpr const char* kTracerClass = "LCodeCoverage;";

  // create the tracing method declaration
  std::vector<ir::Type*> param_types { builder.GetType("I") };
  auto ir_proto =
      builder.GetProto(builder.GetType("V"),
                       builder.GetTypeList(param_types));
  auto ir_method_decl =
      builder.GetMethodDecl(builder.GetAsciiString("TraceBasicBlock"),
                            ir_proto,
                            builder.GetType(kTracerClass));

  // instrument every method (except for the tracer class methods)
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code == nullptr) {
      continue;
    }

    // don't instrument the methods of the tracer class
    if (::strcmp(ir_method->decl->parent->descriptor->c_str(), kTracerClass) == 0) {
      continue;
    }

    lir::CodeIr code_ir(ir_method.get(), dex_ir);
    lir::ControlFlowGraph cfg(&code_ir, true);

    // allocate a scratch register
    //
    // NOTE: we're assuming this does not change the CFG!
    //   (this is the case here, but transformations which
    //    alter the basic block boundaries or the code flow
    //    would invalidate existing CFGs)
    //
    alloc_regs.Apply(&code_ir);
    dex::u4 scratch_reg = *alloc_regs.ScratchRegs().begin();

    // TODO: handle very "high" registers
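    //   (OP_CONST can only encode an 8-bit destination register,
    //    so a scratch register above v255 can't be used here yet)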
    if (scratch_reg > 0xff) {
      printf("WARNING: can't instrument method %s.%s%s\n",
             ir_method->decl->parent->Decl().c_str(),
             ir_method->decl->name->c_str(),
             ir_method->decl->prototype->Signature().c_str());
      continue;
    }

    auto tracing_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

    // instrument each basic block entry point
    for (const auto& block : cfg.basic_blocks) {
      // generate the map of basic blocks
      printf("%8u: mi=%u s=%u e=%u\n",
             static_cast<dex::u4>(basic_block_id),
             ir_method->decl->orig_index,
             block.region.first->offset,
             block.region.last->offset);

      // find first bytecode in the basic block
      lir::Instruction* trace_point = nullptr;
      for (auto instr = block.region.first; instr != nullptr; instr = instr->next) {
        trace_point = dynamic_cast<lir::Bytecode*>(instr);
        if (trace_point != nullptr || instr == block.region.last) {
          break;
        }
      }
      SLICER_CHECK(trace_point != nullptr);

      // special case: don't separate 'move-result-<kind>' from the preceding invoke
      auto opcode = static_cast<lir::Bytecode*>(trace_point)->opcode;
      if (opcode == dex::OP_MOVE_RESULT ||
          opcode == dex::OP_MOVE_RESULT_WIDE ||
          opcode == dex::OP_MOVE_RESULT_OBJECT) {
        trace_point = trace_point->next;
      }

      // arg_reg = block_id
      auto load_block_id = code_ir.Alloc<lir::Bytecode>();
      load_block_id->opcode = dex::OP_CONST;
      load_block_id->operands.push_back(code_ir.Alloc<lir::VReg>(scratch_reg));
      load_block_id->operands.push_back(code_ir.Alloc<lir::Const32>(basic_block_id));
      code_ir.instructions.InsertBefore(trace_point, load_block_id);

      // call the tracing method
      auto trace_call = code_ir.Alloc<lir::Bytecode>();
      trace_call->opcode = dex::OP_INVOKE_STATIC_RANGE;
      trace_call->operands.push_back(code_ir.Alloc<lir::VRegRange>(scratch_reg, 1));
      trace_call->operands.push_back(tracing_method);
      code_ir.instructions.InsertBefore(trace_point, trace_call);

      ++basic_block_id;
    }

    code_ir.Assemble();
  }
}

// Stress the roundtrip: EncodedMethod -> MethodId -> FindMethod -> EncodedMethod
// NOTE: until we start indexing methods, this test is slow on debug builds
//   with large .dex images
void StressFindMethod(std::shared_ptr<ir::DexFile> dex_ir) {
  ir::Builder builder(dex_ir);
  int method_count = 0;
  for (auto& ir_method : dex_ir->encoded_methods) {
    auto decl = ir_method->decl;
    auto signature = decl->prototype->Signature();
    auto class_descriptor = decl->parent->descriptor;
    ir::MethodId method_id(class_descriptor->c_str(), decl->name->c_str(), signature.c_str());
    SLICER_CHECK(builder.FindMethod(method_id) == ir_method.get());
    ++method_count;
  }
  printf("Everything looks fine, found %d methods.\n", method_count);
}

static void PrintHistogram(const std::map<int, int>& histogram, const char* name) {
  constexpr int kHistogramWidth = 100;
  int max_count = 0;
  for (const auto& kv : histogram) {
    max_count = std::max(max_count, kv.second);
  }
  printf("\nHistogram: %s [max_count=%d]\n\n", name, max_count);
  for (const auto& kv : histogram) {
    printf("%6d [ %3d ] ", kv.second, kv.first);
    int hist_len = static_cast<int>(static_cast<double>(kv.second) / max_count * kHistogramWidth);
    for (int i = 0; i <= hist_len; ++i) {
      printf("*");
    }
    printf("\n");
  }
}

// Builds histograms of per-method register counts
void RegsHistogram(std::shared_ptr<ir::DexFile> dex_ir) {
  std::map<int, int> regs_histogram;
  std::map<int, int> param_histogram;
  std::map<int, int> extra_histogram;
  for (auto& ir_method : dex_ir->encoded_methods) {
    if (ir_method->code != nullptr) {
      const int regs = ir_method->code->registers;
      const int ins = ir_method->code->ins_count;
      SLICER_CHECK(regs >= ins);
      regs_histogram[regs]++;
      param_histogram[ins]++;
      extra_histogram[regs - ins]++;
    }
  }
  PrintHistogram(regs_histogram, "Method registers");
  PrintHistogram(param_histogram, "Method parameter registers");
  PrintHistogram(extra_histogram, "Method extra registers (total - parameters)");
}

void ListExperiments(std::shared_ptr<ir::DexFile> dex_ir);

using Experiment = void (*)(std::shared_ptr<ir::DexFile>);

// the registry of available experiments
std::map<std::string, Experiment> experiments_registry = {
    { "list_experiments", &ListExperiments },
    { "full_rewrite", &FullRewrite },
    { "stress_entry_hook", &StressEntryHook },
    { "stress_exit_hook", &StressExitHook },
    { "stress_wrap_invoke", &StressWrapInvoke },
    { "test_method_instrumenter", &TestMethodInstrumenter },
    { "stress_find_method", &StressFindMethod },
    { "stress_scratch_regs", &StressScratchRegs },
    { "regs_histogram", &RegsHistogram },
    { "code_coverage", &CodeCoverage },
};

// Lists all the registered experiments
void ListExperiments(std::shared_ptr<ir::DexFile> dex_ir) {
  printf("\nAvailable experiments:\n");
  printf("-------------------------\n");
  for (auto& e : experiments_registry) {
    printf("  %s\n", e.first.c_str());
  }
  printf("-------------------------\n\n");
}

// Driver for running experiments
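//
// Usage sketch (hypothetical caller, e.g. the tool's main()):
//
//   experimental::Run("full_rewrite", dex_ir);
//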
void Run(const char* experiment, std::shared_ptr<ir::DexFile> dex_ir) {
  auto it = experiments_registry.find(experiment);
  if (it == experiments_registry.end()) {
    printf("\nUnknown experiment '%s'\n", experiment);
    ListExperiments(dex_ir);
    exit(1);
  }

  // running the experiment entry point
  (*it->second)(dex_ir);
}

}  // namespace experimental