/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "slicer/instrumentation.h"
#include "slicer/dex_ir_builder.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <vector>

namespace slicer {

namespace {

// Extracts the lir::Bytecode node (if any) from a generic lir::Instruction
// through the Visitor double-dispatch: after instr->Accept(&visitor),
// `out` is the bytecode or nullptr when the instruction is not a bytecode
// (e.g. a label or debug-info annotation).
struct BytecodeConvertingVisitor : public lir::Visitor {
  lir::Bytecode* out = nullptr;
  bool Visit(lir::Bytecode* bytecode) {
    out = bytecode;
    return true;
  }
};

// Emits code (inserted before `bytecode`) that boxes the primitive value
// held in src_reg into the matching java.lang.* wrapper via the static
// Wrapper.valueOf(primitive) method, leaving the boxed reference in dst_reg.
// `type` must be a primitive type; SLICER_CHECK fails otherwise.
void BoxValue(lir::Bytecode* bytecode,
              lir::CodeIr* code_ir,
              ir::Type* type,
              dex::u4 src_reg,
              dex::u4 dst_reg) {
  bool is_wide = false;
  const char* boxed_type_name = nullptr;
  // dispatch on the first character of the type descriptor
  switch (*(type->descriptor)->c_str()) {
    case 'Z':
      boxed_type_name = "Ljava/lang/Boolean;";
      break;
    case 'B':
      boxed_type_name = "Ljava/lang/Byte;";
      break;
    case 'C':
      boxed_type_name = "Ljava/lang/Character;";
      break;
    case 'S':
      boxed_type_name = "Ljava/lang/Short;";
      break;
    case 'I':
      boxed_type_name = "Ljava/lang/Integer;";
      break;
    case 'J':
      is_wide = true;
      boxed_type_name = "Ljava/lang/Long;";
      break;
    case 'F':
      boxed_type_name = "Ljava/lang/Float;";
      break;
    case 'D':
      is_wide = true;
      boxed_type_name = "Ljava/lang/Double;";
      break;
  }
  SLICER_CHECK(boxed_type_name != nullptr);

  ir::Builder builder(code_ir->dex_ir);

  std::vector<ir::Type*> param_types;
  param_types.push_back(type);

  auto boxed_type = builder.GetType(boxed_type_name);
  auto ir_proto = builder.GetProto(boxed_type, builder.GetTypeList(param_types));
  auto ir_method_decl = builder.GetMethodDecl(
      builder.GetAsciiString("valueOf"), ir_proto, boxed_type);
  auto boxing_method =
      code_ir->Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

  // invoke-static/range {src_reg .. src_reg[+1]}, Wrapper.valueOf
  // (wide primitives occupy two consecutive registers)
  auto args = code_ir->Alloc<lir::VRegRange>(src_reg, 1 + is_wide);
  auto boxing_invoke = code_ir->Alloc<lir::Bytecode>();
  boxing_invoke->opcode = dex::OP_INVOKE_STATIC_RANGE;
  boxing_invoke->operands.push_back(args);
  boxing_invoke->operands.push_back(boxing_method);
  code_ir->instructions.InsertBefore(bytecode, boxing_invoke);

  // move-result-object dst_reg
  auto move_result = code_ir->Alloc<lir::Bytecode>();
  move_result->opcode = dex::OP_MOVE_RESULT_OBJECT;
  move_result->operands.push_back(code_ir->Alloc<lir::VReg>(dst_reg));
  code_ir->instructions.InsertBefore(bytecode, move_result);
}

}  // anonymous namespace

// Inserts a static hook invocation before the first bytecode of the method,
// forwarding the method's arguments ("this" included for non-static methods).
// Returns false when the method body is empty or can't be instrumented.
bool EntryHook::Apply(lir::CodeIr* code_ir) {
  lir::Bytecode* bytecode = nullptr;
  // find the first bytecode in the method body to insert the hook before it
  for (auto instr : code_ir->instructions) {
    BytecodeConvertingVisitor visitor;
    instr->Accept(&visitor);
    bytecode = visitor.out;
    if (bytecode != nullptr) {
      break;
    }
  }
  if (bytecode == nullptr) {
    return false;
  }

  if (tweak_ == Tweak::ArrayParams) {
    return InjectArrayParamsHook(code_ir, bytecode);
  }

  ir::Builder builder(code_ir->dex_ir);
  const auto ir_method = code_ir->ir_method;

  // construct the hook method declaration
  std::vector<ir::Type*> param_types;
  if ((ir_method->access_flags & dex::kAccStatic) == 0) {
    // "this" is an explicit first parameter of the (static) hook method;
    // the ThisAsObject tweak erases its declared type to java.lang.Object
    ir::Type* this_argument_type;
    switch (tweak_) {
      case Tweak::ThisAsObject:
        this_argument_type = builder.GetType("Ljava/lang/Object;");
        break;
      default:
        this_argument_type = ir_method->decl->parent;
        break;
    }
    param_types.push_back(this_argument_type);
  }
  if (ir_method->decl->prototype->param_types != nullptr) {
    const auto& orig_param_types = ir_method->decl->prototype->param_types->types;
    param_types.insert(param_types.end(), orig_param_types.begin(),
                       orig_param_types.end());
  }

  auto ir_proto =
      builder.GetProto(builder.GetType("V"), builder.GetTypeList(param_types));

  auto ir_method_decl = builder.GetMethodDecl(
      builder.GetAsciiString(hook_method_id_.method_name), ir_proto,
      builder.GetType(hook_method_id_.class_descriptor));

  auto hook_method =
      code_ir->Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

  // argument registers: params live in the last ins_count registers
  auto regs = ir_method->code->registers;
  auto args_count = ir_method->code->ins_count;
  auto args = code_ir->Alloc<lir::VRegRange>(regs - args_count, args_count);

  // invoke hook bytecode
  auto hook_invoke = code_ir->Alloc<lir::Bytecode>();
  hook_invoke->opcode = dex::OP_INVOKE_STATIC_RANGE;
  hook_invoke->operands.push_back(args);
  hook_invoke->operands.push_back(hook_method);

  // insert the hook before the first bytecode in the method body
  code_ir->instructions.InsertBefore(bytecode, hook_invoke);

  return true;
}

// ArrayParams variant of the entry hook: packs all arguments (boxed when
// primitive, "this" included for non-static methods) into a fresh
// Object[] and passes that single array to the hook method.
// `bytecode` is the first bytecode of the method body; the generated code
// is inserted before it. Returns false when instrumentation isn't possible.
bool EntryHook::InjectArrayParamsHook(lir::CodeIr* code_ir,
                                      lir::Bytecode* bytecode) {
  ir::Builder builder(code_ir->dex_ir);
  const auto ir_method = code_ir->ir_method;
  auto param_types_list = ir_method->decl->prototype->param_types;
  auto param_types = param_types_list != nullptr ? param_types_list->types
                                                 : std::vector<ir::Type*>();
  bool is_static = (ir_method->access_flags & dex::kAccStatic) != 0;

  // a boxing scratch register is only needed if any param is primitive
  bool needsBoxingReg = false;
  for (auto type : param_types) {
    needsBoxingReg |= type->GetCategory() != ir::Type::Category::Reference;
  }

  // allocate scratch registers
  slicer::AllocateScratchRegs alloc_regs(2 + needsBoxingReg);
  alloc_regs.Apply(code_ir);
  auto reg_iterator = alloc_regs.ScratchRegs().begin();

  // register that will store size of during allocation
  // later will be reused to store index when do "aput"
  dex::u4 array_size_reg = *(reg_iterator);

  // register that will store an array that will be passed
  // as a parameter in entry hook
  dex::u4 array_reg = *(++reg_iterator);

  // if we need boxing, this register stores the result of boxing
  dex::u4 boxing_reg = needsBoxingReg ? *(++reg_iterator) : 0;

  // TODO: handle very "high" registers
  if (boxing_reg > 0xff) {
    printf("WARNING: can't instrument method %s.%s%s\n",
           ir_method->decl->parent->Decl().c_str(),
           ir_method->decl->name->c_str(),
           ir_method->decl->prototype->Signature().c_str());
    return false;
  }

  // array size bytecode: const array_size_reg, #params (+1 for "this")
  auto const_size_op = code_ir->Alloc<lir::Bytecode>();
  const_size_op->opcode = dex::OP_CONST;
  const_size_op->operands.push_back(code_ir->Alloc<lir::VReg>(array_size_reg));
  const_size_op->operands.push_back(
      code_ir->Alloc<lir::Const32>(param_types.size() + !is_static));
  code_ir->instructions.InsertBefore(bytecode, const_size_op);

  // allocate array: new-array array_reg, array_size_reg, [Ljava/lang/Object;
  const auto obj_array_type = builder.GetType("[Ljava/lang/Object;");
  auto allocate_array_op = code_ir->Alloc<lir::Bytecode>();
  allocate_array_op->opcode = dex::OP_NEW_ARRAY;
  allocate_array_op->operands.push_back(code_ir->Alloc<lir::VReg>(array_reg));
  allocate_array_op->operands.push_back(code_ir->Alloc<lir::VReg>(array_size_reg));
  allocate_array_op->operands.push_back(
      code_ir->Alloc<lir::Type>(obj_array_type, obj_array_type->orig_index));
  code_ir->instructions.InsertBefore(bytecode, allocate_array_op);

  // fill the array with parameters passed into function
  std::vector<ir::Type*> types;
  if (!is_static) {
    types.push_back(ir_method->decl->parent);
  }
  types.insert(types.end(), param_types.begin(), param_types.end());

  // register where params start (last ins_count registers of the frame)
  dex::u4 current_reg = ir_method->code->registers - ir_method->code->ins_count;
  // reuse the no-longer-needed size register to store array indexes
  dex::u4 array_index_reg = array_size_reg;
  int i = 0;
  for (auto type : types) {
    dex::u4 src_reg = 0;
    if (type->GetCategory() != ir::Type::Category::Reference) {
      // primitives must be boxed before being stored in an Object[]
      BoxValue(bytecode, code_ir, type, current_reg, boxing_reg);
      src_reg = boxing_reg;
      current_reg += 1 + (type->GetCategory() == ir::Type::Category::WideScalar);
    } else {
      src_reg = current_reg;
      current_reg++;
    }

    // const array_index_reg, #i
    auto index_const_op = code_ir->Alloc<lir::Bytecode>();
    index_const_op->opcode = dex::OP_CONST;
    index_const_op->operands.push_back(code_ir->Alloc<lir::VReg>(array_index_reg));
    index_const_op->operands.push_back(code_ir->Alloc<lir::Const32>(i++));
    code_ir->instructions.InsertBefore(bytecode, index_const_op);

    // aput-object src_reg, array_reg, array_index_reg
    auto aput_op = code_ir->Alloc<lir::Bytecode>();
    aput_op->opcode = dex::OP_APUT_OBJECT;
    aput_op->operands.push_back(code_ir->Alloc<lir::VReg>(src_reg));
    aput_op->operands.push_back(code_ir->Alloc<lir::VReg>(array_reg));
    aput_op->operands.push_back(code_ir->Alloc<lir::VReg>(array_index_reg));
    code_ir->instructions.InsertBefore(bytecode, aput_op);
  }

  // construct the hook method declaration: static void hook(Object[])
  std::vector<ir::Type*> hook_param_types;
  hook_param_types.push_back(obj_array_type);

  auto ir_proto = builder.GetProto(builder.GetType("V"),
                                   builder.GetTypeList(hook_param_types));

  auto ir_method_decl = builder.GetMethodDecl(
      builder.GetAsciiString(hook_method_id_.method_name), ir_proto,
      builder.GetType(hook_method_id_.class_descriptor));

  auto hook_method =
      code_ir->Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);
  auto args = code_ir->Alloc<lir::VRegRange>(array_reg, 1);
  auto hook_invoke = code_ir->Alloc<lir::Bytecode>();
  hook_invoke->opcode = dex::OP_INVOKE_STATIC_RANGE;
  hook_invoke->operands.push_back(args);
  hook_invoke->operands.push_back(hook_method);
  code_ir->instructions.InsertBefore(bytecode, hook_invoke);

  return true;
}

// Instruments every return instruction: before each return, the would-be
// return value is passed to a static hook method whose result replaces it
// (the hook may observe and/or substitute the return value).
// With the ReturnAsObject tweak the value is passed as java.lang.Object and
// a check-cast back to the declared type is emitted after the hook call.
bool ExitHook::Apply(lir::CodeIr* code_ir) {
  ir::Builder builder(code_ir->dex_ir);
  const auto ir_method = code_ir->ir_method;
  const auto declared_return_type = ir_method->decl->prototype->return_type;
  bool return_as_object = tweak_ == Tweak::ReturnAsObject;
  // do we have a void-return method?
  bool return_void =
      (::strcmp(declared_return_type->descriptor->c_str(), "V") == 0);
  // returnAsObject supports only object return type;
  SLICER_CHECK(!return_as_object ||
               (declared_return_type->GetCategory() ==
                ir::Type::Category::Reference));
  const auto return_type = return_as_object ? builder.GetType("Ljava/lang/Object;")
                                            : declared_return_type;

  // construct the hook method declaration
  std::vector<ir::Type*> param_types;
  if (!return_void) {
    param_types.push_back(return_type);
  }

  auto ir_proto =
      builder.GetProto(return_type, builder.GetTypeList(param_types));

  auto ir_method_decl = builder.GetMethodDecl(
      builder.GetAsciiString(hook_method_id_.method_name), ir_proto,
      builder.GetType(hook_method_id_.class_descriptor));

  auto hook_method =
      code_ir->Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

  // find and instrument all return instructions
  for (auto instr : code_ir->instructions) {
    BytecodeConvertingVisitor visitor;
    instr->Accept(&visitor);
    auto bytecode = visitor.out;
    if (bytecode == nullptr) {
      continue;
    }

    dex::Opcode move_result_opcode = dex::OP_NOP;
    dex::u4 reg = 0;
    int reg_count = 0;

    switch (bytecode->opcode) {
      case dex::OP_RETURN_VOID:
        SLICER_CHECK(return_void);
        break;
      case dex::OP_RETURN:
        SLICER_CHECK(!return_void);
        move_result_opcode = dex::OP_MOVE_RESULT;
        reg = bytecode->CastOperand<lir::VReg>(0)->reg;
        reg_count = 1;
        break;
      case dex::OP_RETURN_OBJECT:
        SLICER_CHECK(!return_void);
        move_result_opcode = dex::OP_MOVE_RESULT_OBJECT;
        reg = bytecode->CastOperand<lir::VReg>(0)->reg;
        reg_count = 1;
        break;
      case dex::OP_RETURN_WIDE:
        SLICER_CHECK(!return_void);
        move_result_opcode = dex::OP_MOVE_RESULT_WIDE;
        reg = bytecode->CastOperand<lir::VRegPair>(0)->base_reg;
        reg_count = 2;
        break;
      default:
        // skip the bytecode...
        continue;
    }

    // invoke hook bytecode
    auto args = code_ir->Alloc<lir::VRegRange>(reg, reg_count);
    auto hook_invoke = code_ir->Alloc<lir::Bytecode>();
    hook_invoke->opcode = dex::OP_INVOKE_STATIC_RANGE;
    hook_invoke->operands.push_back(args);
    hook_invoke->operands.push_back(hook_method);
    code_ir->instructions.InsertBefore(bytecode, hook_invoke);

    // move result back to the right register
    //
    // NOTE: we're reusing the original return's operand,
    //   which is valid and more efficient than allocating
    //   a new LIR node, but it's also fragile: we need to be
    //   very careful about mutating shared nodes.
    //
    if (move_result_opcode != dex::OP_NOP) {
      auto move_result = code_ir->Alloc<lir::Bytecode>();
      move_result->opcode = move_result_opcode;
      move_result->operands.push_back(bytecode->operands[0]);
      code_ir->instructions.InsertBefore(bytecode, move_result);

      if (tweak_ == Tweak::ReturnAsObject) {
        // cast the Object returned by the hook back to the declared type
        auto check_cast = code_ir->Alloc<lir::Bytecode>();
        check_cast->opcode = dex::OP_CHECK_CAST;
        check_cast->operands.push_back(code_ir->Alloc<lir::VReg>(reg));
        check_cast->operands.push_back(code_ir->Alloc<lir::Type>(
            declared_return_type, declared_return_type->orig_index));
        code_ir->instructions.InsertBefore(bytecode, check_cast);
      }
    }
  }

  return true;
}

// Redirects matching invocations of orig_method_id_ to a static detour
// method with the same signature plus an explicit "this" first parameter.
// GetNewOpcode() (implemented by subclasses) decides which invoke opcodes
// are rewritten and what they become.
bool DetourHook::Apply(lir::CodeIr* code_ir) {
  ir::Builder builder(code_ir->dex_ir);

  // search for matching invoke-virtual[/range] bytecodes
  for (auto instr : code_ir->instructions) {
    BytecodeConvertingVisitor visitor;
    instr->Accept(&visitor);
    auto bytecode = visitor.out;
    if (bytecode == nullptr) {
      continue;
    }

    dex::Opcode new_call_opcode = GetNewOpcode(bytecode->opcode);
    if (new_call_opcode == dex::OP_NOP) {
      continue;
    }

    auto orig_method = bytecode->CastOperand<lir::Method>(1)->ir_method;
    if (!orig_method_id_.Match(orig_method)) {
      // this is not the method you're looking for...
      continue;
    }

    // construct the detour method declaration
    // (matching the original method, plus an explicit "this" argument)
    std::vector<ir::Type*> param_types;
    param_types.push_back(orig_method->parent);
    if (orig_method->prototype->param_types != nullptr) {
      const auto& orig_param_types = orig_method->prototype->param_types->types;
      param_types.insert(param_types.end(), orig_param_types.begin(),
                         orig_param_types.end());
    }

    auto ir_proto = builder.GetProto(orig_method->prototype->return_type,
                                     builder.GetTypeList(param_types));

    auto ir_method_decl = builder.GetMethodDecl(
        builder.GetAsciiString(detour_method_id_.method_name), ir_proto,
        builder.GetType(detour_method_id_.class_descriptor));

    auto detour_method =
        code_ir->Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);

    // We mutate the original invoke bytecode in-place: this is ok
    // because lir::Instructions can't be shared (referenced multiple times)
    // in the code IR. It's also simpler and more efficient than allocating a
    // new IR invoke bytecode.
    bytecode->opcode = new_call_opcode;
    bytecode->operands[1] = detour_method;
  }

  return true;
}

// Maps invoke-virtual[/range] to invoke-static[/range]; anything else is
// left alone (OP_NOP means "skip this instruction").
dex::Opcode DetourVirtualInvoke::GetNewOpcode(dex::Opcode opcode) {
  switch (opcode) {
    case dex::OP_INVOKE_VIRTUAL:
      return dex::OP_INVOKE_STATIC;
    case dex::OP_INVOKE_VIRTUAL_RANGE:
      return dex::OP_INVOKE_STATIC_RANGE;
    default:
      // skip instruction ...
      return dex::OP_NOP;
  }
}

// Maps invoke-interface[/range] to invoke-static[/range]; anything else is
// left alone (OP_NOP means "skip this instruction").
dex::Opcode DetourInterfaceInvoke::GetNewOpcode(dex::Opcode opcode) {
  switch (opcode) {
    case dex::OP_INVOKE_INTERFACE:
      return dex::OP_INVOKE_STATIC;
    case dex::OP_INVOKE_INTERFACE_RANGE:
      return dex::OP_INVOKE_STATIC_RANGE;
    default:
      // skip instruction ...
      return dex::OP_NOP;
  }
}

// Register re-numbering visitor
// (renumbers vN to vN+shift)
class RegsRenumberVisitor : public lir::Visitor {
 public:
  explicit RegsRenumberVisitor(int shift) : shift_(shift) {
    SLICER_CHECK(shift > 0);
  }

 private:
  virtual bool Visit(lir::Bytecode* bytecode) override {
    for (auto operand : bytecode->operands) {
      operand->Accept(this);
    }
    return true;
  }

  virtual bool Visit(lir::DbgInfoAnnotation* dbg_annotation) override {
    for (auto operand : dbg_annotation->operands) {
      operand->Accept(this);
    }
    return true;
  }

  virtual bool Visit(lir::VReg* vreg) override {
    vreg->reg += shift_;
    return true;
  }

  virtual bool Visit(lir::VRegPair* vreg_pair) override {
    vreg_pair->base_reg += shift_;
    return true;
  }

  virtual bool Visit(lir::VRegList* vreg_list) override {
    for (auto& reg : vreg_list->registers) {
      reg += shift_;
    }
    return true;
  }

  virtual bool Visit(lir::VRegRange* vreg_range) override {
    vreg_range->base_reg += shift_;
    return true;
  }

 private:
  int shift_ = 0;
};

// Try to allocate registers by renumbering the existing allocation
//
// NOTE: we can't bump the register count over 16 since it may
//   make existing bytecodes "unencodable" (if they have 4 bit reg fields)
//
void AllocateScratchRegs::RegsRenumbering(lir::CodeIr* code_ir) {
  SLICER_CHECK(left_to_allocate_ > 0);
  int delta = std::min(left_to_allocate_,
                       16 - static_cast<int>(code_ir->ir_method->code->registers));
  if (delta < 1) {
    // can't allocate any registers through renumbering
    return;
  }
  assert(delta <= 16);

  // renumber existing registers
  RegsRenumberVisitor visitor(delta);
  for (auto instr : code_ir->instructions) {
    instr->Accept(&visitor);
  }

  // we just allocated "delta" registers (v0..vX)
  Allocate(code_ir, 0, delta);
}

// Allocates registers by generating prologue code to relocate params
// into their original registers (parameters are allocated in the last IN registers)
//
// There are three types of register moves depending on the value type:
// 1. vreg -> vreg
// 2. vreg/wide -> vreg/wide
// 3. vreg/obj -> vreg/obj
//
void AllocateScratchRegs::ShiftParams(lir::CodeIr* code_ir) {
  const auto ir_method = code_ir->ir_method;
  SLICER_CHECK(ir_method->code->ins_count > 0);
  SLICER_CHECK(left_to_allocate_ > 0);

  // build a param list with the explicit "this" argument for non-static methods
  std::vector<ir::Type*> param_types;
  if ((ir_method->access_flags & dex::kAccStatic) == 0) {
    param_types.push_back(ir_method->decl->parent);
  }
  if (ir_method->decl->prototype->param_types != nullptr) {
    const auto& orig_param_types = ir_method->decl->prototype->param_types->types;
    param_types.insert(param_types.end(), orig_param_types.begin(),
                       orig_param_types.end());
  }

  const dex::u4 shift = left_to_allocate_;

  Allocate(code_ir, ir_method->code->registers, left_to_allocate_);
  assert(left_to_allocate_ == 0);

  const dex::u4 regs = ir_method->code->registers;
  const dex::u4 ins_count = ir_method->code->ins_count;
  SLICER_CHECK(regs >= ins_count);

  // generate the args "relocation" instructions
  auto first_instr = code_ir->instructions.begin();
  dex::u4 reg = regs - ins_count;
  for (const auto& type : param_types) {
    auto move = code_ir->Alloc<lir::Bytecode>();
    switch (type->GetCategory()) {
      case ir::Type::Category::Reference:
        move->opcode = dex::OP_MOVE_OBJECT_16;
        move->operands.push_back(code_ir->Alloc<lir::VReg>(reg - shift));
        move->operands.push_back(code_ir->Alloc<lir::VReg>(reg));
        reg += 1;
        break;
      case ir::Type::Category::Scalar:
        move->opcode = dex::OP_MOVE_16;
        move->operands.push_back(code_ir->Alloc<lir::VReg>(reg - shift));
        move->operands.push_back(code_ir->Alloc<lir::VReg>(reg));
        reg += 1;
        break;
      case ir::Type::Category::WideScalar:
        move->opcode = dex::OP_MOVE_WIDE_16;
        move->operands.push_back(code_ir->Alloc<lir::VRegPair>(reg - shift));
        move->operands.push_back(code_ir->Alloc<lir::VRegPair>(reg));
        reg += 2;
        break;
      case ir::Type::Category::Void:
        SLICER_FATAL("void parameter type");
    }
    code_ir->instructions.insert(first_instr, move);
  }
}

// Mark [first_reg, first_reg + count) as scratch registers
void AllocateScratchRegs::Allocate(lir::CodeIr* code_ir, dex::u4 first_reg,
                                   int count) {
  SLICER_CHECK(count > 0 && count <= left_to_allocate_);
  code_ir->ir_method->code->registers += count;
  left_to_allocate_ -= count;
  for (int i = 0; i < count; ++i) {
    // each scratch register must be freshly allocated (no duplicates)
    SLICER_CHECK(scratch_regs_.insert(first_reg + i).second);
  }
}

// Allocate scratch registers without doing a full register allocation:
//
// 1. if there are no params, increase the method regs count and we're done
// 2. if the method uses less than 16 registers, we can renumber the existing registers
// 3. if we still have registers to allocate, increase the method registers count,
//    and generate prologue code to shift the param regs into their original registers
//
bool AllocateScratchRegs::Apply(lir::CodeIr* code_ir) {
  const auto code = code_ir->ir_method->code;
  // .dex bytecode allows up to 64k vregs
  SLICER_CHECK(code->registers + allocate_count_ <= (1 << 16));

  scratch_regs_.clear();
  left_to_allocate_ = allocate_count_;

  // can we allocate by simply incrementing the method regs count?
  if (code->ins_count == 0) {
    Allocate(code_ir, code->registers, left_to_allocate_);
    return true;
  }

  // allocate as many registers as possible using renumbering
  if (allow_renumbering_) {
    RegsRenumbering(code_ir);
  }

  // if we still have registers to allocate, generate prologue
  // code to shift the params into their original registers
  if (left_to_allocate_ > 0) {
    ShiftParams(code_ir);
  }

  assert(left_to_allocate_ == 0);
  assert(scratch_regs_.size() == size_t(allocate_count_));
  return true;
}

// Applies all queued transformations to the method's code IR and
// reassembles the bytecode. Returns false for abstract/native methods
// (no code) or when any transformation fails.
bool MethodInstrumenter::InstrumentMethod(ir::EncodedMethod* ir_method) {
  SLICER_CHECK(ir_method != nullptr);
  if (ir_method->code == nullptr) {
    // can't instrument abstract methods
    return false;
  }

  // apply all the queued transformations
  lir::CodeIr code_ir(ir_method, dex_ir_);
  for (const auto& transformation : transformations_) {
    if (!transformation->Apply(&code_ir)) {
      // the transformation failed, bail out...
      return false;
    }
  }
  code_ir.Assemble();
  return true;
}

// Convenience overload: looks up the method by MethodId first.
// Returns false when the method can't be found or can't be instrumented.
bool MethodInstrumenter::InstrumentMethod(const ir::MethodId& method_id) {
  // locate the method to be instrumented
  ir::Builder builder(dex_ir_);
  auto ir_method = builder.FindMethod(method_id);
  if (ir_method == nullptr) {
    // we couldn't find the specified method
    return false;
  }
  return InstrumentMethod(ir_method);
}

}  // namespace slicer