1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include "dex_format.h" 20 21 #include <stddef.h> 22 23 // .dex bytecode definitions and helpers: 24 // https://source.android.com/devices/tech/dalvik/dalvik-bytecode.html 25 26 namespace dex { 27 28 // The number of Dalvik opcodes 29 constexpr size_t kNumPackedOpcodes = 0x100; 30 31 // Switch table and array data signatures are a code unit consisting 32 // of "NOP" (0x00) in the low-order byte and a non-zero identifying 33 // code in the high-order byte. (A true NOP is 0x0000.) 34 constexpr u2 kPackedSwitchSignature = 0x0100; 35 constexpr u2 kSparseSwitchSignature = 0x0200; 36 constexpr u2 kArrayDataSignature = 0x0300; 37 38 // Enumeration of all Dalvik opcodes 39 enum Opcode : u1 { 40 OP_NOP = 0x00, 41 OP_MOVE = 0x01, 42 OP_MOVE_FROM16 = 0x02, 43 OP_MOVE_16 = 0x03, 44 OP_MOVE_WIDE = 0x04, 45 OP_MOVE_WIDE_FROM16 = 0x05, 46 OP_MOVE_WIDE_16 = 0x06, 47 OP_MOVE_OBJECT = 0x07, 48 OP_MOVE_OBJECT_FROM16 = 0x08, 49 OP_MOVE_OBJECT_16 = 0x09, 50 OP_MOVE_RESULT = 0x0a, 51 OP_MOVE_RESULT_WIDE = 0x0b, 52 OP_MOVE_RESULT_OBJECT = 0x0c, 53 OP_MOVE_EXCEPTION = 0x0d, 54 OP_RETURN_VOID = 0x0e, 55 OP_RETURN = 0x0f, 56 OP_RETURN_WIDE = 0x10, 57 OP_RETURN_OBJECT = 0x11, 58 OP_CONST_4 = 0x12, 59 OP_CONST_16 = 0x13, 60 OP_CONST = 0x14, 61 OP_CONST_HIGH16 = 0x15, 62 OP_CONST_WIDE_16 = 0x16, 63 OP_CONST_WIDE_32 = 0x17, 64 OP_CONST_WIDE = 0x18, 65 OP_CONST_WIDE_HIGH16 = 0x19, 66 OP_CONST_STRING = 0x1a, 67 OP_CONST_STRING_JUMBO = 0x1b, 68 OP_CONST_CLASS = 0x1c, 69 OP_MONITOR_ENTER = 0x1d, 70 OP_MONITOR_EXIT = 0x1e, 71 OP_CHECK_CAST = 0x1f, 72 OP_INSTANCE_OF = 0x20, 73 OP_ARRAY_LENGTH = 0x21, 74 OP_NEW_INSTANCE = 0x22, 75 OP_NEW_ARRAY = 0x23, 76 OP_FILLED_NEW_ARRAY = 0x24, 77 OP_FILLED_NEW_ARRAY_RANGE = 0x25, 78 OP_FILL_ARRAY_DATA = 0x26, 79 OP_THROW = 0x27, 80 OP_GOTO = 0x28, 81 OP_GOTO_16 = 0x29, 82 OP_GOTO_32 = 0x2a, 83 OP_PACKED_SWITCH = 0x2b, 84 OP_SPARSE_SWITCH = 0x2c, 85 OP_CMPL_FLOAT = 0x2d, 86 OP_CMPG_FLOAT = 0x2e, 87 OP_CMPL_DOUBLE = 0x2f, 88 OP_CMPG_DOUBLE = 0x30, 89 OP_CMP_LONG = 0x31, 90 OP_IF_EQ = 0x32, 91 OP_IF_NE = 0x33, 92 OP_IF_LT = 0x34, 93 OP_IF_GE = 0x35, 94 OP_IF_GT = 0x36, 95 OP_IF_LE = 0x37, 96 OP_IF_EQZ = 0x38, 97 OP_IF_NEZ = 0x39, 98 OP_IF_LTZ = 0x3a, 99 OP_IF_GEZ = 0x3b, 100 OP_IF_GTZ = 0x3c, 101 OP_IF_LEZ = 0x3d, 102 OP_UNUSED_3E = 0x3e, 103 OP_UNUSED_3F = 0x3f, 104 OP_UNUSED_40 = 0x40, 105 OP_UNUSED_41 = 0x41, 106 OP_UNUSED_42 = 0x42, 107 OP_UNUSED_43 = 0x43, 108 OP_AGET = 0x44, 109 OP_AGET_WIDE = 0x45, 110 OP_AGET_OBJECT = 0x46, 111 OP_AGET_BOOLEAN = 0x47, 112 OP_AGET_BYTE = 0x48, 113 OP_AGET_CHAR = 0x49, 114 OP_AGET_SHORT = 0x4a, 115 OP_APUT = 0x4b, 116 OP_APUT_WIDE = 0x4c, 117 OP_APUT_OBJECT = 0x4d, 118 OP_APUT_BOOLEAN = 0x4e, 119 OP_APUT_BYTE = 0x4f, 120 OP_APUT_CHAR = 0x50, 121 OP_APUT_SHORT = 0x51, 122 OP_IGET = 0x52, 123 OP_IGET_WIDE = 0x53, 124 OP_IGET_OBJECT = 0x54, 125 OP_IGET_BOOLEAN = 0x55, 126 OP_IGET_BYTE = 0x56, 127 OP_IGET_CHAR = 0x57, 128 OP_IGET_SHORT = 0x58, 129 OP_IPUT = 0x59, 130 OP_IPUT_WIDE = 0x5a, 131 OP_IPUT_OBJECT = 0x5b, 132 OP_IPUT_BOOLEAN = 0x5c, 133 OP_IPUT_BYTE = 0x5d, 134 OP_IPUT_CHAR = 0x5e, 135 OP_IPUT_SHORT = 0x5f, 136 OP_SGET = 0x60, 137 OP_SGET_WIDE = 0x61, 138 OP_SGET_OBJECT = 0x62, 139 OP_SGET_BOOLEAN = 0x63, 140 OP_SGET_BYTE = 0x64, 141 OP_SGET_CHAR = 0x65, 142 OP_SGET_SHORT = 0x66, 143 OP_SPUT = 0x67, 144 OP_SPUT_WIDE = 0x68, 145 OP_SPUT_OBJECT = 0x69, 146 OP_SPUT_BOOLEAN = 0x6a, 147 OP_SPUT_BYTE = 0x6b, 148 OP_SPUT_CHAR = 0x6c, 149 OP_SPUT_SHORT = 0x6d, 150 OP_INVOKE_VIRTUAL = 0x6e, 151 OP_INVOKE_SUPER = 0x6f, 152 OP_INVOKE_DIRECT = 0x70, 153 OP_INVOKE_STATIC = 0x71, 154 OP_INVOKE_INTERFACE = 0x72, 155 OP_UNUSED_73 = 0x73, 156 OP_INVOKE_VIRTUAL_RANGE = 0x74, 157 OP_INVOKE_SUPER_RANGE = 0x75, 158 OP_INVOKE_DIRECT_RANGE = 0x76, 159 OP_INVOKE_STATIC_RANGE = 0x77, 160 OP_INVOKE_INTERFACE_RANGE = 0x78, 161 OP_UNUSED_79 = 0x79, 162 OP_UNUSED_7A = 0x7a, 163 OP_NEG_INT = 0x7b, 164 OP_NOT_INT = 0x7c, 165 OP_NEG_LONG = 0x7d, 166 OP_NOT_LONG = 0x7e, 167 OP_NEG_FLOAT = 0x7f, 168 OP_NEG_DOUBLE = 0x80, 169 OP_INT_TO_LONG = 0x81, 170 OP_INT_TO_FLOAT = 0x82, 171 OP_INT_TO_DOUBLE = 0x83, 172 OP_LONG_TO_INT = 0x84, 173 OP_LONG_TO_FLOAT = 0x85, 174 OP_LONG_TO_DOUBLE = 0x86, 175 OP_FLOAT_TO_INT = 0x87, 176 OP_FLOAT_TO_LONG = 0x88, 177 OP_FLOAT_TO_DOUBLE = 0x89, 178 OP_DOUBLE_TO_INT = 0x8a, 179 OP_DOUBLE_TO_LONG = 0x8b, 180 OP_DOUBLE_TO_FLOAT = 0x8c, 181 OP_INT_TO_BYTE = 0x8d, 182 OP_INT_TO_CHAR = 0x8e, 183 OP_INT_TO_SHORT = 0x8f, 184 OP_ADD_INT = 0x90, 185 OP_SUB_INT = 0x91, 186 OP_MUL_INT = 0x92, 187 OP_DIV_INT = 0x93, 188 OP_REM_INT = 0x94, 189 OP_AND_INT = 0x95, 190 OP_OR_INT = 0x96, 191 OP_XOR_INT = 0x97, 192 OP_SHL_INT = 0x98, 193 OP_SHR_INT = 0x99, 194 OP_USHR_INT = 0x9a, 195 OP_ADD_LONG = 0x9b, 196 OP_SUB_LONG = 0x9c, 197 OP_MUL_LONG = 0x9d, 198 OP_DIV_LONG = 0x9e, 199 OP_REM_LONG = 0x9f, 200 OP_AND_LONG = 0xa0, 201 OP_OR_LONG = 0xa1, 202 OP_XOR_LONG = 0xa2, 203 OP_SHL_LONG = 0xa3, 204 OP_SHR_LONG = 0xa4, 205 OP_USHR_LONG = 0xa5, 206 OP_ADD_FLOAT = 0xa6, 207 OP_SUB_FLOAT = 0xa7, 208 OP_MUL_FLOAT = 0xa8, 209 OP_DIV_FLOAT = 0xa9, 210 OP_REM_FLOAT = 0xaa, 211 OP_ADD_DOUBLE = 0xab, 212 OP_SUB_DOUBLE = 0xac, 213 OP_MUL_DOUBLE = 0xad, 214 OP_DIV_DOUBLE = 0xae, 215 OP_REM_DOUBLE = 0xaf, 216 OP_ADD_INT_2ADDR = 0xb0, 217 OP_SUB_INT_2ADDR = 0xb1, 218 OP_MUL_INT_2ADDR = 0xb2, 219 OP_DIV_INT_2ADDR = 0xb3, 220 OP_REM_INT_2ADDR = 0xb4, 221 OP_AND_INT_2ADDR = 0xb5, 222 OP_OR_INT_2ADDR = 0xb6, 223 OP_XOR_INT_2ADDR = 0xb7, 224 OP_SHL_INT_2ADDR = 0xb8, 225 OP_SHR_INT_2ADDR = 0xb9, 226 OP_USHR_INT_2ADDR = 0xba, 227 OP_ADD_LONG_2ADDR = 0xbb, 228 OP_SUB_LONG_2ADDR = 0xbc, 229 OP_MUL_LONG_2ADDR = 0xbd, 230 OP_DIV_LONG_2ADDR = 0xbe, 231 OP_REM_LONG_2ADDR = 0xbf, 232 OP_AND_LONG_2ADDR = 0xc0, 233 OP_OR_LONG_2ADDR = 0xc1, 234 OP_XOR_LONG_2ADDR = 0xc2, 235 OP_SHL_LONG_2ADDR = 0xc3, 236 OP_SHR_LONG_2ADDR = 0xc4, 237 OP_USHR_LONG_2ADDR = 0xc5, 238 OP_ADD_FLOAT_2ADDR = 0xc6, 239 OP_SUB_FLOAT_2ADDR = 0xc7, 240 OP_MUL_FLOAT_2ADDR = 0xc8, 241 OP_DIV_FLOAT_2ADDR = 0xc9, 242 OP_REM_FLOAT_2ADDR = 0xca, 243 OP_ADD_DOUBLE_2ADDR = 0xcb, 244 OP_SUB_DOUBLE_2ADDR = 0xcc, 245 OP_MUL_DOUBLE_2ADDR = 0xcd, 246 OP_DIV_DOUBLE_2ADDR = 0xce, 247 OP_REM_DOUBLE_2ADDR = 0xcf, 248 OP_ADD_INT_LIT16 = 0xd0, 249 OP_RSUB_INT = 0xd1, 250 OP_MUL_INT_LIT16 = 0xd2, 251 OP_DIV_INT_LIT16 = 0xd3, 252 OP_REM_INT_LIT16 = 0xd4, 253 OP_AND_INT_LIT16 = 0xd5, 254 OP_OR_INT_LIT16 = 0xd6, 255 OP_XOR_INT_LIT16 = 0xd7, 256 OP_ADD_INT_LIT8 = 0xd8, 257 OP_RSUB_INT_LIT8 = 0xd9, 258 OP_MUL_INT_LIT8 = 0xda, 259 OP_DIV_INT_LIT8 = 0xdb, 260 OP_REM_INT_LIT8 = 0xdc, 261 OP_AND_INT_LIT8 = 0xdd, 262 OP_OR_INT_LIT8 = 0xde, 263 OP_XOR_INT_LIT8 = 0xdf, 264 OP_SHL_INT_LIT8 = 0xe0, 265 OP_SHR_INT_LIT8 = 0xe1, 266 OP_USHR_INT_LIT8 = 0xe2, 267 OP_IGET_VOLATILE = 0xe3, 268 OP_IPUT_VOLATILE = 0xe4, 269 OP_SGET_VOLATILE = 0xe5, 270 OP_SPUT_VOLATILE = 0xe6, 271 OP_IGET_OBJECT_VOLATILE = 0xe7, 272 OP_IGET_WIDE_VOLATILE = 0xe8, 273 OP_IPUT_WIDE_VOLATILE = 0xe9, 274 OP_SGET_WIDE_VOLATILE = 0xea, 275 OP_SPUT_WIDE_VOLATILE = 0xeb, 276 OP_BREAKPOINT = 0xec, 277 OP_THROW_VERIFICATION_ERROR = 0xed, 278 OP_EXECUTE_INLINE = 0xee, 279 OP_EXECUTE_INLINE_RANGE = 0xef, 280 OP_INVOKE_OBJECT_INIT_RANGE = 0xf0, 281 OP_RETURN_VOID_BARRIER = 0xf1, 282 OP_IGET_QUICK = 0xf2, 283 OP_IGET_WIDE_QUICK = 0xf3, 284 OP_IGET_OBJECT_QUICK = 0xf4, 285 OP_IPUT_QUICK = 0xf5, 286 OP_IPUT_WIDE_QUICK = 0xf6, 287 OP_IPUT_OBJECT_QUICK = 0xf7, 288 OP_INVOKE_VIRTUAL_QUICK = 0xf8, 289 OP_INVOKE_VIRTUAL_QUICK_RANGE = 0xf9, 290 OP_INVOKE_SUPER_QUICK = 0xfa, 291 OP_INVOKE_SUPER_QUICK_RANGE = 0xfb, 292 OP_IPUT_OBJECT_VOLATILE = 0xfc, 293 OP_SGET_OBJECT_VOLATILE = 0xfd, 294 OP_SPUT_OBJECT_VOLATILE = 0xfe, 295 OP_UNUSED_FF = 0xff, 296 }; 297 298 // Instruction formats associated with Dalvik opcodes 299 enum InstructionFormat : u1 { 300 kFmt00x = 0, // unknown format (also used for "breakpoint" opcode) 301 kFmt10x, // op 302 kFmt12x, // op vA, vB 303 kFmt11n, // op vA, #+B 304 kFmt11x, // op vAA 305 kFmt10t, // op +AA 306 kFmt20bc, // [opt] op AA, thing@BBBB 307 kFmt20t, // op +AAAA 308 kFmt22x, // op vAA, vBBBB 309 kFmt21t, // op vAA, +BBBB 310 kFmt21s, // op vAA, #+BBBB 311 kFmt21h, // op vAA, #+BBBB00000[00000000] 312 kFmt21c, // op vAA, thing@BBBB 313 kFmt23x, // op vAA, vBB, vCC 314 kFmt22b, // op vAA, vBB, #+CC 315 kFmt22t, // op vA, vB, +CCCC 316 kFmt22s, // op vA, vB, #+CCCC 317 kFmt22c, // op vA, vB, thing@CCCC 318 kFmt22cs, // [opt] op vA, vB, field offset CCCC 319 kFmt30t, // op +AAAAAAAA 320 kFmt32x, // op vAAAA, vBBBB 321 kFmt31i, // op vAA, #+BBBBBBBB 322 kFmt31t, // op vAA, +BBBBBBBB 323 kFmt31c, // op vAA, string@BBBBBBBB 324 kFmt35c, // op {vC,vD,vE,vF,vG}, thing@BBBB 325 kFmt35ms, // [opt] invoke-virtual+super 326 kFmt3rc, // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB 327 kFmt3rms, // [opt] invoke-virtual+super/range 328 kFmt51l, // op vAA, #+BBBBBBBBBBBBBBBB 329 kFmt35mi, // [opt] inline invoke 330 kFmt3rmi, // [opt] inline invoke/range 331 }; 332 333 using OpcodeFlags = u4; 334 335 enum : OpcodeFlags { 336 kInstrCanBranch = 1 << 0, // conditional or unconditional branch 337 kInstrCanContinue = 1 << 1, // flow can continue to next statement 338 kInstrCanSwitch = 1 << 2, // switch statement 339 kInstrCanThrow = 1 << 3, // could cause an exception to be thrown 340 kInstrCanReturn = 1 << 4, // returns, no additional statements 341 kInstrInvoke = 1 << 5, // a flavor of invoke 342 kInstrWideRegA = 1 << 6, // wide (64bit) vA 343 kInstrWideRegB = 1 << 7, // wide (64bit) vB 344 kInstrWideRegC = 1 << 8, // wide (64bit) vC 345 }; 346 347 // Types of indexed reference that are associated with opcodes whose 348 // formats include such an indexed reference (e.g., 21c and 35c). 349 enum InstructionIndexType : u1 { 350 kIndexUnknown = 0, 351 kIndexNone, // has no index 352 kIndexVaries, // "It depends." Used for throw-verification-error 353 kIndexTypeRef, // type reference index 354 kIndexStringRef, // string reference index 355 kIndexMethodRef, // method reference index 356 kIndexFieldRef, // field reference index 357 kIndexInlineMethod, // inline method index (for inline linked methods) 358 kIndexVtableOffset, // vtable offset (for static linked methods) 359 kIndexFieldOffset // field offset (for static linked fields) 360 }; 361 362 // Holds the contents of a decoded instruction. 363 struct Instruction { 364 u4 vA; // the A field of the instruction 365 u4 vB; // the B field of the instruction 366 u8 vB_wide; // 64bit version of the B field (for kFmt51l) 367 u4 vC; // the C field of the instruction 368 u4 arg[5]; // vC/D/E/F/G in invoke or filled-new-array 369 Opcode opcode; // instruction opcode 370 }; 371 372 // "packed-switch-payload" format 373 struct PackedSwitchPayload { 374 u2 ident; 375 u2 size; 376 s4 first_key; 377 s4 targets[]; 378 }; 379 380 // "sparse-switch-payload" format 381 struct SparseSwitchPayload { 382 u2 ident; 383 u2 size; 384 s4 data[]; 385 }; 386 387 // "fill-array-data-payload" format 388 struct ArrayData { 389 u2 ident; 390 u2 element_width; 391 u4 size; 392 u1 data[]; 393 }; 394 395 // Extracts the opcode from a Dalvik code unit (bytecode) 396 Opcode OpcodeFromBytecode(u2 bytecode); 397 398 // Returns the name of an opcode 399 const char* GetOpcodeName(Opcode opcode); 400 401 // Returns the index type associated with the specified opcode 402 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode); 403 404 // Returns the format associated with the specified opcode 405 InstructionFormat GetFormatFromOpcode(Opcode opcode); 406 407 // Returns the flags for the specified opcode 408 OpcodeFlags GetFlagsFromOpcode(Opcode opcode); 409 410 // Returns the instruction width for the specified opcode 411 size_t GetWidthFromOpcode(Opcode opcode); 412 413 // Return the width of the specified instruction, or 0 if not defined. Also 414 // works for special OP_NOP entries, including switch statement data tables 415 // and array data. 416 size_t GetWidthFromBytecode(const u2* bytecode); 417 418 // Decode a .dex bytecode 419 Instruction DecodeInstruction(const u2* bytecode); 420 421 } // namespace dex 422