%def binop(preinstr="", result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit binary operation. Provide an "instr" line that
     * specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call. (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (r1). Useful for integer division and modulus. Note that we
     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
     * handles it correctly.
     *
     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
     *      mul-float, div-float, rem-float
     */
    /* binop vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    mov     r3, r0, lsr #8              @ r3<- CC
    and     r2, r0, #255                @ r2<- BB
    GET_VREG r1, r3                     @ r1<- vCC
    GET_VREG r0, r2                     @ r0<- vBB
    .if $chkzero
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    .endif

    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r9                @ vAA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 11-14 instructions */
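
    /*
     * Example (a sketch, not generated verbatim): op_add_int later in
     * this file instantiates this template with instr="add r0, r0, r1",
     * so the core of the generated handler is roughly:
     *
     *     GET_VREG r1, r3              @ r1<- vCC
     *     GET_VREG r0, r2              @ r0<- vBB
     *     FETCH_ADVANCE_INST 2
     *     add     r0, r0, r1           @ r0<- vBB + vCC
     *     SET_VREG r0, r9              @ vAA<- r0
     */
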
%def binop2addr(preinstr="", result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit "/2addr" binary operation. Provide an "instr" line
     * that specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call. (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vB (r1). Useful for integer division and modulus.
     *
     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
     */
    /* binop/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r1, r3                     @ r1<- vB
    GET_VREG r0, r9                     @ r0<- vA
    .if $chkzero
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    .endif
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST

    $preinstr                           @ optional op; may set condition codes
    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r9                @ vA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def binopLit16(result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit "lit16" binary operation. Provide an "instr" line
     * that specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call. (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * the literal CCCC (r1). Useful for integer division and modulus.
     *
     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
     */
    /* binop/lit16 vA, vB, #+CCCC */
    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
    mov     r2, rINST, lsr #12          @ r2<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r0, r2                     @ r0<- vB
    .if $chkzero
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    .endif
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r9                @ vA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def binopLit8(extract="asr r1, r3, #8", result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit "lit8" binary operation. Provide an "instr" line
     * that specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call. (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * You can override "extract" if the extraction of the literal value
     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
     * can be omitted completely if the shift is embedded in "instr".
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * the literal CC (r1). Useful for integer division and modulus.
     *
     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
     */
    /* binop/lit8 vAA, vBB, #+CC */
    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r2, r3, #255                @ r2<- BB
    GET_VREG r0, r2                     @ r0<- vBB
    $extract                            @ optional; typically r1<- ssssssCC (sign extended)
    .if $chkzero
    @cmp    r1, #0                      @ is second operand zero? (Z set by a flag-setting "extract")
    beq     common_errDivideByZero
    .endif
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r9                @ vAA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-12 instructions */
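
    /*
     * Example (a sketch): the "extract" hook is how the lit8 shifts get
     * their 5-bit masking. op_shl_int_lit8 later in this file passes
     * extract="ubfx r1, r3, #8, #5", so only the low five bits of the
     * literal reach r1, while op_add_int_lit8 passes extract="" and
     * folds the extraction into instr ("add r0, r0, r3, asr #8").
     */
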
%def binopWide(preinstr="", result0="r0", result1="r1", chkzero="0", instr=""):
    /*
     * Generic 64-bit binary operation. Provide an "instr" line that
     * specifies an instruction that performs "result = r0-r1 op r2-r3".
     * This could be an ARM instruction or a function call. (If the result
     * comes back in registers other than r0-r1, you can override
     * "result0"/"result1".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (r2-r3). Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     */
    /* binop vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     rINST, rINST, lsr #8        @ rINST<- AA
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
    .if $chkzero
    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
    beq     common_errDivideByZero
    .endif
    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR $result0,$result1,r9  @ vAA/vAA+1<- $result0/$result1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 14-17 instructions */

%def binopWide2addr(preinstr="", result0="r0", result1="r1", chkzero="0", instr=""):
    /*
     * Generic 64-bit "/2addr" binary operation. Provide an "instr" line
     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
     * This could be an ARM instruction or a function call. (If the result
     * comes back in registers other than r0-r1, you can override
     * "result0"/"result1".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vB (r2-r3). Useful for integer division and modulus.
     *
     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
     *      rem-double/2addr
     */
    /* binop/2addr vA, vB */
    mov     r1, rINST, lsr #12          @ r1<- B
    ubfx    rINST, rINST, #8, #4        @ rINST<- A
    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vB/vB+1
    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vA/vA+1
    .if $chkzero
    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
    beq     common_errDivideByZero
    .endif
    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR $result0,$result1,r9  @ vA/vA+1<- $result0/$result1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 12-15 instructions */
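
    /*
     * Example (a sketch): op_add_long later in this file pairs
     * preinstr="adds r0, r0, r2" with instr="adc r1, r1, r3"; the adds
     * on the low words sets the carry that the adc folds into the high
     * words, forming a full 64-bit add.
     */
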
%def unop(preinstr="", instr=""):
    /*
     * Generic 32-bit unary operation. Provide an "instr" line that
     * specifies an instruction that performs "result = op r0".
     * This could be an ARM instruction or a function call.
     *
     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
     *      int-to-byte, int-to-char, int-to-short
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r0, r3                     @ r0<- vB
    $preinstr                           @ optional op; may set condition codes
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $instr                              @ r0<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r9                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 8-9 instructions */

%def unopNarrower(preinstr="", instr=""):
    /*
     * Generic 64bit-to-32bit unary operation. Provide an "instr" line
     * that specifies an instruction that performs "result = op r0/r1", where
     * "result" is a 32-bit quantity in r0.
     *
     * For: long-to-float
     *
     * (This would work for long-to-int, but that instruction is actually
     * an exact match for op_move.)
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ r0<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r9                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 9-10 instructions */

%def unopWide(preinstr="", instr=""):
    /*
     * Generic 64-bit unary operation. Provide an "instr" line that
     * specifies an instruction that performs "result = op r0/r1".
     * This could be an ARM instruction or a function call.
     *
     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    rINST, rINST, #8, #4        @ rINST<- A
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ r0/r1<- op, r2-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-11 instructions */

%def unopWider(preinstr="", instr=""):
    /*
     * Generic 32bit-to-64bit unary operation. Provide an "instr" line
     * that specifies an instruction that performs "result = op r0", where
     * "result" is a 64-bit quantity in r0/r1.
     *
     * For: int-to-long, int-to-double, float-to-long, float-to-double
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    rINST, rINST, #8, #4        @ rINST<- A
    GET_VREG r0, r3                     @ r0<- vB
    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
    $preinstr                           @ optional op; may set condition codes
    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $instr                              @ r0<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 9-10 instructions */
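
    /*
     * Example (a sketch): op_int_to_long below uses this template with
     * instr="mov r1, r0, asr #31", replicating vB's sign bit across the
     * high word so r0/r1 hold the sign-extended 64-bit value.
     */
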
%def op_add_int():
%  binop(instr="add r0, r0, r1")

%def op_add_int_2addr():
%  binop2addr(instr="add r0, r0, r1")

%def op_add_int_lit16():
%  binopLit16(instr="add r0, r0, r1")

%def op_add_int_lit8():
%  binopLit8(extract="", instr="add r0, r0, r3, asr #8")

%def op_add_long():
%  binopWide(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")

%def op_add_long_2addr():
%  binopWide2addr(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")

%def op_and_int():
%  binop(instr="and r0, r0, r1")

%def op_and_int_2addr():
%  binop2addr(instr="and r0, r0, r1")

%def op_and_int_lit16():
%  binopLit16(instr="and r0, r0, r1")

%def op_and_int_lit8():
%  binopLit8(extract="", instr="and r0, r0, r3, asr #8")

%def op_and_long():
%  binopWide(preinstr="and r0, r0, r2", instr="and r1, r1, r3")

%def op_and_long_2addr():
%  binopWide2addr(preinstr="and r0, r0, r2", instr="and r1, r1, r3")

%def op_cmp_long():
    /*
     * Compare two 64-bit values. Puts 0, 1, or -1 into the destination
     * register based on the results of the comparison.
     */
    /* cmp-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
    cmp     r0, r2
    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
    mov     ip, #0
    mvnlt   ip, #0                      @ -1
    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
    orrne   ip, #1
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    SET_VREG ip, r9                     @ vAA<- ip
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    GOTO_OPCODE ip                      @ jump to next instruction
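
    /*
     * In C terms the sequence above computes (a sketch):
     *
     *     vAA = (vBB == vCC) ? 0 : ((vBB < vCC) ? -1 : 1);  // signed 64-bit
     *
     * cmp+sbcs establishes LT across both words; when that subtraction
     * comes out zero, cmpeq re-checks the low words so orrne can turn
     * the 0 into 1 for a "greater than" result.
     */
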
%def op_div_int():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int
     *
     */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    mov     r3, r0, lsr #8              @ r3<- CC
    and     r2, r0, #255                @ r2<- BB
    GET_VREG r1, r3                     @ r1<- vCC
    GET_VREG r0, r2                     @ r0<- vBB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero

    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r9                     @ vAA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 11-14 instructions */

%def op_div_int_2addr():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int/2addr
     *
     */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r1, r3                     @ r1<- vB
    GET_VREG r0, r9                     @ r0<- vA
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r9                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */


%def op_div_int_lit16():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int/lit16
     *
     */
    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
    mov     r2, rINST, lsr #12          @ r2<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r0, r2                     @ r0<- vB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r9                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */
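
    /*
     * Both code paths above implement (a sketch):
     *
     *     int32_t q = vB / lit;    // lit already checked against zero
     *
     * __ARM_ARCH_EXT_IDIV__ targets use sdiv inline; others call the
     * EABI helper __aeabi_idiv. Either way INT_MIN / -1 produces
     * INT_MIN rather than trapping, which is why no extra check is
     * needed (see the note in "binop" above).
     */
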
%def op_div_int_lit8():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int/lit8
     *
     */
    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r2, r3, #255                @ r2<- BB
    GET_VREG r0, r2                     @ r0<- vBB
    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended); sets Z
    @cmp    r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r9                     @ vAA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-12 instructions */

%def op_div_long():
%  binopWide(instr="bl __aeabi_ldivmod", chkzero="1")

%def op_div_long_2addr():
%  binopWide2addr(instr="bl __aeabi_ldivmod", chkzero="1")

%def op_int_to_byte():
%  unop(instr="sxtb r0, r0")

%def op_int_to_char():
%  unop(instr="uxth r0, r0")

%def op_int_to_long():
%  unopWider(instr="mov r1, r0, asr #31")

%def op_int_to_short():
%  unop(instr="sxth r0, r0")

%def op_long_to_int():
/* we ignore the high word, making this equivalent to a 32-bit reg move */
%  op_move()

%def op_mul_int():
/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
%  binop(instr="mul r0, r1, r0")

%def op_mul_int_2addr():
/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
%  binop2addr(instr="mul r0, r1, r0")

%def op_mul_int_lit16():
/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
%  binopLit16(instr="mul r0, r1, r0")

%def op_mul_int_lit8():
/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
%  binopLit8(instr="mul r0, r1, r0")

%def op_mul_long():
    /*
     * Signed 64-bit integer multiply.
     *
     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
     *        WX
     *      x YZ
     *  --------
     *     ZW ZX
     *  YW YX
     *
     * The low word of the result holds ZX, the high word holds
     * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because
     * it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     */
    /* mul-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
    mul     ip, r2, r1                  @ ip<- ZxW
    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
    mov     r0, rINST, lsr #8           @ r0<- AA
    add     r2, r2, lr                  @ r2<- r2 + high(ZxX)
    CLEAR_SHADOW_PAIR r0, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r1, r2, r0    @ vAA/vAA+1<- r1/r2
    GOTO_OPCODE ip                      @ jump to next instruction
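
    /*
     * The same computation as a C sketch, with W/X/Y/Z as in the note
     * above (vBB = W:X, vCC = Y:Z):
     *
     *     uint64_t zx = (uint64_t)Z * X;                   // umull
     *     uint32_t hi = Y*X + Z*W + (uint32_t)(zx >> 32);  // mul/mla/add
     *     uint64_t result = ((uint64_t)hi << 32) | (uint32_t)zx;
     */
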
%def op_mul_long_2addr():
    /*
     * Signed 64-bit integer multiply, "/2addr" version.
     *
     * See op_mul_long for an explanation.
     *
     * We get a little tight on registers, so to avoid looking up &fp[A]
     * again we stuff it into rINST.
     */
    /* mul-long/2addr vA, vB */
    mov     r1, rINST, lsr #12          @ r1<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vB/vB+1
    GET_VREG_WIDE_BY_ADDR r0, r1, rINST @ r0/r1<- vA/vA+1
    mul     ip, r2, r1                  @ ip<- ZxW
    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    add     r2, r2, lr                  @ r2<- r2 + high(ZxX)
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r1, r2, r0    @ vA/vA+1<- r1/r2
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_neg_int():
%  unop(instr="rsb r0, r0, #0")

%def op_neg_long():
%  unopWide(preinstr="rsbs r0, r0, #0", instr="rsc r1, r1, #0")

%def op_not_int():
%  unop(instr="mvn r0, r0")

%def op_not_long():
%  unopWide(preinstr="mvn r0, r0", instr="mvn r1, r1")

%def op_or_int():
%  binop(instr="orr r0, r0, r1")

%def op_or_int_2addr():
%  binop2addr(instr="orr r0, r0, r1")

%def op_or_int_lit16():
%  binopLit16(instr="orr r0, r0, r1")

%def op_or_int_lit8():
%  binopLit8(extract="", instr="orr r0, r0, r3, asr #8")

%def op_or_long():
%  binopWide(preinstr="orr r0, r0, r2", instr="orr r1, r1, r3")

%def op_or_long_2addr():
%  binopWide2addr(preinstr="orr r0, r0, r2", instr="orr r1, r1, r3")

%def op_rem_int():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r1 = r0 rem r1". The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int
     *
     */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    mov     r3, r0, lsr #8              @ r3<- CC
    and     r2, r0, #255                @ r2<- BB
    GET_VREG r1, r3                     @ r1<- vCC
    GET_VREG r0, r2                     @ r0<- vBB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero

    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op, r0-r2 changed
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r9                     @ vAA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 11-14 instructions */
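
    /*
     * The sdiv/mls pair above computes the remainder directly
     * (a sketch):
     *
     *     r2 = r0 / r1;       // sdiv, truncated division
     *     r1 = r0 - r2 * r1;  // mls
     *
     * which matches the truncated-remainder semantics of r0 % r1.
     */
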
%def op_rem_int_2addr():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r1 = r0 rem r1". The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int/2addr
     *
     */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r1, r3                     @ r1<- vB
    GET_VREG r0, r9                     @ r0<- vA
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r9                     @ vA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */


%def op_rem_int_lit16():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r1 = r0 rem r1". The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int/lit16
     *
     */
    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
    mov     r2, rINST, lsr #12          @ r2<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r0, r2                     @ r0<- vB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r9                     @ vA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def op_rem_int_lit8():
    /*
     * Specialized 32-bit binary operation
     *
     * Performs "r1 = r0 rem r1". The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int/lit8
     *
     */
    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r2, r3, #255                @ r2<- BB
    GET_VREG r0, r2                     @ r0<- vBB
    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended); sets Z
    @cmp    r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r9                     @ vAA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-12 instructions */

%def op_rem_long():
/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
%  binopWide(instr="bl __aeabi_ldivmod", result0="r2", result1="r3", chkzero="1")

%def op_rem_long_2addr():
/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
%  binopWide2addr(instr="bl __aeabi_ldivmod", result0="r2", result1="r3", chkzero="1")
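
    /*
     * Note (a sketch of the mechanism): rem-long reuses binopWide
     * unchanged except for result0="r2"/result1="r3". Since
     * __aeabi_ldivmod leaves the quotient in r0/r1 and the remainder
     * in r2/r3, overriding the result registers is all it takes to
     * store the remainder instead of the quotient.
     */
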
%def op_rsub_int():
/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
%  binopLit16(instr="rsb r0, r0, r1")

%def op_rsub_int_lit8():
%  binopLit8(extract="", instr="rsb r0, r0, r3, asr #8")

%def op_shl_int():
%  binop(preinstr="and r1, r1, #31", instr="mov r0, r0, asl r1")

%def op_shl_int_2addr():
%  binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, asl r1")

%def op_shl_int_lit8():
%  binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, asl r1")

%def op_shl_long():
    /*
     * Long integer shift. This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit. Also, Dalvik requires us to mask off the low
     * 6 bits of the shift distance.
     */
    /* shl-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
    mov     r1, r1, asl r2              @ r1<- r1 << r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r0, r0, asl r2              @ r0<- r0 << r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
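
    /*
     * The shift sequence above in C terms (a sketch, with s = vCC & 63
     * and hi/lo the two halves of vBB):
     *
     *     hi = (hi << s) | (lo >> (32 - s));   // mov/rsb/orr
     *     if (s >= 32) hi = lo << (s - 32);    // subs/movpl
     *     lo = lo << s;
     *
     * ARM register-specified shifts by 32 or more yield 0, so the lo
     * update (and the s == 0 corner of the orr) come out right without
     * extra branches.
     */
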
%def op_shl_long_2addr():
    /*
     * Long integer shift, 2addr version. vA is 64-bit value/result, vB is
     * 32-bit shift distance.
     */
    /* shl-long/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r2, r3                     @ r2<- vB
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vA/vA+1
    mov     r1, r1, asl r2              @ r1<- r1 << r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
    mov     r0, r0, asl r2              @ r0<- r0 << r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_shr_int():
%  binop(preinstr="and r1, r1, #31", instr="mov r0, r0, asr r1")

%def op_shr_int_2addr():
%  binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, asr r1")

%def op_shr_int_lit8():
%  binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, asr r1")

%def op_shr_long():
    /*
     * Long integer shift. This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit. Also, Dalvik requires us to mask off the low
     * 6 bits of the shift distance.
     */
    /* shr-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r1, r1, asr r2              @ r1<- r1 >> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_shr_long_2addr():
    /*
     * Long integer shift, 2addr version. vA is 64-bit value/result, vB is
     * 32-bit shift distance.
     */
    /* shr-long/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r2, r3                     @ r2<- vB
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vA/vA+1
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
    mov     r1, r1, asr r2              @ r1<- r1 >> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_sub_int():
%  binop(instr="sub r0, r0, r1")

%def op_sub_int_2addr():
%  binop2addr(instr="sub r0, r0, r1")

%def op_sub_long():
%  binopWide(preinstr="subs r0, r0, r2", instr="sbc r1, r1, r3")

%def op_sub_long_2addr():
%  binopWide2addr(preinstr="subs r0, r0, r2", instr="sbc r1, r1, r3")

%def op_ushr_int():
%  binop(preinstr="and r1, r1, #31", instr="mov r0, r0, lsr r1")

%def op_ushr_int_2addr():
%  binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, lsr r1")

%def op_ushr_int_lit8():
%  binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, lsr r1")

%def op_ushr_long():
    /*
     * Long integer shift. This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit. Also, Dalvik requires us to mask off the low
     * 6 bits of the shift distance.
     */
    /* ushr-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
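
    /*
     * ushr-long is shr-long with the two high-word shifts changed from
     * asr to lsr (a sketch of the difference):
     *
     *     movpl   r0, r1, lsr ip      @ zero-fill instead of sign-fill
     *     mov     r1, r1, lsr r2
     */
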
%def op_ushr_long_2addr():
    /*
     * Long integer shift, 2addr version. vA is 64-bit value/result, vB is
     * 32-bit shift distance.
     */
    /* ushr-long/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r9, rINST, #8, #4           @ r9<- A
    GET_VREG r2, r3                     @ r2<- vB
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vA/vA+1
    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_xor_int():
%  binop(instr="eor r0, r0, r1")

%def op_xor_int_2addr():
%  binop2addr(instr="eor r0, r0, r1")

%def op_xor_int_lit16():
%  binopLit16(instr="eor r0, r0, r1")

%def op_xor_int_lit8():
%  binopLit8(extract="", instr="eor r0, r0, r3, asr #8")

%def op_xor_long():
%  binopWide(preinstr="eor r0, r0, r2", instr="eor r1, r1, r3")

%def op_xor_long_2addr():
%  binopWide2addr(preinstr="eor r0, r0, r2", instr="eor r1, r1, r3")