/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingException

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
                                                  @ Load kSaveRefsOnly Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
#endif
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    add sp, #4                                    @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r5-r8, r10-r11, lr}                      @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
                                                  @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
88.endm 89 90 /* 91 * Macro that sets up the callee save frame to conform with 92 * Runtime::CreateCalleeSaveMethod(kSaveEverything) 93 * when core registers are already saved. 94 */ 95.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 96 @ 14 words of callee saves and args already saved. 97 vpush {d0-d15} @ 32 words, 2 for each of the 16 saved doubles. 98 .cfi_adjust_cfa_offset 128 99 sub sp, #8 @ 2 words of space, alignment padding and Method* 100 .cfi_adjust_cfa_offset 8 101 RUNTIME_CURRENT1 \rTemp @ Load Runtime::Current into rTemp. 102 @ Load kSaveEverything Method* into rTemp. 103 ldr \rTemp, [\rTemp, #\runtime_method_offset] 104 str \rTemp, [sp, #0] @ Place Method* at bottom of stack. 105 str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. 106 107 // Ugly compile-time check, but we only have the preprocessor. 108#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8) 109#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected." 110#endif 111.endm 112 113 /* 114 * Macro that sets up the callee save frame to conform with 115 * Runtime::CreateCalleeSaveMethod(kSaveEverything) 116 */ 117.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 118 push {r0-r12, lr} @ 14 words of callee saves and args. 119 .cfi_adjust_cfa_offset 56 120 .cfi_rel_offset r0, 0 121 .cfi_rel_offset r1, 4 122 .cfi_rel_offset r2, 8 123 .cfi_rel_offset r3, 12 124 .cfi_rel_offset r4, 16 125 .cfi_rel_offset r5, 20 126 .cfi_rel_offset r6, 24 127 .cfi_rel_offset r7, 28 128 .cfi_rel_offset r8, 32 129 .cfi_rel_offset r9, 36 130 .cfi_rel_offset r10, 40 131 .cfi_rel_offset r11, 44 132 .cfi_rel_offset ip, 48 133 .cfi_rel_offset lr, 52 134 SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset 135.endm 136 137.macro RESTORE_SAVE_EVERYTHING_FRAME 138 add sp, #8 @ rewind sp 139 .cfi_adjust_cfa_offset -8 140 vpop {d0-d15} 141 .cfi_adjust_cfa_offset -128 142 pop {r0-r12, lr} @ 14 words of callee saves 143 .cfi_restore r0 144 .cfi_restore r1 145 .cfi_restore r2 146 .cfi_restore r3 147 .cfi_restore r4 148 .cfi_restore r5 149 .cfi_restore r6 150 .cfi_restore r7 151 .cfi_restore r8 152 .cfi_restore r9 153 .cfi_restore r10 154 .cfi_restore r11 155 .cfi_restore r12 156 .cfi_restore lr 157 .cfi_adjust_cfa_offset -56 158.endm 159 160.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 161 add sp, #8 @ rewind sp 162 .cfi_adjust_cfa_offset -8 163 vpop {d0-d15} 164 .cfi_adjust_cfa_offset -128 165 add sp, #4 @ skip r0 166 .cfi_adjust_cfa_offset -4 167 .cfi_restore r0 @ debugger can no longer restore caller's r0 168 pop {r1-r12, lr} @ 13 words of callee saves 169 .cfi_restore r1 170 .cfi_restore r2 171 .cfi_restore r3 172 .cfi_restore r4 173 .cfi_restore r5 174 .cfi_restore r6 175 .cfi_restore r7 176 .cfi_restore r8 177 .cfi_restore r9 178 .cfi_restore r10 179 .cfi_restore r11 180 .cfi_restore r12 181 .cfi_restore lr 182 .cfi_adjust_cfa_offset -52 183.endm 184 185.macro RETURN_IF_RESULT_IS_ZERO 186 cbnz r0, 1f @ result non-zero branch over 187 bx lr @ return 1881: 189.endm 190 191.macro RETURN_IF_RESULT_IS_NON_ZERO 192 cbz r0, 1f @ result zero branch over 193 bx lr @ return 1941: 195.endm 196 197.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name 198 .extern \cxx_name 199ENTRY \c_name 200 SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save all registers as basis for long jump context 201 mov r0, rSELF @ pass Thread::Current 202 bl \cxx_name @ \cxx_name(Thread*) 203END \c_name 
204.endm 205 206.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name 207 .extern \cxx_name 208ENTRY \c_name 209 SETUP_SAVE_EVERYTHING_FRAME r0 @ save all registers as basis for long jump context 210 mov r0, rSELF @ pass Thread::Current 211 bl \cxx_name @ \cxx_name(Thread*) 212END \c_name 213.endm 214 215.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name 216 .extern \cxx_name 217ENTRY \c_name 218 SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1 @ save all registers as basis for long jump context 219 mov r1, rSELF @ pass Thread::Current 220 bl \cxx_name @ \cxx_name(Thread*) 221END \c_name 222.endm 223 224.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name 225 .extern \cxx_name 226ENTRY \c_name 227 SETUP_SAVE_EVERYTHING_FRAME r2 @ save all registers as basis for long jump context 228 mov r2, rSELF @ pass Thread::Current 229 bl \cxx_name @ \cxx_name(Thread*) 230END \c_name 231.endm 232 233.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg 234 ldr \reg, [rSELF, #THREAD_EXCEPTION_OFFSET] @ Get exception field. 235 cbnz \reg, 1f 236 bx lr 2371: 238 DELIVER_PENDING_EXCEPTION 239.endm 240 241.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 242 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1 243.endm 244 245.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 246 RETURN_IF_RESULT_IS_ZERO 247 DELIVER_PENDING_EXCEPTION 248.endm 249 250.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER 251 RETURN_IF_RESULT_IS_NON_ZERO 252 DELIVER_PENDING_EXCEPTION 253.endm 254 255// Macros taking opportunity of code similarities for downcalls. 256.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return 257 .extern \entrypoint 258ENTRY \name 259 SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case of GC 260 mov r1, rSELF @ pass Thread::Current 261 bl \entrypoint @ (uint32_t field_idx, Thread*) 262 RESTORE_SAVE_REFS_ONLY_FRAME 263 REFRESH_MARKING_REGISTER 264 \return 265END \name 266.endm 267 268.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return 269 .extern \entrypoint 270ENTRY \name 271 SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC 272 mov r2, rSELF @ pass Thread::Current 273 bl \entrypoint @ (field_idx, Object*, Thread*) 274 RESTORE_SAVE_REFS_ONLY_FRAME 275 REFRESH_MARKING_REGISTER 276 \return 277END \name 278.endm 279 280.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return 281 .extern \entrypoint 282ENTRY \name 283 SETUP_SAVE_REFS_ONLY_FRAME r3 @ save callee saves in case of GC 284 mov r3, rSELF @ pass Thread::Current 285 bl \entrypoint @ (field_idx, Object*, new_val, Thread*) 286 RESTORE_SAVE_REFS_ONLY_FRAME @ TODO: we can clearly save an add here 287 REFRESH_MARKING_REGISTER 288 \return 289END \name 290.endm 291 292 /* 293 * Called by managed code, saves callee saves and then calls artThrowException 294 * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception. 295 */ 296ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode 297 298 /* 299 * Called by managed code to create and deliver a NullPointerException. 300 */ 301NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode 302 303 /* 304 * Call installed by a signal handler to create and deliver a NullPointerException. 305 */ 306 .extern art_quick_throw_null_pointer_exception_from_signal 307ENTRY art_quick_throw_null_pointer_exception_from_signal 308 // The fault handler pushes the gc map address, i.e. "return address", to stack 309 // and passes the fault address in LR. So we need to set up the CFI info accordingly. 
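    // (Added explanatory note, not in the original source: with one word pushed by the
    // fault handler and the fault address passed in LR, the directives below describe the
    // frame as if that pushed word were a saved return address -- CFA = SP + __SIZEOF_POINTER__
    // and LR saved at [SP, #0] -- so unwinding through this entry point stays consistent.)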
310 .cfi_def_cfa_offset __SIZEOF_POINTER__ 311 .cfi_rel_offset lr, 0 312 push {r0-r12} @ 13 words of callee saves and args; LR already saved. 313 .cfi_adjust_cfa_offset 52 314 .cfi_rel_offset r0, 0 315 .cfi_rel_offset r1, 4 316 .cfi_rel_offset r2, 8 317 .cfi_rel_offset r3, 12 318 .cfi_rel_offset r4, 16 319 .cfi_rel_offset r5, 20 320 .cfi_rel_offset r6, 24 321 .cfi_rel_offset r7, 28 322 .cfi_rel_offset r8, 32 323 .cfi_rel_offset r9, 36 324 .cfi_rel_offset r10, 40 325 .cfi_rel_offset r11, 44 326 .cfi_rel_offset ip, 48 327 328 @ save all registers as basis for long jump context 329 SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1 330 mov r0, lr @ pass the fault address stored in LR by the fault handler. 331 mov r1, rSELF @ pass Thread::Current 332 bl artThrowNullPointerExceptionFromSignal @ (Thread*) 333END art_quick_throw_null_pointer_exception_from_signal 334 335 /* 336 * Called by managed code to create and deliver an ArithmeticException. 337 */ 338NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode 339 340 /* 341 * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds 342 * index, arg2 holds limit. 343 */ 344TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode 345 346 /* 347 * Called by managed code to create and deliver a StringIndexOutOfBoundsException 348 * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit. 349 */ 350TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode 351 352 /* 353 * Called by managed code to create and deliver a StackOverflowError. 354 */ 355NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode 356 357 /* 358 * All generated callsites for interface invokes and invocation slow paths will load arguments 359 * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 360 * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper. 361 * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1. 362 * 363 * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting 364 * of the target Method* in r0 and method->code_ in r1. 365 * 366 * If unsuccessful, the helper will return null/null. There will bea pending exception in the 367 * thread and we branch to another stub to deliver it. 368 * 369 * On success this wrapper will restore arguments and *jump* to the target, leaving the lr 370 * pointing back to the original caller. 371 * 372 * Clobbers IP (R12). 373 */ 374.macro INVOKE_TRAMPOLINE_BODY cxx_name 375 .extern \cxx_name 376 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 @ save callee saves in case allocation triggers GC 377 mov r2, rSELF @ pass Thread::Current 378 mov r3, sp 379 bl \cxx_name @ (method_idx, this, Thread*, SP) 380 mov r12, r1 @ save Method*->code_ 381 RESTORE_SAVE_REFS_AND_ARGS_FRAME 382 REFRESH_MARKING_REGISTER 383 cbz r0, 1f @ did we find the target? 
if not go to exception delivery 384 bx r12 @ tail call to target 3851: 386 DELIVER_PENDING_EXCEPTION 387.endm 388.macro INVOKE_TRAMPOLINE c_name, cxx_name 389ENTRY \c_name 390 INVOKE_TRAMPOLINE_BODY \cxx_name 391END \c_name 392.endm 393 394INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck 395 396INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck 397INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck 398INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck 399INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck 400 401 /* 402 * Quick invocation stub internal. 403 * On entry: 404 * r0 = method pointer 405 * r1 = argument array or null for no argument methods 406 * r2 = size of argument array in bytes 407 * r3 = (managed) thread pointer 408 * [sp] = JValue* result 409 * [sp + 4] = result_in_float 410 * [sp + 8] = core register argument array 411 * [sp + 12] = fp register argument array 412 * +-------------------------+ 413 * | uint32_t* fp_reg_args | 414 * | uint32_t* core_reg_args | 415 * | result_in_float | <- Caller frame 416 * | Jvalue* result | 417 * +-------------------------+ 418 * | lr | 419 * | r11 | 420 * | r9 | 421 * | r4 | <- r11 422 * +-------------------------+ 423 * | uint32_t out[n-1] | 424 * | : : | Outs 425 * | uint32_t out[0] | 426 * | StackRef<ArtMethod> | <- SP value=null 427 * +-------------------------+ 428 */ 429ENTRY art_quick_invoke_stub_internal 430 SPILL_ALL_CALLEE_SAVE_GPRS @ spill regs (9) 431 mov r11, sp @ save the stack pointer 432 .cfi_def_cfa_register r11 433 434 mov r9, r3 @ move managed thread pointer into r9 435 436 add r4, r2, #4 @ create space for method pointer in frame 437 sub r4, sp, r4 @ reserve & align *stack* to 16 bytes: native calling 438 and r4, #0xFFFFFFF0 @ convention only aligns to 8B, so we have to ensure ART 439 mov sp, r4 @ 16B alignment ourselves. 440 441 mov r4, r0 @ save method* 442 add r0, sp, #4 @ pass stack pointer + method ptr as dest for memcpy 443 bl memcpy @ memcpy (dest, src, bytes) 444 mov ip, #0 @ set ip to 0 445 str ip, [sp] @ store null for method* at bottom of frame 446 447 ldr ip, [r11, #48] @ load fp register argument array pointer 448 vldm ip, {s0-s15} @ copy s0 - s15 449 450 ldr ip, [r11, #44] @ load core register argument array pointer 451 mov r0, r4 @ restore method* 452 add ip, ip, #4 @ skip r0 453 ldm ip, {r1-r3} @ copy r1 - r3 454 455 REFRESH_MARKING_REGISTER 456 457 ldr ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32] @ get pointer to the code 458 blx ip @ call the method 459 460 mov sp, r11 @ restore the stack pointer 461 .cfi_def_cfa_register sp 462 463 ldr r4, [sp, #40] @ load result_is_float 464 ldr r9, [sp, #36] @ load the result pointer 465 cmp r4, #0 466 ite eq 467 strdeq r0, [r9] @ store r0/r1 into result pointer 468 vstrne d0, [r9] @ store s0-s1/d0 into result pointer 469 470 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} @ restore spill regs 471END art_quick_invoke_stub_internal 472 473 /* 474 * On stack replacement stub. 
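 *
 * (Added summary, derived from the code below: the stub spills the callee-save
 * registers, copies the caller-provided frame of `size of stack` bytes onto the
 * native stack, stores LR into that copied frame per the compiler ABI, then
 * branches to the OSR-compiled code at the given pc; on return, r0/r1 are
 * written through the JValue* result.)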
475 * On entry: 476 * r0 = stack to copy 477 * r1 = size of stack 478 * r2 = pc to call 479 * r3 = JValue* result 480 * [sp] = shorty 481 * [sp + 4] = thread 482 */ 483ENTRY art_quick_osr_stub 484 SPILL_ALL_CALLEE_SAVE_GPRS @ Spill regs (9) 485 vpush {s16-s31} @ Spill fp-regs (16) 486 .cfi_adjust_cfa_offset 64 487 SAVE_SIZE=(9*4+16*4) 488 mov r11, sp @ Save the stack pointer 489 .cfi_def_cfa r11, SAVE_SIZE @ CFA = r11 + SAVE_SIZE 490 .cfi_remember_state 491 mov r10, r1 @ Save size of stack 492 ldr r9, [r11, #(SAVE_SIZE+4)] @ Move managed thread pointer into r9 493 REFRESH_MARKING_REGISTER 494 mov r6, r2 @ Save the pc to call 495 sub r7, sp, #12 @ Reserve space for stack pointer, 496 @ JValue* result, and ArtMethod* slot. 497 and r7, #0xFFFFFFF0 @ Align stack pointer 498 mov sp, r7 @ Update stack pointer 499 str r11, [sp, #4] @ Save old stack pointer 500 str r3, [sp, #8] @ Save JValue* result 501 mov ip, #0 502 str ip, [sp] @ Store null for ArtMethod* at bottom of frame 503 // r11 isn't properly spilled in the osr method, so we need use DWARF expression. 504 // NB: the CFI must be before the call since this is the address gdb will lookup. 505 // NB: gdb expects that cfa_expression returns the CFA value (not address to it). 506 .cfi_escape /* CFA = [sp + 4] + SAVE_SIZE */ \ 507 0x0f, 6, /* DW_CFA_def_cfa_expression(len) */ \ 508 0x92, 13, 4, /* DW_OP_bregx(reg,offset) */ \ 509 0x06, /* DW_OP_deref */ \ 510 0x23, SAVE_SIZE /* DW_OP_plus_uconst(val) */ 511 bl .Losr_entry @ Call the method 512 ldr r10, [sp, #8] @ Restore JValue* result 513 ldr sp, [sp, #4] @ Restore saved stack pointer 514 .cfi_def_cfa sp, SAVE_SIZE @ CFA = sp + SAVE_SIZE 515 strd r0, [r10] @ Store r0/r1 into result pointer 516 vpop {s16-s31} 517 .cfi_adjust_cfa_offset -64 518 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 519.Losr_entry: 520 .cfi_restore_state 521 .cfi_def_cfa r11, SAVE_SIZE @ CFA = r11 + SAVE_SIZE 522 sub sp, sp, r10 @ Reserve space for callee stack 523 sub r10, r10, #4 524 str lr, [sp, r10] @ Store link register per the compiler ABI 525 mov r2, r10 526 mov r1, r0 527 mov r0, sp 528 bl memcpy @ memcpy (dest r0, src r1, bytes r2) 529 bx r6 530END art_quick_osr_stub 531 532 /* 533 * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_. 534 * Both must reside on the stack, between current SP and target SP. 535 * The r12 (IP) shall be clobbered rather than retrieved from gprs_. 536 */ 537ARM_ENTRY art_quick_do_long_jump 538 vldm r1, {s0-s31} @ Load all fprs from argument fprs_. 539 mov sp, r0 @ Make SP point to gprs_. 540 @ Do not access fprs_ from now, they may be below SP. 541 ldm sp, {r0-r11} @ load r0-r11 from gprs_. 542 ldr r12, [sp, #60] @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12). 543 ldr lr, [sp, #56] @ Load LR from gprs_, 56 = 4 * 14. 544 ldr sp, [sp, #52] @ Load SP from gprs_ 52 = 4 * 13. 545 @ Do not access gprs_ from now, they are below SP. 546 REFRESH_MARKING_REGISTER 547 bx r12 @ Do long jump. 548END art_quick_do_long_jump 549 550 /* 551 * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on 552 * failure. 553 */ 554TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 555 556 /* 557 * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the 558 * possibly null object to lock. 
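 *
 * (Added outline of the fast path below, for readability; see the code for the
 *  authoritative behavior:
 *   1. If the lock word's non-GC-state bits are clear (unlocked), store our
 *      thread id with strex and return after a dmb; a failed strex retries.
 *   2. If the word is thin-locked by this thread, increment the recursive
 *      count, falling back to the slow path on count overflow.
 *   3. Otherwise (inflated lock or held by another thread), take the
 *      artLockObjectFromCode slow path.)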
 */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    ldr r1, [rSELF, #THREAD_ID_OFFSET]
    cbz r0, .Lslow_lock
.Lretry_lock:
    ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    eor r3, r2, r1                      @ Prepare the value to store if unlocked
                                        @ (thread id, count of 0 and preserved read barrier bits),
                                        @ or prepare to compare thread id for recursive lock check
                                        @ (lock_word.ThreadId() ^ self->ThreadId()).
    ands ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
    bne .Lnot_unlocked                  @ Check if unlocked.
    @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits.
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz r2, .Llock_strex_fail          @ If store failed, retry.
    dmb ish                             @ Full (LoadLoad|LoadStore) memory barrier.
    bx lr
.Lnot_unlocked:                         @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
    @ Check lock word state and thread id together,
    bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
    cbnz r3, .Lslow_lock                @ if either of the top two bits are set, or the lock word's
                                        @ thread id did not match, go slow path.
    add r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Increment the recursive lock count.
    @ Extract the new thin lock count for overflow check.
    ubfx r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
    cbz r2, .Lslow_lock                 @ Zero as the new count indicates overflow, go slow path.
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
    cbnz r2, .Llock_strex_fail          @ If strex failed, retry.
    bx lr
.Llock_strex_fail:
    b .Lretry_lock                      @ retry
// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object. Note that we
    // need a local label, the assembler complains about target being out of
    // range if we try to jump to `art_quick_lock_object_no_inline`.
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME r1       @ save callee saves in case we block
    mov r1, rSELF                       @ pass Thread::Current
    bl artLockObjectFromCode            @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    ldr r1, [rSELF, #THREAD_ID_OFFSET]
    cbz r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    @ Need to use atomic instructions for read barrier.
    ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#endif
    eor r3, r2, r1                      @ Prepare the value to store if simply locked
                                        @ (mostly 0s, and preserved read barrier bits),
                                        @ or prepare to compare thread id for recursive lock check
                                        @ (lock_word.ThreadId() ^ self->ThreadId()).
    ands ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
    bne .Lnot_simply_locked             @ Locked recursively or by other thread?
632 @ Transition to unlocked. 633 dmb ish @ Full (LoadStore|StoreStore) memory barrier. 634#ifndef USE_READ_BARRIER 635 str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 636#else 637 strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits 638 cbnz r2, .Lunlock_strex_fail @ If the store failed, retry. 639#endif 640 bx lr 641.Lnot_simply_locked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1 642#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT 643#error "Expecting thin lock count and gc state in consecutive bits." 644#endif 645 @ Check lock word state and thread id together, 646 bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) 647 cbnz r3, .Lslow_unlock @ if either of the top two bits are set, or the lock word's 648 @ thread id did not match, go slow path. 649 sub r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count. 650#ifndef USE_READ_BARRIER 651 str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 652#else 653 strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits. 654 cbnz r2, .Lunlock_strex_fail @ If the store failed, retry. 655#endif 656 bx lr 657.Lunlock_strex_fail: 658 b .Lretry_unlock @ retry 659// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call). 660END art_quick_unlock_object 661 662ENTRY art_quick_unlock_object_no_inline 663 // This is also the slow path for art_quick_unlock_object. Note that we 664 // need a local label, the assembler complains about target being out of 665 // range if we try to jump to `art_quick_unlock_object_no_inline`. 666.Lslow_unlock: 667 @ save callee saves in case exception allocation triggers GC 668 SETUP_SAVE_REFS_ONLY_FRAME r1 669 mov r1, rSELF @ pass Thread::Current 670 bl artUnlockObjectFromCode @ (Object* obj, Thread*) 671 RESTORE_SAVE_REFS_ONLY_FRAME 672 REFRESH_MARKING_REGISTER 673 RETURN_IF_RESULT_IS_ZERO 674 DELIVER_PENDING_EXCEPTION 675END art_quick_unlock_object_no_inline 676 677 /* 678 * Entry from managed code that calls artInstanceOfFromCode and on failure calls 679 * artThrowClassCastExceptionForObject. 680 */ 681 .extern artInstanceOfFromCode 682 .extern artThrowClassCastExceptionForObject 683ENTRY art_quick_check_instance_of 684 // Type check using the bit string passes null as the target class. In that case just throw. 685 cbz r1, .Lthrow_class_cast_exception_for_bitstring_check 686 687 push {r0-r2, lr} @ save arguments, padding (r2) and link register 688 .cfi_adjust_cfa_offset 16 689 .cfi_rel_offset r0, 0 690 .cfi_rel_offset r1, 4 691 .cfi_rel_offset r2, 8 692 .cfi_rel_offset lr, 12 693 bl artInstanceOfFromCode 694 cbz r0, .Lthrow_class_cast_exception 695 pop {r0-r2, pc} 696 697.Lthrow_class_cast_exception: 698 pop {r0-r2, lr} 699 .cfi_adjust_cfa_offset -16 700 .cfi_restore r0 701 .cfi_restore r1 702 .cfi_restore r2 703 .cfi_restore lr 704 705.Lthrow_class_cast_exception_for_bitstring_check: 706 SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2 @ save all registers as basis for long jump context 707 mov r2, rSELF @ pass Thread::Current 708 bl artThrowClassCastExceptionForObject @ (Object*, Class*, Thread*) 709 bkpt 710END art_quick_check_instance_of 711 712// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude. 
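// (Added note: the READ_BARRIER macro below passes its destination register as rExclude so
// that the register now holding the read barrier's result is not clobbered by the restore.)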
713.macro POP_REG_NE rReg, offset, rExclude 714 .ifnc \rReg, \rExclude 715 ldr \rReg, [sp, #\offset] @ restore rReg 716 .cfi_restore \rReg 717 .endif 718.endm 719 720// Save rReg's value to [sp, #offset]. 721.macro PUSH_REG rReg, offset 722 str \rReg, [sp, #\offset] @ save rReg 723 .cfi_rel_offset \rReg, \offset 724.endm 725 726 /* 727 * Macro to insert read barrier, only used in art_quick_aput_obj. 728 * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET. 729 * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path. 730 */ 731.macro READ_BARRIER rDest, rObj, offset 732#ifdef USE_READ_BARRIER 733 push {r0-r3, ip, lr} @ 6 words for saved registers (used in art_quick_aput_obj) 734 .cfi_adjust_cfa_offset 24 735 .cfi_rel_offset r0, 0 736 .cfi_rel_offset r1, 4 737 .cfi_rel_offset r2, 8 738 .cfi_rel_offset r3, 12 739 .cfi_rel_offset ip, 16 740 .cfi_rel_offset lr, 20 741 sub sp, #8 @ push padding 742 .cfi_adjust_cfa_offset 8 743 @ mov r0, \rRef @ pass ref in r0 (no-op for now since parameter ref is unused) 744 .ifnc \rObj, r1 745 mov r1, \rObj @ pass rObj 746 .endif 747 mov r2, #\offset @ pass offset 748 bl artReadBarrierSlow @ artReadBarrierSlow(ref, rObj, offset) 749 @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning. 750 .ifnc \rDest, r0 751 mov \rDest, r0 @ save return value in rDest 752 .endif 753 add sp, #8 @ pop padding 754 .cfi_adjust_cfa_offset -8 755 POP_REG_NE r0, 0, \rDest @ conditionally restore saved registers 756 POP_REG_NE r1, 4, \rDest 757 POP_REG_NE r2, 8, \rDest 758 POP_REG_NE r3, 12, \rDest 759 POP_REG_NE ip, 16, \rDest 760 add sp, #20 761 .cfi_adjust_cfa_offset -20 762 pop {lr} @ restore lr 763 .cfi_adjust_cfa_offset -4 764 .cfi_restore lr 765#else 766 ldr \rDest, [\rObj, #\offset] 767 UNPOISON_HEAP_REF \rDest 768#endif // USE_READ_BARRIER 769.endm 770 771#ifdef USE_READ_BARRIER 772 .extern artReadBarrierSlow 773#endif 774 .hidden art_quick_aput_obj 775ENTRY art_quick_aput_obj 776#ifdef USE_READ_BARRIER 777 @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro. 
778 tst r2, r2 779 beq .Ldo_aput_null 780#else 781 cbz r2, .Ldo_aput_null 782#endif // USE_READ_BARRIER 783 READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET 784 READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET 785 READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET 786 cmp r3, ip @ value's type == array's component type - trivial assignability 787 bne .Lcheck_assignability 788.Ldo_aput: 789 add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET 790 POISON_HEAP_REF r2 791 str r2, [r3, r1, lsl #2] 792 ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET] 793 lsr r0, r0, #CARD_TABLE_CARD_SHIFT 794 strb r3, [r3, r0] 795 blx lr 796.Ldo_aput_null: 797 add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET 798 str r2, [r3, r1, lsl #2] 799 blx lr 800.Lcheck_assignability: 801 push {r0-r2, lr} @ save arguments 802 .cfi_adjust_cfa_offset 16 803 .cfi_rel_offset r0, 0 804 .cfi_rel_offset r1, 4 805 .cfi_rel_offset r2, 8 806 .cfi_rel_offset lr, 12 807 mov r1, ip 808 mov r0, r3 809 bl artIsAssignableFromCode 810 cbz r0, .Lthrow_array_store_exception 811 pop {r0-r2, lr} 812 .cfi_restore r0 813 .cfi_restore r1 814 .cfi_restore r2 815 .cfi_restore lr 816 .cfi_adjust_cfa_offset -16 817 add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET 818 POISON_HEAP_REF r2 819 str r2, [r3, r1, lsl #2] 820 ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET] 821 lsr r0, r0, #CARD_TABLE_CARD_SHIFT 822 strb r3, [r3, r0] 823 blx lr 824.Lthrow_array_store_exception: 825 pop {r0-r2, lr} 826 /* No need to repeat restore cfi directives, the ones above apply here. */ 827 SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3 828 mov r1, r2 829 mov r2, rSELF @ pass Thread::Current 830 bl artThrowArrayStoreException @ (Class*, Class*, Thread*) 831 bkpt @ unreached 832END art_quick_aput_obj 833 834// Macro to facilitate adding new allocation entrypoints. 835.macro ONE_ARG_DOWNCALL name, entrypoint, return 836 .extern \entrypoint 837ENTRY \name 838 SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case of GC 839 mov r1, rSELF @ pass Thread::Current 840 bl \entrypoint @ (uint32_t type_idx, Method* method, Thread*) 841 RESTORE_SAVE_REFS_ONLY_FRAME 842 REFRESH_MARKING_REGISTER 843 \return 844END \name 845.endm 846 847// Macro to facilitate adding new allocation entrypoints. 848.macro TWO_ARG_DOWNCALL name, entrypoint, return 849 .extern \entrypoint 850ENTRY \name 851 SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC 852 mov r2, rSELF @ pass Thread::Current 853 bl \entrypoint @ (uint32_t type_idx, Method* method, Thread*) 854 RESTORE_SAVE_REFS_ONLY_FRAME 855 REFRESH_MARKING_REGISTER 856 \return 857END \name 858.endm 859 860// Macro to facilitate adding new array allocation entrypoints. 861.macro THREE_ARG_DOWNCALL name, entrypoint, return 862 .extern \entrypoint 863ENTRY \name 864 SETUP_SAVE_REFS_ONLY_FRAME r3 @ save callee saves in case of GC 865 mov r3, rSELF @ pass Thread::Current 866 @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*) 867 bl \entrypoint 868 RESTORE_SAVE_REFS_ONLY_FRAME 869 REFRESH_MARKING_REGISTER 870 \return 871END \name 872.endm 873 874// Macro to facilitate adding new allocation entrypoints. 875.macro FOUR_ARG_DOWNCALL name, entrypoint, return 876 .extern \entrypoint 877ENTRY \name 878 SETUP_SAVE_REFS_ONLY_FRAME r12 @ save callee saves in case of GC 879 str rSELF, [sp, #-16]! 
@ expand the frame and pass Thread::Current 880 .cfi_adjust_cfa_offset 16 881 bl \entrypoint 882 add sp, #16 @ strip the extra frame 883 .cfi_adjust_cfa_offset -16 884 RESTORE_SAVE_REFS_ONLY_FRAME 885 REFRESH_MARKING_REGISTER 886 \return 887END \name 888.endm 889 890 /* 891 * Macro for resolution and initialization of indexed DEX file 892 * constants such as classes and strings. 893 */ 894.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 895 .extern \entrypoint 896ENTRY \name 897 SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset @ save everything in case of GC 898 mov r1, rSELF @ pass Thread::Current 899 bl \entrypoint @ (uint32_t index, Thread*) 900 cbz r0, 1f @ If result is null, deliver the OOME. 901 .cfi_remember_state 902 RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 903 REFRESH_MARKING_REGISTER 904 bx lr 905 .cfi_restore_state 9061: 907 DELIVER_PENDING_EXCEPTION_FRAME_READY 908END \name 909.endm 910 911.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint 912 ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET 913.endm 914 915ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode 916ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode 917ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode 918ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode 919ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode 920ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode 921 922// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are 923// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc. 924 925 /* 926 * Called by managed code to resolve a static field and load a non-wide value. 927 */ 928ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 929ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 930ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 931ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 932ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 933ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 934 /* 935 * Called by managed code to resolve a static field and load a 64-bit primitive value. 936 */ 937 .extern artGet64StaticFromCompiledCode 938ENTRY art_quick_get64_static 939 SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC 940 mov r1, rSELF @ pass Thread::Current 941 bl artGet64StaticFromCompiledCode @ (uint32_t field_idx, Thread*) 942 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 943 RESTORE_SAVE_REFS_ONLY_FRAME 944 REFRESH_MARKING_REGISTER 945 cbnz r2, 1f @ success if no exception pending 946 bx lr @ return on success 9471: 948 DELIVER_PENDING_EXCEPTION 949END art_quick_get64_static 950 951 /* 952 * Called by managed code to resolve an instance field and load a non-wide value. 
953 */ 954TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 955TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 956TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 957TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 958TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 959TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1 960 /* 961 * Called by managed code to resolve an instance field and load a 64-bit primitive value. 962 */ 963 .extern artGet64InstanceFromCompiledCode 964ENTRY art_quick_get64_instance 965 SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC 966 mov r2, rSELF @ pass Thread::Current 967 bl artGet64InstanceFromCompiledCode @ (field_idx, Object*, Thread*) 968 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 969 RESTORE_SAVE_REFS_ONLY_FRAME 970 REFRESH_MARKING_REGISTER 971 cbnz r2, 1f @ success if no exception pending 972 bx lr @ return on success 9731: 974 DELIVER_PENDING_EXCEPTION 975END art_quick_get64_instance 976 977 /* 978 * Called by managed code to resolve a static field and store a value. 979 */ 980TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 981TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 982TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 983TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 984 985 /* 986 * Called by managed code to resolve an instance field and store a non-wide value. 987 */ 988THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 989THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 990THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 991THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER 992 993 /* 994 * Called by managed code to resolve an instance field and store a wide value. 995 */ 996 .extern artSet64InstanceFromCompiledCode 997ENTRY art_quick_set64_instance 998 SETUP_SAVE_REFS_ONLY_FRAME r12 @ save callee saves in case of GC 999 @ r2:r3 contain the wide argument 1000 str rSELF, [sp, #-16]! @ expand the frame and pass Thread::Current 1001 .cfi_adjust_cfa_offset 16 1002 bl artSet64InstanceFromCompiledCode @ (field_idx, Object*, new_val, Thread*) 1003 add sp, #16 @ release out args 1004 .cfi_adjust_cfa_offset -16 1005 RESTORE_SAVE_REFS_ONLY_FRAME @ TODO: we can clearly save an add here 1006 REFRESH_MARKING_REGISTER 1007 RETURN_IF_RESULT_IS_ZERO 1008 DELIVER_PENDING_EXCEPTION 1009END art_quick_set64_instance 1010 1011 .extern artSet64StaticFromCompiledCode 1012ENTRY art_quick_set64_static 1013 SETUP_SAVE_REFS_ONLY_FRAME r12 @ save callee saves in case of GC 1014 @ r2:r3 contain the wide argument 1015 str rSELF, [sp, #-16]! 
@ expand the frame and pass Thread::Current 1016 .cfi_adjust_cfa_offset 16 1017 bl artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*) 1018 add sp, #16 @ release out args 1019 .cfi_adjust_cfa_offset -16 1020 RESTORE_SAVE_REFS_ONLY_FRAME @ TODO: we can clearly save an add here 1021 REFRESH_MARKING_REGISTER 1022 RETURN_IF_RESULT_IS_ZERO 1023 DELIVER_PENDING_EXCEPTION 1024END art_quick_set64_static 1025 1026// Generate the allocation entrypoints for each allocator. 1027GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS 1028// Comment out allocators that have arm specific asm. 1029// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) 1030// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) 1031GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) 1032GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB) 1033// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB) 1034// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB) 1035// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB) 1036// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB) 1037// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB) 1038GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) 1039GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) 1040GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) 1041 1042// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) 1043// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) 1044GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) 1045GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB) 1046// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) 1047// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB) 1048// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB) 1049// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB) 1050// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB) 1051GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) 1052GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) 1053GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) 1054 1055// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc). 1056// 1057// If isInitialized=1 then the compiler assumes the object's class has already been initialized. 1058// If isInitialized=0 the compiler can only assume it's been at least resolved. 1059.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized 1060ENTRY \c_name 1061 // Fast path rosalloc allocation. 1062 // r0: type/return value, rSELF (r9): Thread::Current 1063 // r1, r2, r3, r12: free. 1064 ldr r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local 1065 // allocation stack has room. 1066 // TODO: consider using ldrd. 1067 ldr r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] 1068 cmp r3, r12 1069 bhs .Lslow_path\c_name 1070 1071 ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3) 1072 cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread 1073 // local allocation. 
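                                              // (Added note: sizes below
                                              // ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE are
                                              // served from this thread's rosalloc runs;
                                              // anything at or above it takes the slow
                                              // path via the bhs below.)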
    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bhs .Lslow_path\c_name
                                              // Compute the rosalloc bracket index
                                              // from the size. Since the size is
                                              // already aligned we can combine the
                                              // two shifts together.
    add r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                              // Subtract pointer size since there
                                              // are no runs for 0 byte allocations
                                              // and the size is already aligned.
                                              // Load the rosalloc run (r12).
    ldr r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                              // Load the free list head (r3). This
                                              // will be the return val.
    ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz r3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]  // Load the next pointer of the head
                                              // and update the list head with the
                                              // next pointer.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                              // Store the class pointer in the
                                              // header. This also overwrites the
                                              // next pointer. The offsets are
                                              // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF r0
    str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
                                              // Push the new object onto the thread
                                              // local allocation stack and
                                              // increment the thread local
                                              // allocation stack top.
    ldr r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str r3, [r1], #COMPRESSED_REFERENCE_SIZE  // (Increment r1 as a side effect.)
    str r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                              // Decrement the size of the free list.

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //    (The stack initial state is "null" pointers).
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // Such an object is invalid only temporarily; it eventually becomes fully valid.
    // The internal runtime code simply checks whether the entry is null or only partially
    // valid and, if so, ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not-cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
1137 ldr r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)] 1138 sub r1, #1 1139 // TODO: consider combining this store 1140 // and the list head store above using 1141 // strd. 1142 str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)] 1143 1144 mov r0, r3 // Set the return value and return. 1145 // No barrier. The class is already observably initialized (otherwise the fast 1146 // path size check above would fail) and new-instance allocations are protected 1147 // from publishing by the compiler which inserts its own StoreStore barrier. 1148 bx lr 1149 1150.Lslow_path\c_name: 1151 SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC 1152 mov r1, rSELF @ pass Thread::Current 1153 bl \cxx_name @ (mirror::Class* cls, Thread*) 1154 RESTORE_SAVE_REFS_ONLY_FRAME 1155 REFRESH_MARKING_REGISTER 1156 RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER 1157END \c_name 1158.endm 1159 1160ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0 1161ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1 1162 1163// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab 1164// and art_quick_alloc_object_resolved/initialized_region_tlab. 1165// 1166// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free. 1167// Need to preserve r0 to the slow path. 1168// 1169// If isInitialized=1 then the compiler assumes the object's class has already been initialized. 1170// If isInitialized=0 the compiler can only assume it's been at least resolved. 1171.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized 1172 // Load thread_local_pos (r12) and 1173 // thread_local_end (r3) with ldrd. 1174 // Check constraints for ldrd. 1175#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0)) 1176#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance" 1177#endif 1178 ldrd r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET] 1179 sub r12, r3, r12 // Compute the remaining buf size. 1180 ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3). 1181 cmp r3, r12 // Check if it fits. 1182 // If the class is not yet visibly initialized, or it is finalizable, 1183 // the object size will be very large to force the branch below to be taken. 1184 // 1185 // See Class::SetStatus() in class.cc for more details. 1186 bhi \slowPathLabel 1187 // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. 1188 // Reload old thread_local_pos (r0) 1189 // for the return value. 1190 ldr r2, [rSELF, #THREAD_LOCAL_POS_OFFSET] 1191 add r1, r2, r3 1192 str r1, [rSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. 1193 // After this "STR" the object is published to the thread local allocation stack, 1194 // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view. 1195 // It is not yet visible to the running (user) compiled code until after the return. 1196 // 1197 // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating 1198 // the state of the object. It can be either: 1199 // 1) A partially valid object, with a null class pointer 1200 // (because the initial state of TLAB buffers is all 0s/nulls). 
1201 // 2) A fully valid object, with a valid class pointer pointing to a real class. 1202 // Other states are not allowed. 1203 // 1204 // An object that is invalid only temporarily, and will eventually become valid. 1205 // The internal runtime code simply checks if the object is not null or is partial and then 1206 // ignores it. 1207 // 1208 // (Note: The actual check is done by checking that the object's class pointer is non-null. 1209 // Also, unlike rosalloc, the object can never be observed as null). 1210 ldr r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. 1211 add r1, r1, #1 1212 str r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET] 1213 POISON_HEAP_REF r0 1214 str r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. 1215 // Fence. This is "ish" not "ishst" so 1216 // that the code after this allocation 1217 // site will see the right values in 1218 // the fields of the class. 1219 mov r0, r2 1220 // No barrier. The class is already observably initialized (otherwise the fast 1221 // path size check above would fail) and new-instance allocations are protected 1222 // from publishing by the compiler which inserts its own StoreStore barrier. 1223 bx lr 1224.endm 1225 1226// The common code for art_quick_alloc_object_*region_tlab 1227.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized 1228ENTRY \name 1229 // Fast path tlab allocation. 1230 // r0: type, rSELF (r9): Thread::Current 1231 // r1, r2, r3, r12: free. 1232 ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized 1233.Lslow_path\name: 1234 SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC. 1235 mov r1, rSELF // Pass Thread::Current. 1236 bl \entrypoint // (mirror::Class* klass, Thread*) 1237 RESTORE_SAVE_REFS_ONLY_FRAME 1238 REFRESH_MARKING_REGISTER 1239 RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER 1240END \name 1241.endm 1242 1243GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0 1244GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1 1245GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0 1246GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1 1247 1248 1249// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab 1250// and art_quick_alloc_array_resolved/initialized_region_tlab. 1251// 1252// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free. 1253// Need to preserve r0 and r1 to the slow path. 1254.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel 1255 and r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED // Apply alignment mask 1256 // (addr + 7) & ~7. 1257 1258 // Load thread_local_pos (r3) and 1259 // thread_local_end (r12) with ldrd. 1260 // Check constraints for ldrd. 1261#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0)) 1262#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance" 1263#endif 1264 ldrd r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET] 1265 sub r12, r12, r3 // Compute the remaining buf size. 1266 cmp r2, r12 // Check if the total_size fits. 1267 // The array class is always initialized here. 
Unlike new-instance, 1268 // this does not act as a double test. 1269 bhi \slowPathLabel 1270 // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. 1271 add r2, r2, r3 1272 str r2, [rSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. 1273 ldr r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. 1274 add r2, r2, #1 1275 str r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET] 1276 POISON_HEAP_REF r0 1277 str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. 1278 str r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET] // Store the array length. 1279 // Fence. This is "ish" not "ishst" so 1280 // that the code after this allocation 1281 // site will see the right values in 1282 // the fields of the class. 1283 mov r0, r3 1284// new-array is special. The class is loaded and immediately goes to the Initialized state 1285// before it is published. Therefore the only fence needed is for the publication of the object. 1286// See ClassLinker::CreateArrayClass() for more details. 1287 1288// For publication of the new array, we don't need a 'dmb ishst' here. 1289// The compiler generates 'dmb ishst' for all new-array insts. 1290 bx lr 1291.endm 1292 1293.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup 1294ENTRY \name 1295 // Fast path array allocation for region tlab allocation. 1296 // r0: mirror::Class* type 1297 // r1: int32_t component_count 1298 // rSELF (r9): thread 1299 // r2, r3, r12: free. 1300 \size_setup .Lslow_path\name 1301 ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name 1302.Lslow_path\name: 1303 // r0: mirror::Class* klass 1304 // r1: int32_t component_count 1305 // r2: Thread* self 1306 SETUP_SAVE_REFS_ONLY_FRAME r2 // save callee saves in case of GC 1307 mov r2, rSELF // pass Thread::Current 1308 bl \entrypoint 1309 RESTORE_SAVE_REFS_ONLY_FRAME 1310 REFRESH_MARKING_REGISTER 1311 RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER 1312END \name 1313.endm 1314 1315.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path 1316 bkpt // We should never enter here. 1317 // Code below is for reference. 1318 // Possibly a large object, go slow. 1319 // Also does negative array size check. 1320 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8) 1321 cmp r1, r2 1322 bhi \slow_path 1323 // Array classes are never finalizable 1324 // or uninitialized, no need to check. 1325 ldr r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type 1326 UNPOISON_HEAP_REF r3 1327 ldr r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET] 1328 lsr r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16 1329 // bits. 1330 lsl r2, r1, r3 // Calculate data size 1331 // Add array data offset and alignment. 1332 add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1333#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 1334#error Long array data offset must be 4 greater than int array data offset. 1335#endif 1336 1337 add r3, r3, #1 // Add 4 to the length only if the 1338 // component size shift is 3 1339 // (for 64 bit alignment). 1340 and r3, r3, #4 1341 add r2, r2, r3 1342.endm 1343 1344.macro COMPUTE_ARRAY_SIZE_8 slow_path 1345 // Possibly a large object, go slow. 1346 // Also does negative array size check. 1347 movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) 1348 cmp r1, r2 1349 bhi \slow_path 1350 // Add array data offset and alignment. 
1351 add r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1352.endm 1353 1354.macro COMPUTE_ARRAY_SIZE_16 slow_path 1355 // Possibly a large object, go slow. 1356 // Also does negative array size check. 1357 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2) 1358 cmp r1, r2 1359 bhi \slow_path 1360 lsl r2, r1, #1 1361 // Add array data offset and alignment. 1362 add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1363.endm 1364 1365.macro COMPUTE_ARRAY_SIZE_32 slow_path 1366 // Possibly a large object, go slow. 1367 // Also does negative array size check. 1368 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4) 1369 cmp r1, r2 1370 bhi \slow_path 1371 lsl r2, r1, #2 1372 // Add array data offset and alignment. 1373 add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1374.endm 1375 1376.macro COMPUTE_ARRAY_SIZE_64 slow_path 1377 // Possibly a large object, go slow. 1378 // Also does negative array size check. 1379 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8) 1380 cmp r1, r2 1381 bhi \slow_path 1382 lsl r2, r1, #3 1383 // Add array data offset and alignment. 1384 add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1385.endm 1386 1387// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove 1388// the entrypoint once all backends have been updated to use the size variants. 1389GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN 1390GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8 1391GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16 1392GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32 1393GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64 1394GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN 1395GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8 1396GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16 1397GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32 1398GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64 1399 1400 /* 1401 * Called by managed code when the value in rSUSPEND has been decremented to 0. 
1402 */ 1403 .extern artTestSuspendFromCode 1404ENTRY art_quick_test_suspend 1405 SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl 1406 mov r0, rSELF 1407 bl artTestSuspendFromCode @ (Thread*) 1408 RESTORE_SAVE_EVERYTHING_FRAME 1409 REFRESH_MARKING_REGISTER 1410 bx lr 1411END art_quick_test_suspend 1412 1413ENTRY art_quick_implicit_suspend 1414 mov r0, rSELF 1415 SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves for stack crawl 1416 bl artTestSuspendFromCode @ (Thread*) 1417 RESTORE_SAVE_REFS_ONLY_FRAME 1418 REFRESH_MARKING_REGISTER 1419 bx lr 1420END art_quick_implicit_suspend 1421 1422 /* 1423 * Called by managed code that is attempting to call a method on a proxy class. On entry 1424 * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The 1425 * frame size of the invoked proxy method agrees with a ref and args callee save frame. 1426 */ 1427 .extern artQuickProxyInvokeHandler 1428ENTRY art_quick_proxy_invoke_handler 1429 SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0 1430 mov r2, rSELF @ pass Thread::Current 1431 mov r3, sp @ pass SP 1432 blx artQuickProxyInvokeHandler @ (Method* proxy method, receiver, Thread*, SP) 1433 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1434 // Tear down the callee-save frame. Skip arg registers. 1435 add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1436 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1437 RESTORE_SAVE_REFS_ONLY_FRAME 1438 REFRESH_MARKING_REGISTER 1439 cbnz r2, 1f @ success if no exception is pending 1440 vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... 1441 bx lr @ return on success 14421: 1443 DELIVER_PENDING_EXCEPTION 1444END art_quick_proxy_invoke_handler 1445 1446 /* 1447 * Called to resolve an imt conflict. 1448 * r0 is the conflict ArtMethod. 1449 * r12 is a hidden argument that holds the target interface method's dex method index. 1450 * 1451 * Note that this stub writes to r0, r4, and r12. 1452 */ 1453 .extern artLookupResolvedMethod 1454ENTRY art_quick_imt_conflict_trampoline 1455 push {r1-r2} 1456 .cfi_adjust_cfa_offset (2 * 4) 1457 .cfi_rel_offset r1, 0 1458 .cfi_rel_offset r2, 4 1459 ldr r4, [sp, #(2 * 4)] // Load referrer. 1460 ldr r2, [r0, #ART_METHOD_JNI_OFFSET_32] // Load ImtConflictTable 1461 // Load the declaring class (without read barrier) and access flags (for obsolete method check). 1462 // The obsolete flag is set with suspended threads, so we do not need an acquire operation here. 1463#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4 1464#error "Expecting declaring class and access flags to be consecutive for LDRD." 1465#endif 1466 ldrd r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET] 1467 // If the method is obsolete, just go through the dex cache miss slow path. 1468 lsrs r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1) 1469 bcs .Limt_conflict_trampoline_dex_cache_miss 1470 ldr r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET] // Load the DexCache (without read barrier). 1471 UNPOISON_HEAP_REF r4 1472 ubfx r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS // Calculate DexCache method slot index. 1473 ldr r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET] // Load the resolved methods. 1474 add r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1) // Load DexCache method slot address. 1475 1476// FIXME: Configure the build to use the faster code when appropriate. 1477// Currently we fall back to the slower version. 
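// The DexCache slot at [r4] holds a {ArtMethod*, dex method index} pair that must be read as one
// atomic 64-bit unit so that the method pointer and its index stay consistent with each other.
// Roughly (illustrative C-like sketch only, not part of the build):
//
//   uint64_t pair = AtomicLoad64(slot);               // LDRD below, or the LDREXD/STREXD loop.
//   ArtMethod* method = (ArtMethod*)(uint32_t)pair;   // low word  -> r0
//   uint32_t index = (uint32_t)(pair >> 32);          // high word -> r1
//   if (index != r12) goto dex_cache_miss;            // otherwise search the ImtConflictTable.
//
// The fallback loop reads the pair with LDREXD and immediately writes the same value back with
// STREXD; the store succeeds only if nothing touched the location in between, so a successful
// iteration behaves like a single atomic 64-bit load.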
1478#if HAS_ATOMIC_LDRD 1479 ldrd r0, r1, [r4] 1480#else 1481 push {r3} 1482 .cfi_adjust_cfa_offset 4 1483 .cfi_rel_offset r3, 0 1484.Limt_conflict_trampoline_retry_load: 1485 ldrexd r0, r1, [r4] 1486 strexd r3, r0, r1, [r4] 1487 cmp r3, #0 1488 bne .Limt_conflict_trampoline_retry_load 1489 pop {r3} 1490 .cfi_adjust_cfa_offset -4 1491 .cfi_restore r3 1492#endif 1493 1494 ldr r4, [r2] // Load first entry in ImtConflictTable. 1495 cmp r1, r12 // Compare method index to see if we had a DexCache method hit. 1496 bne .Limt_conflict_trampoline_dex_cache_miss 1497.Limt_table_iterate: 1498 cmp r4, r0 1499 // Branch if found. Benchmarks have shown doing a branch here is better. 1500 beq .Limt_table_found 1501 // If the entry is null, the interface method is not in the ImtConflictTable. 1502 cbz r4, .Lconflict_trampoline 1503 // Iterate over the entries of the ImtConflictTable. 1504 ldr r4, [r2, #(2 * __SIZEOF_POINTER__)]! 1505 b .Limt_table_iterate 1506.Limt_table_found: 1507 // We successfully hit an entry in the table. Load the target method 1508 // and jump to it. 1509 ldr r0, [r2, #__SIZEOF_POINTER__] 1510 .cfi_remember_state 1511 pop {r1-r2} 1512 .cfi_adjust_cfa_offset -(2 * 4) 1513 .cfi_restore r1 1514 .cfi_restore r2 1515 ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32] 1516 .cfi_restore_state 1517.Lconflict_trampoline: 1518 // Call the runtime stub to populate the ImtConflictTable and jump to the 1519 // resolved method. 1520 .cfi_remember_state 1521 pop {r1-r2} 1522 .cfi_adjust_cfa_offset -(2 * 4) 1523 .cfi_restore r1 1524 .cfi_restore r2 1525 INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline 1526 .cfi_restore_state 1527.Limt_conflict_trampoline_dex_cache_miss: 1528 // We're not creating a proper runtime method frame here, 1529 // artLookupResolvedMethod() is not allowed to walk the stack. 1530 1531 // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr). 1532 push {r2-r4, lr} 1533 .cfi_adjust_cfa_offset (4 * 4) 1534 .cfi_rel_offset r3, 4 1535 .cfi_rel_offset lr, 12 1536 // Save FPR args. 1537 vpush {d0-d7} 1538 .cfi_adjust_cfa_offset (8 * 8) 1539 1540 mov r0, ip // Pass method index. 1541 ldr r1, [sp, #(8 * 8 + 6 * 4)] // Pass referrer. 1542 bl artLookupResolvedMethod // (uint32_t method_index, ArtMethod* referrer) 1543 1544 // Restore FPR args. 1545 vpop {d0-d7} 1546 .cfi_adjust_cfa_offset -(8 * 8) 1547 // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr). 1548 pop {r2-r4, lr} 1549 .cfi_adjust_cfa_offset -(4 * 4) 1550 .cfi_restore r3 1551 .cfi_restore lr 1552 1553 cmp r0, #0 // If the method wasn't resolved, 1554 beq .Lconflict_trampoline // skip the lookup and go to artInvokeInterfaceTrampoline(). 1555 b .Limt_table_iterate 1556END art_quick_imt_conflict_trampoline 1557 1558 .extern artQuickResolutionTrampoline 1559ENTRY art_quick_resolution_trampoline 1560 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 1561 mov r2, rSELF @ pass Thread::Current 1562 mov r3, sp @ pass SP 1563 blx artQuickResolutionTrampoline @ (Method* called, receiver, Thread*, SP) 1564 cbz r0, 1f @ is code pointer null? 
goto exception 1565 mov r12, r0 1566 ldr r0, [sp, #0] @ load resolved method in r0 1567 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1568 REFRESH_MARKING_REGISTER 1569 bx r12 @ tail-call into actual code 15701: 1571 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1572 DELIVER_PENDING_EXCEPTION 1573END art_quick_resolution_trampoline 1574 1575 /* 1576 * Called to do a generic JNI down-call 1577 */ 1578ENTRY art_quick_generic_jni_trampoline 1579 SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0 1580 1581 // Save rSELF 1582 mov r11, rSELF 1583 // Save SP , so we can have static CFI info. r10 is saved in ref_and_args. 1584 mov r10, sp 1585 .cfi_def_cfa_register r10 1586 1587 sub sp, sp, #5120 1588 1589 // prepare for artQuickGenericJniTrampoline call 1590 // (Thread*, managed_sp, reserved_area) 1591 // r0 r1 r2 <= C calling convention 1592 // rSELF r10 sp <= where they are 1593 1594 mov r0, rSELF // Thread* 1595 mov r1, r10 // SP for the managed frame. 1596 mov r2, sp // reserved area for arguments and other saved data (up to managed frame) 1597 blx artQuickGenericJniTrampoline // (Thread*, managed_sp, reserved_area) 1598 1599 // The C call will have registered the complete save-frame on success. 1600 // The result of the call is: 1601 // r0: pointer to native code, 0 on error. 1602 // The bottom of the reserved area contains values for arg registers, 1603 // hidden arg register and SP for out args for the call. 1604 1605 // Check for error (class init check or locking for synchronized native method can throw). 1606 cbz r0, .Lexception_in_native 1607 1608 // Save the code pointer 1609 mov lr, r0 1610 1611 // Load parameters from frame into registers r0-r3 (soft-float), 1612 // hidden arg (r4) for @CriticalNative and SP for out args. 1613 pop {r0-r3, r4, ip} 1614 1615 // Apply the new SP for out args, releasing unneeded reserved area. 1616 mov sp, ip 1617 1618 // Softfloat. 1619 // TODO: Change to hardfloat when supported. 1620 1621 blx lr // native call. 1622 1623 // result sign extension is handled in C code 1624 // prepare for artQuickGenericJniEndTrampoline call 1625 // (Thread*, result, result_f) 1626 // r0 r2,r3 stack <= C calling convention 1627 // r11 r0,r1 r0,r1 <= where they are 1628 sub sp, sp, #8 // Stack alignment. 1629 1630 push {r0-r1} 1631 mov r3, r1 1632 mov r2, r0 1633 mov r0, r11 1634 1635 blx artQuickGenericJniEndTrampoline 1636 1637 // Restore self pointer. 1638 mov rSELF, r11 1639 1640 // Pending exceptions possible. 1641 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1642 cbnz r2, .Lexception_in_native 1643 1644 // Tear down the alloca. 1645 mov sp, r10 1646 .cfi_def_cfa_register sp 1647 1648 // Tear down the callee-save frame. Skip arg registers. 1649 add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY 1650 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY) 1651 RESTORE_SAVE_REFS_ONLY_FRAME 1652 REFRESH_MARKING_REGISTER 1653 1654 // store into fpr, for when it's a fpr return... 1655 vmov d0, r0, r1 1656 bx lr // ret 1657 // Undo the unwinding information from above since it doesn't apply below. 1658 .cfi_def_cfa_register r10 1659 .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY 1660 1661.Lexception_in_native: 1662 ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] 1663 add ip, ip, #-1 // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE. 1664 mov sp, ip 1665 .cfi_def_cfa_register sp 1666 # This will create a new save-all frame, required by the runtime. 
1667 DELIVER_PENDING_EXCEPTION 1668END art_quick_generic_jni_trampoline 1669 1670 .extern artQuickToInterpreterBridge 1671ENTRY art_quick_to_interpreter_bridge 1672 SETUP_SAVE_REFS_AND_ARGS_FRAME r1 1673 mov r1, rSELF @ pass Thread::Current 1674 mov r2, sp @ pass SP 1675 blx artQuickToInterpreterBridge @ (Method* method, Thread*, SP) 1676 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1677 // Tear down the callee-save frame. Skip arg registers. 1678 add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1679 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1680 RESTORE_SAVE_REFS_ONLY_FRAME 1681 REFRESH_MARKING_REGISTER 1682 cbnz r2, 1f @ success if no exception is pending 1683 vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... 1684 bx lr @ return on success 16851: 1686 DELIVER_PENDING_EXCEPTION 1687END art_quick_to_interpreter_bridge 1688 1689/* 1690 * Called to attempt to execute an obsolete method. 1691 */ 1692ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod 1693 1694 /* 1695 * Routine that intercepts method calls and returns. 1696 */ 1697 .extern artInstrumentationMethodEntryFromCode 1698 .extern artInstrumentationMethodExitFromCode 1699ENTRY art_quick_instrumentation_entry 1700 @ Make stack crawlable and clobber r2 and r3 (post saving) 1701 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 1702 @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs. 1703 str r0, [sp, #4] 1704 mov r2, rSELF @ pass Thread::Current 1705 mov r3, sp @ pass SP 1706 blx artInstrumentationMethodEntryFromCode @ (Method*, Object*, Thread*, SP) 1707 cbz r0, .Ldeliver_instrumentation_entry_exception 1708 @ Deliver exception if we got nullptr as function. 1709 mov r12, r0 @ r12 holds reference to code 1710 ldr r0, [sp, #4] @ restore r0 1711 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1712 adr lr, art_quick_instrumentation_exit + /* thumb mode */ 1 1713 @ load art_quick_instrumentation_exit into lr in thumb mode 1714 REFRESH_MARKING_REGISTER 1715 bx r12 @ call method with lr set to art_quick_instrumentation_exit 1716.Ldeliver_instrumentation_entry_exception: 1717 @ Deliver exception for art_quick_instrumentation_entry placed after 1718 @ art_quick_instrumentation_exit so that the fallthrough works. 1719 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1720 DELIVER_PENDING_EXCEPTION 1721END art_quick_instrumentation_entry 1722 1723ENTRY art_quick_instrumentation_exit 1724 mov lr, #0 @ link register is to here, so clobber with 0 for later checks 1725 SETUP_SAVE_EVERYTHING_FRAME r2 1726 1727 add r3, sp, #8 @ store fpr_res pointer, in kSaveEverything frame 1728 add r2, sp, #136 @ store gpr_res pointer, in kSaveEverything frame 1729 mov r1, sp @ pass SP 1730 mov r0, rSELF @ pass Thread::Current 1731 blx artInstrumentationMethodExitFromCode @ (Thread*, SP, gpr_res*, fpr_res*) 1732 1733 cbz r0, .Ldo_deliver_instrumentation_exception 1734 @ Deliver exception if we got nullptr as function. 1735 cbnz r1, .Ldeoptimize 1736 // Normal return. 1737 str r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4] 1738 @ Set return pc. 1739 RESTORE_SAVE_EVERYTHING_FRAME 1740 REFRESH_MARKING_REGISTER 1741 bx lr 1742.Ldo_deliver_instrumentation_exception: 1743 DELIVER_PENDING_EXCEPTION_FRAME_READY 1744.Ldeoptimize: 1745 str r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4] 1746 @ Set return pc. 1747 RESTORE_SAVE_EVERYTHING_FRAME 1748 // Jump to art_quick_deoptimize. 
1749 b art_quick_deoptimize 1750END art_quick_instrumentation_exit 1751 1752 /* 1753 * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization 1754 * will long jump to the upcall with a special exception of -1. 1755 */ 1756 .extern artDeoptimize 1757ENTRY art_quick_deoptimize 1758 SETUP_SAVE_EVERYTHING_FRAME r0 1759 mov r0, rSELF @ pass Thread::Current 1760 blx artDeoptimize @ (Thread*) 1761END art_quick_deoptimize 1762 1763 /* 1764 * Compiled code has requested that we deoptimize into the interpreter. The deoptimization 1765 * will long jump to the interpreter bridge. 1766 */ 1767 .extern artDeoptimizeFromCompiledCode 1768ENTRY art_quick_deoptimize_from_compiled_code 1769 SETUP_SAVE_EVERYTHING_FRAME r1 1770 mov r1, rSELF @ pass Thread::Current 1771 blx artDeoptimizeFromCompiledCode @ (DeoptimizationKind, Thread*) 1772END art_quick_deoptimize_from_compiled_code 1773 1774 /* 1775 * Signed 64-bit integer multiply. 1776 * 1777 * Consider WXxYZ (r1r0 x r3r2) with a long multiply: 1778 * WX 1779 * x YZ 1780 * -------- 1781 * ZW ZX 1782 * YW YX 1783 * 1784 * The low word of the result holds ZX, the high word holds 1785 * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because 1786 * it doesn't fit in the low 64 bits. 1787 * 1788 * Unlike most ARM math operations, multiply instructions have 1789 * restrictions on using the same register more than once (Rd and Rm 1790 * cannot be the same). 1791 */ 1792 /* mul-long vAA, vBB, vCC */ 1793ENTRY art_quick_mul_long 1794 push {r9-r10} 1795 .cfi_adjust_cfa_offset 8 1796 .cfi_rel_offset r9, 0 1797 .cfi_rel_offset r10, 4 1798 mul ip, r2, r1 @ ip<- ZxW 1799 umull r9, r10, r2, r0 @ r9/r10 <- ZxX 1800 mla r2, r0, r3, ip @ r2<- YxX + (ZxW) 1801 add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) 1802 mov r0,r9 1803 mov r1,r10 1804 pop {r9-r10} 1805 .cfi_adjust_cfa_offset -8 1806 .cfi_restore r9 1807 .cfi_restore r10 1808 bx lr 1809END art_quick_mul_long 1810 1811 /* 1812 * Long integer shift. This is different from the generic 32/64-bit 1813 * binary operations because vAA/vBB are 64-bit but vCC (the shift 1814 * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low 1815 * 6 bits. 1816 * On entry: 1817 * r0: low word 1818 * r1: high word 1819 * r2: shift count 1820 */ 1821 /* shl-long vAA, vBB, vCC */ 1822ARM_ENTRY art_quick_shl_long @ ARM code as thumb code requires spills 1823 and r2, r2, #63 @ r2<- r2 & 0x3f 1824 mov r1, r1, asl r2 @ r1<- r1 << r2 1825 rsb r3, r2, #32 @ r3<- 32 - r2 1826 orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 << (32-r2)) 1827 subs ip, r2, #32 @ ip<- r2 - 32 1828 movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32) 1829 mov r0, r0, asl r2 @ r0<- r0 << r2 1830 bx lr 1831END art_quick_shl_long 1832 1833 /* 1834 * Long integer shift. This is different from the generic 32/64-bit 1835 * binary operations because vAA/vBB are 64-bit but vCC (the shift 1836 * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low 1837 * 6 bits. 
1838 * On entry: 1839 * r0: low word 1840 * r1: high word 1841 * r2: shift count 1842 */ 1843 /* shr-long vAA, vBB, vCC */ 1844ARM_ENTRY art_quick_shr_long @ ARM code as thumb code requires spills 1845 and r2, r2, #63 @ r0<- r0 & 0x3f 1846 mov r0, r0, lsr r2 @ r0<- r2 >> r2 1847 rsb r3, r2, #32 @ r3<- 32 - r2 1848 orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) 1849 subs ip, r2, #32 @ ip<- r2 - 32 1850 movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32) 1851 mov r1, r1, asr r2 @ r1<- r1 >> r2 1852 bx lr 1853END art_quick_shr_long 1854 1855 /* 1856 * Long integer shift. This is different from the generic 32/64-bit 1857 * binary operations because vAA/vBB are 64-bit but vCC (the shift 1858 * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low 1859 * 6 bits. 1860 * On entry: 1861 * r0: low word 1862 * r1: high word 1863 * r2: shift count 1864 */ 1865 /* ushr-long vAA, vBB, vCC */ 1866ARM_ENTRY art_quick_ushr_long @ ARM code as thumb code requires spills 1867 and r2, r2, #63 @ r0<- r0 & 0x3f 1868 mov r0, r0, lsr r2 @ r0<- r2 >> r2 1869 rsb r3, r2, #32 @ r3<- 32 - r2 1870 orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) 1871 subs ip, r2, #32 @ ip<- r2 - 32 1872 movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32) 1873 mov r1, r1, lsr r2 @ r1<- r1 >>> r2 1874 bx lr 1875END art_quick_ushr_long 1876 1877 /* 1878 * String's indexOf. 1879 * 1880 * On entry: 1881 * r0: string object (known non-null) 1882 * r1: char to match (known <= 0xFFFF) 1883 * r2: Starting offset in string data 1884 */ 1885ENTRY art_quick_indexof 1886 push {r4, r10-r11, lr} @ 4 words of callee saves 1887 .cfi_adjust_cfa_offset 16 1888 .cfi_rel_offset r4, 0 1889 .cfi_rel_offset r10, 4 1890 .cfi_rel_offset r11, 8 1891 .cfi_rel_offset lr, 12 1892#if (STRING_COMPRESSION_FEATURE) 1893 ldr r4, [r0, #MIRROR_STRING_COUNT_OFFSET] 1894#else 1895 ldr r3, [r0, #MIRROR_STRING_COUNT_OFFSET] 1896#endif 1897 add r0, #MIRROR_STRING_VALUE_OFFSET 1898#if (STRING_COMPRESSION_FEATURE) 1899 /* r4 count (with flag) and r3 holds actual length */ 1900 lsr r3, r4, #1 1901#endif 1902 /* Clamp start to [0..count] */ 1903 cmp r2, #0 1904 it lt 1905 movlt r2, #0 1906 cmp r2, r3 1907 it gt 1908 movgt r2, r3 1909 1910 /* Save a copy in r12 to later compute result */ 1911 mov r12, r0 1912 1913 /* Build pointer to start of data to compare and pre-bias */ 1914#if (STRING_COMPRESSION_FEATURE) 1915 lsrs r4, r4, #1 1916 bcc .Lstring_indexof_compressed 1917#endif 1918 add r0, r0, r2, lsl #1 1919 sub r0, #2 1920 1921 /* Compute iteration count */ 1922 sub r2, r3, r2 1923 1924 /* 1925 * At this point we have: 1926 * r0: start of data to test 1927 * r1: char to compare 1928 * r2: iteration count 1929 * r4: compression style (used temporarily) 1930 * r12: original start of string data 1931 * r3, r4, r10, r11 available for loading string data 1932 */ 1933 1934 subs r2, #4 1935 blt .Lindexof_remainder 1936 1937.Lindexof_loop4: 1938 ldrh r3, [r0, #2]! 1939 ldrh r4, [r0, #2]! 1940 ldrh r10, [r0, #2]! 1941 ldrh r11, [r0, #2]! 1942 cmp r3, r1 1943 beq .Lmatch_0 1944 cmp r4, r1 1945 beq .Lmatch_1 1946 cmp r10, r1 1947 beq .Lmatch_2 1948 cmp r11, r1 1949 beq .Lmatch_3 1950 subs r2, #4 1951 bge .Lindexof_loop4 1952 1953.Lindexof_remainder: 1954 adds r2, #4 1955 beq .Lindexof_nomatch 1956 1957.Lindexof_loop1: 1958 ldrh r3, [r0, #2]! 
1959 cmp r3, r1 1960 beq .Lmatch_3 1961 subs r2, #1 1962 bne .Lindexof_loop1 1963 1964.Lindexof_nomatch: 1965 mov r0, #-1 1966 pop {r4, r10-r11, pc} 1967 1968.Lmatch_0: 1969 sub r0, #6 1970 sub r0, r12 1971 asr r0, r0, #1 1972 pop {r4, r10-r11, pc} 1973.Lmatch_1: 1974 sub r0, #4 1975 sub r0, r12 1976 asr r0, r0, #1 1977 pop {r4, r10-r11, pc} 1978.Lmatch_2: 1979 sub r0, #2 1980 sub r0, r12 1981 asr r0, r0, #1 1982 pop {r4, r10-r11, pc} 1983.Lmatch_3: 1984 sub r0, r12 1985 asr r0, r0, #1 1986 pop {r4, r10-r11, pc} 1987#if (STRING_COMPRESSION_FEATURE) 1988.Lstring_indexof_compressed: 1989 add r0, r0, r2 1990 sub r0, #1 1991 sub r2, r3, r2 1992.Lstring_indexof_compressed_loop: 1993 subs r2, #1 1994 blt .Lindexof_nomatch 1995 ldrb r3, [r0, #1]! 1996 cmp r3, r1 1997 beq .Lstring_indexof_compressed_matched 1998 b .Lstring_indexof_compressed_loop 1999.Lstring_indexof_compressed_matched: 2000 sub r0, r12 2001 pop {r4, r10-r11, pc} 2002#endif 2003END art_quick_indexof 2004 2005 /* Assembly routines used to handle ABI differences. */ 2006 2007 /* double fmod(double a, double b) */ 2008 .extern fmod 2009ENTRY art_quick_fmod 2010 push {lr} 2011 .cfi_adjust_cfa_offset 4 2012 .cfi_rel_offset lr, 0 2013 sub sp, #4 2014 .cfi_adjust_cfa_offset 4 2015 vmov r0, r1, d0 2016 vmov r2, r3, d1 2017 bl fmod 2018 vmov d0, r0, r1 2019 add sp, #4 2020 .cfi_adjust_cfa_offset -4 2021 pop {pc} 2022END art_quick_fmod 2023 2024 /* float fmodf(float a, float b) */ 2025 .extern fmodf 2026ENTRY art_quick_fmodf 2027 push {lr} 2028 .cfi_adjust_cfa_offset 4 2029 .cfi_rel_offset lr, 0 2030 sub sp, #4 2031 .cfi_adjust_cfa_offset 4 2032 vmov r0, r1, d0 2033 bl fmodf 2034 vmov s0, r0 2035 add sp, #4 2036 .cfi_adjust_cfa_offset -4 2037 pop {pc} 2038END art_quick_fmodf 2039 2040 /* int64_t art_d2l(double d) */ 2041 .extern art_d2l 2042ENTRY art_quick_d2l 2043 vmov r0, r1, d0 2044 b art_d2l 2045END art_quick_d2l 2046 2047 /* int64_t art_f2l(float f) */ 2048 .extern art_f2l 2049ENTRY art_quick_f2l 2050 vmov r0, s0 2051 b art_f2l 2052END art_quick_f2l 2053 2054 /* float art_l2f(int64_t l) */ 2055 .extern art_l2f 2056ENTRY art_quick_l2f 2057 push {lr} 2058 .cfi_adjust_cfa_offset 4 2059 .cfi_rel_offset lr, 0 2060 sub sp, #4 2061 .cfi_adjust_cfa_offset 4 2062 bl art_l2f 2063 vmov s0, r0 2064 add sp, #4 2065 .cfi_adjust_cfa_offset -4 2066 pop {pc} 2067END art_quick_l2f 2068 2069 .extern artStringBuilderAppend 2070ENTRY art_quick_string_builder_append 2071 SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC 2072 add r1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__) @ pass args 2073 mov r2, rSELF @ pass Thread::Current 2074 bl artStringBuilderAppend @ (uint32_t, const unit32_t*, Thread*) 2075 RESTORE_SAVE_REFS_ONLY_FRAME 2076 REFRESH_MARKING_REGISTER 2077 RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER 2078END art_quick_string_builder_append 2079 2080.macro CONDITIONAL_CBZ reg, reg_if, dest 2081.ifc \reg, \reg_if 2082 cbz \reg, \dest 2083.endif 2084.endm 2085 2086.macro CONDITIONAL_CMPBZ reg, reg_if, dest 2087.ifc \reg, \reg_if 2088 cmp \reg, #0 2089 beq \dest 2090.endif 2091.endm 2092 2093// Use CBZ if the register is in {r0, r7} otherwise compare and branch. 
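// CBZ/CBNZ are 16-bit Thumb instructions that can encode only the low registers r0-r7 (and only
// short forward branches), hence the CMP #0 + BEQ fallback for r8-r15. Expansion sketch for the
// SMART_CBZ macro below (".Lsome_label" is just a placeholder):
//
//   SMART_CBZ r4, .Lsome_label   ==>   cbz r4, .Lsome_label
//   SMART_CBZ r9, .Lsome_label   ==>   cmp r9, #0
//                                      beq .Lsome_label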
2094.macro SMART_CBZ reg, dest 2095 CONDITIONAL_CBZ \reg, r0, \dest 2096 CONDITIONAL_CBZ \reg, r1, \dest 2097 CONDITIONAL_CBZ \reg, r2, \dest 2098 CONDITIONAL_CBZ \reg, r3, \dest 2099 CONDITIONAL_CBZ \reg, r4, \dest 2100 CONDITIONAL_CBZ \reg, r5, \dest 2101 CONDITIONAL_CBZ \reg, r6, \dest 2102 CONDITIONAL_CBZ \reg, r7, \dest 2103 CONDITIONAL_CMPBZ \reg, r8, \dest 2104 CONDITIONAL_CMPBZ \reg, r9, \dest 2105 CONDITIONAL_CMPBZ \reg, r10, \dest 2106 CONDITIONAL_CMPBZ \reg, r11, \dest 2107 CONDITIONAL_CMPBZ \reg, r12, \dest 2108 CONDITIONAL_CMPBZ \reg, r13, \dest 2109 CONDITIONAL_CMPBZ \reg, r14, \dest 2110 CONDITIONAL_CMPBZ \reg, r15, \dest 2111.endm 2112 2113 /* 2114 * Create a function `name` calling the ReadBarrier::Mark routine, 2115 * getting its argument and returning its result through register 2116 * `reg`, saving and restoring all caller-save registers. 2117 * 2118 * IP is clobbered; `reg` must not be IP. 2119 * 2120 * If `reg` is different from `r0`, the generated function follows a 2121 * non-standard runtime calling convention: 2122 * - register `reg` is used to pass the (sole) argument of this 2123 * function (instead of R0); 2124 * - register `reg` is used to return the result of this function 2125 * (instead of R0); 2126 * - R0 is treated like a normal (non-argument) caller-save register; 2127 * - everything else is the same as in the standard runtime calling 2128 * convention (e.g. standard callee-save registers are preserved). 2129 */ 2130.macro READ_BARRIER_MARK_REG name, reg 2131ENTRY \name 2132 // Null check so that we can load the lock word. 2133 SMART_CBZ \reg, .Lret_rb_\name 2134 // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked. 2135 ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] 2136 tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED 2137 beq .Lnot_marked_rb_\name 2138 // Already marked, return right away. 2139.Lret_rb_\name: 2140 bx lr 2141 2142.Lnot_marked_rb_\name: 2143 // Test that both the forwarding state bits are 1. 2144#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) 2145 // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in 2146 // the highest bits and the "forwarding address" state to have all bits set. 2147#error "Unexpected lock word state shift or forwarding address state value." 2148#endif 2149 cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) 2150 bhs .Lret_forwarding_address\name 2151 2152.Lslow_rb_\name: 2153 // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to 2154 // make a tail call here. Currently, it serves only for stack alignment but 2155 // we may reintroduce kSaveEverything calls here in the future. 2156 push {r0-r4, r9, ip, lr} @ save return address, core caller-save registers and ip 2157 .cfi_adjust_cfa_offset 32 2158 .cfi_rel_offset r0, 0 2159 .cfi_rel_offset r1, 4 2160 .cfi_rel_offset r2, 8 2161 .cfi_rel_offset r3, 12 2162 .cfi_rel_offset r4, 16 2163 .cfi_rel_offset r9, 20 2164 .cfi_rel_offset ip, 24 2165 .cfi_rel_offset lr, 28 2166 2167 .ifnc \reg, r0 2168 mov r0, \reg @ pass arg1 - obj from `reg` 2169 .endif 2170 2171 vpush {s0-s15} @ save floating-point caller-save registers 2172 .cfi_adjust_cfa_offset 64 2173 bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) 2174 vpop {s0-s15} @ restore floating-point registers 2175 .cfi_adjust_cfa_offset -64 2176 2177 .ifc \reg, r0 @ Save result to the stack slot or destination register. 
2178 str r0, [sp, #0] 2179 .else 2180 .ifc \reg, r1 2181 str r0, [sp, #4] 2182 .else 2183 .ifc \reg, r2 2184 str r0, [sp, #8] 2185 .else 2186 .ifc \reg, r3 2187 str r0, [sp, #12] 2188 .else 2189 .ifc \reg, r4 2190 str r0, [sp, #16] 2191 .else 2192 .ifc \reg, r9 2193 str r0, [sp, #20] 2194 .else 2195 mov \reg, r0 2196 .endif 2197 .endif 2198 .endif 2199 .endif 2200 .endif 2201 .endif 2202 2203 pop {r0-r4, r9, ip, lr} @ restore caller-save registers 2204 .cfi_adjust_cfa_offset -32 2205 .cfi_restore r0 2206 .cfi_restore r1 2207 .cfi_restore r2 2208 .cfi_restore r3 2209 .cfi_restore r4 2210 .cfi_restore r9 2211 .cfi_restore ip 2212 .cfi_restore lr 2213 bx lr 2214.Lret_forwarding_address\name: 2215 // Shift left by the forwarding address shift. This clears out the state bits since they are 2216 // in the top 2 bits of the lock word. 2217 lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 2218 bx lr 2219END \name 2220.endm 2221 2222READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0 2223READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1 2224READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2 2225READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3 2226READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4 2227READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5 2228READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6 2229READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7 2230READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8 2231READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 2232READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 2233READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 2234 2235// Helper macros for Baker CC read barrier mark introspection (BRBMI). 2236.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register 2237 \macro_for_register r0 2238 \macro_for_register r1 2239 \macro_for_register r2 2240 \macro_for_register r3 2241 \macro_for_register r4 2242 \macro_for_register r5 2243 \macro_for_register r6 2244 \macro_for_register r7 2245 \macro_for_reserved_register // r8 (rMR) is the marking register. 2246 \macro_for_register r9 2247 \macro_for_register r10 2248 \macro_for_register r11 2249 \macro_for_reserved_register // IP is reserved. 2250 \macro_for_reserved_register // SP is reserved. 2251 \macro_for_reserved_register // LR is reserved. 2252 \macro_for_reserved_register // PC is reserved. 2253.endm 2254 2255.macro BRBMI_RETURN_SWITCH_CASE reg 2256 .balign 8 2257.Lmark_introspection_return_switch_case_\reg: 2258 mov rMR, #1 2259 mov \reg, ip 2260 bx lr 2261.endm 2262 2263.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg 2264 .byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2 2265.endm 2266 2267.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET 2268 .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 2269.endm 2270 2271#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET 2272#error "Array and field introspection code sharing requires same LDR offset." 2273#endif 2274.macro BRBMI_ARRAY_LOAD index_reg 2275 ldr ip, [ip, \index_reg, lsl #2] // 4 bytes. 2276 b art_quick_read_barrier_mark_introspection // Should be 2 bytes, encoding T2. 2277 .balign 8 // Add padding to 8 bytes. 
2278.endm 2279 2280.macro BRBMI_BKPT_FILL_4B 2281 bkpt 0 2282 bkpt 0 2283.endm 2284 2285.macro BRBMI_BKPT_FILL_8B 2286 BRBMI_BKPT_FILL_4B 2287 BRBMI_BKPT_FILL_4B 2288.endm 2289 2290.macro BRBMI_RUNTIME_CALL 2291 // Note: This macro generates exactly 22 bytes of code. The core register 2292 // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions. 2293 2294 push {r0-r3, r7, lr} // Save return address and caller-save registers. 2295 .cfi_adjust_cfa_offset 24 2296 .cfi_rel_offset r0, 0 2297 .cfi_rel_offset r1, 4 2298 .cfi_rel_offset r2, 8 2299 .cfi_rel_offset r3, 12 2300 .cfi_rel_offset r7, 16 2301 .cfi_rel_offset lr, 20 2302 2303 mov r0, ip // Pass the reference. 2304 vpush {s0-s15} // save floating-point caller-save registers 2305 .cfi_adjust_cfa_offset 64 2306 bl artReadBarrierMark // r0 <- artReadBarrierMark(obj) 2307 vpop {s0-s15} // restore floating-point registers 2308 .cfi_adjust_cfa_offset -64 2309 mov ip, r0 // Move reference to ip in preparation for return switch. 2310 2311 pop {r0-r3, r7, lr} // Restore registers. 2312 .cfi_adjust_cfa_offset -24 2313 .cfi_restore r0 2314 .cfi_restore r1 2315 .cfi_restore r2 2316 .cfi_restore r3 2317 .cfi_restore r7 2318 .cfi_restore lr 2319.endm 2320 2321.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix 2322 // If reference is null, just return it in the right register. 2323 cmp ip, #0 2324 beq .Lmark_introspection_return\label_suffix 2325 // Use rMR as temp and check the mark bit of the reference. 2326 ldr rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 2327 tst rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED 2328 beq .Lmark_introspection_unmarked\label_suffix 2329.Lmark_introspection_return\label_suffix: 2330.endm 2331 2332.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix 2333.Lmark_introspection_unmarked\label_suffix: 2334 // Check if the top two bits are one, if this is the case it is a forwarding address. 2335#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) 2336 // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in 2337 // the highest bits and the "forwarding address" state to have all bits set. 2338#error "Unexpected lock word state shift or forwarding address state value." 2339#endif 2340 cmp rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) 2341 bhs .Lmark_introspection_forwarding_address\label_suffix 2342.endm 2343 2344.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix 2345.Lmark_introspection_forwarding_address\label_suffix: 2346 // Note: This macro generates exactly 22 bytes of code, the branch is near. 2347 2348 // Shift left by the forwarding address shift. This clears out the state bits since they are 2349 // in the top 2 bits of the lock word. 2350 lsl ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 2351 b .Lmark_introspection_return\label_suffix 2352.endm 2353 2354.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset 2355 // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. 2356 ldrh rMR, [lr, #(-1 + \ldr_offset + 2)] 2357.endm 2358 2359.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset 2360 // Load the 16-bit instruction. Adjust for the thumb state in LR. 2361 ldrh rMR, [lr, #(-1 + \ldr_offset)] 2362.endm 2363 2364.macro BRBMI_EXTRACT_RETURN_REG_wide 2365 lsr rMR, rMR, #12 // Extract `ref_reg`. 2366.endm 2367 2368.macro BRBMI_EXTRACT_RETURN_REG_narrow 2369 and rMR, rMR, #7 // Extract `ref_reg`. 
2370.endm 2371 2372.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix 2373 BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset 2374 BRBMI_EXTRACT_RETURN_REG\label_suffix 2375.endm 2376 2377.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix 2378 .balign 32 2379 .thumb_func 2380 .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function 2381 .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix 2382 .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix 2383art_quick_read_barrier_mark_introspection_gc_roots\label_suffix: 2384 BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix 2385.endm 2386 2387.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix 2388 .balign 16 2389 // Note: Generates exactly 16 bytes of code. 2390 BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix 2391 BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix 2392 b .Lmark_introspection_runtime_call 2393.endm 2394 2395 /* 2396 * Use introspection to load a reference from the same address as the LDR 2397 * instruction in generated code would load (unless loaded by the thunk, 2398 * see below), call ReadBarrier::Mark() with that reference if needed 2399 * and return it in the same register as the LDR instruction would load. 2400 * 2401 * The entrypoint is called through a thunk that differs across load kinds. 2402 * For field and array loads the LDR instruction in generated code follows 2403 * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning) 2404 * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where 2405 * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk 2406 * knows the holder and performs the gray bit check, returning to the LDR 2407 * instruction if the object is not gray, so this entrypoint no longer 2408 * needs to know anything about the holder. For GC root loads, the LDR 2409 * instruction in generated code precedes the branch to the thunk, i.e. the 2410 * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1) 2411 * where the -1 is again the Thumb mode bit adjustment, and the thunk does 2412 * not do the gray bit check. 2413 * 2414 * For field accesses and array loads with a constant index the thunk loads 2415 * the reference into IP using introspection and calls the main entrypoint 2416 * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or 2417 * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known 2418 * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET) 2419 * from the main entrypoint and the thunk adjusts the entrypoint pointer. 2420 * With heap poisoning enabled, the passed reference is poisoned. 2421 * 2422 * For array accesses with non-constant index, the thunk inserts the bits 2423 * 0-5 of the LDR instruction to the entrypoint address, effectively 2424 * calculating a switch case label based on the index register (bits 0-3) 2425 * and adding an extra offset (bits 4-5 hold the shift which is always 2 2426 * for reference loads) to differentiate from the main entrypoint, then 2427 * moves the base register to IP and jumps to the switch case. Therefore 2428 * we need to align the main entrypoint to 512 bytes, accounting for 2429 * a 256-byte offset followed by 16 array entrypoints starting at 2430 * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR 2431 * (register) and a branch to the main entrypoint. 
2432 * 2433 * For GC root accesses we cannot use the main entrypoint because of the 2434 * different offset where the LDR instruction in generated code is located. 2435 * (And even with heap poisoning enabled, GC roots are not poisoned.) 2436 * To re-use the same entrypoint pointer in generated code, we make sure 2437 * that the gc root entrypoint (a copy of the entrypoint with a different 2438 * offset for introspection loads) is located at a known offset (0xc0/0xe0 2439 * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/ 2440 * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the 2441 * main entrypoint and the GC root thunk adjusts the entrypoint pointer, 2442 * moves the root register to IP and jumps to the customized entrypoint, 2443 * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}. 2444 * The thunk also performs all the fast-path checks, so we need just the 2445 * slow path. 2446 * 2447 * The UnsafeCASObject intrinsic is similar to the GC roots wide approach 2448 * but using ADD (register, T3) instead of the LDR (immediate, T3), so the 2449 * destination register is in bits 8-11 rather than 12-15. Therefore it has 2450 * its own entrypoint, art_quick_read_barrier_mark_introspection_unsafe_cas 2451 * at the offset BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET. 2452 * 2453 * The code structure is 2454 * art_quick_read_barrier_mark_introspection: // @0x00 2455 * Up to 32 bytes code for main entrypoint fast-path code for fields 2456 * (and array elements with constant offset) with LDR encoding T3; 2457 * jumps to the switch in the "narrow" entrypoint. 2458 * art_quick_read_barrier_mark_introspection_narrow: // @0x20 2459 * Up to 48 bytes code for fast path code for fields (and array 2460 * elements with constant offset) with LDR encoding T1, ending in the 2461 * return switch instruction TBB and the table with switch offsets. 2462 * .Lmark_introspection_return_switch_case_r0: // @0x50 2463 * Exactly 88 bytes of code for the return switch cases (8 bytes per 2464 * case, 11 cases; no code for reserved registers). 2465 * .Lmark_introspection_forwarding_address_narrow: // @0xa8 2466 * Exactly 6 bytes to extract the forwarding address and jump to the 2467 * "narrow" entrypoint fast path. 2468 * .Lmark_introspection_return_switch_case_bad: // @0xae 2469 * Exactly 2 bytes, bkpt for unexpected return register. 2470 * .Lmark_introspection_unmarked_narrow: // @0xb0 2471 * Exactly 16 bytes for "narrow" entrypoint slow path. 2472 * art_quick_read_barrier_mark_introspection_gc_roots_wide: // @0xc0 2473 * GC root entrypoint code for LDR encoding T3 (10 bytes); loads and 2474 * extracts the return register and jumps to the runtime call. 2475 * .Lmark_introspection_forwarding_address_wide: // @0xca 2476 * Exactly 6 bytes to extract the forwarding address and jump to the 2477 * "wide" entrypoint fast path. 2478 * .Lmark_introspection_unmarked_wide: // @0xd0 2479 * Exactly 16 bytes for "wide" entrypoint slow path. 2480 * art_quick_read_barrier_mark_introspection_gc_roots_narrow: // @0xe0 2481 * GC root entrypoint code for LDR encoding T1 (8 bytes); loads and 2482 * extracts the return register and falls through to the runtime call. 2483 * .Lmark_introspection_runtime_call: // @0xe8 2484 * Exactly 24 bytes for the runtime call to MarkReg() and jump to the 2485 * return switch. 2486 * art_quick_read_barrier_mark_introspection_arrays: // @0x100 2487 * Exactly 128 bytes for array load switch cases (16x2 instructions). 
2488 * art_quick_read_barrier_mark_introspection_unsafe_cas: // @0x180 2489 * UnsafeCASObject intrinsic entrypoint for ADD (register) encoding T3 2490 * (6 bytes). Loads the return register and jumps to the runtime call. 2491 */ 2492#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2493ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512 2494 // At this point, IP contains the reference, rMR is clobbered by the thunk 2495 // and can be freely used as it will be set back to 1 before returning. 2496 // For heap poisoning, the reference is poisoned, so unpoison it first. 2497 UNPOISON_HEAP_REF ip 2498 // Check for null or marked, lock word is loaded into rMR. 2499 BRBMI_CHECK_NULL_AND_MARKED _wide 2500 // Load and extract the return register from the instruction. 2501 BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide 2502 b .Lmark_introspection_return_switch 2503 2504 .balign 32 2505 .thumb_func 2506 .type art_quick_read_barrier_mark_introspection_narrow, #function 2507 .hidden art_quick_read_barrier_mark_introspection_narrow 2508 .global art_quick_read_barrier_mark_introspection_narrow 2509art_quick_read_barrier_mark_introspection_narrow: 2510 // At this point, IP contains the reference, rMR is clobbered by the thunk 2511 // and can be freely used as it will be set back to 1 before returning. 2512 // For heap poisoning, the reference is poisoned, so unpoison it first. 2513 UNPOISON_HEAP_REF ip 2514 // Check for null or marked, lock word is loaded into rMR. 2515 BRBMI_CHECK_NULL_AND_MARKED _narrow 2516 // Load and extract the return register from the instruction. 2517 BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow 2518.Lmark_introspection_return_switch: 2519 tbb [pc, rMR] // Jump to the switch case. 2520.Lmark_introspection_return_table: 2521 BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET 2522 BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */ 2523 2524 .balign 8 2525 BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow // 6 bytes 2526.Lmark_introspection_return_switch_case_bad: 2527 bkpt // 2 bytes 2528 2529 BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow 2530 2531 // 8 bytes for the loading and extracting of the return register. 2532 BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide 2533 // 2 bytes for near branch to the runtime call. 2534 b .Lmark_introspection_runtime_call 2535 2536 BRBMI_EXTRACT_FORWARDING_ADDRESS _wide // Not even 4-byte aligned. 2537 2538 BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide 2539 2540 // 8 bytes for the loading and extracting of the return register. 2541 BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow 2542 // And the runtime call and branch to the switch taking exactly 24 bytes 2543 // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch) 2544 // shall take the rest of the 32-byte section (within a cache line). 
2545.Lmark_introspection_runtime_call: 2546 BRBMI_RUNTIME_CALL 2547 b .Lmark_introspection_return_switch 2548 2549 .balign 256 2550 .thumb_func 2551 .type art_quick_read_barrier_mark_introspection_arrays, #function 2552 .hidden art_quick_read_barrier_mark_introspection_arrays 2553 .global art_quick_read_barrier_mark_introspection_arrays 2554art_quick_read_barrier_mark_introspection_arrays: 2555 BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B 2556 2557 .balign 8 2558 .thumb_func 2559 .type art_quick_read_barrier_mark_introspection_unsafe_cas, #function 2560 .hidden art_quick_read_barrier_mark_introspection_unsafe_cas 2561 .global art_quick_read_barrier_mark_introspection_unsafe_cas 2562art_quick_read_barrier_mark_introspection_unsafe_cas: 2563 // Load the byte of the ADD instruction that contains Rd. Adjust for the thumb state in LR. 2564 // The ADD (register, T3) is |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm| and we're using 2565 // no shift (type=0, imm2=0, imm3=0), so the byte we read here, i.e. |(0)imm3|Rd|, 2566 // contains only the register number, the top 4 bits are 0. 2567 ldrb rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET + 3)] 2568 b .Lmark_introspection_runtime_call 2569END art_quick_read_barrier_mark_introspection 2570#else // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2571ENTRY art_quick_read_barrier_mark_introspection 2572 bkpt // Unreachable. 2573END art_quick_read_barrier_mark_introspection 2574#endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2575 2576.extern artInvokePolymorphic 2577ENTRY art_quick_invoke_polymorphic 2578 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 2579 mov r0, r1 @ r0 := receiver 2580 mov r1, rSELF @ r1 := Thread::Current 2581 mov r2, sp @ r2 := SP 2582 bl artInvokePolymorphic @ artInvokePolymorphic(receiver, Thread*, SP) 2583 str r1, [sp, 72] @ r0:r1 := Result. Copy r1 to context. 2584 RESTORE_SAVE_REFS_AND_ARGS_FRAME 2585 REFRESH_MARKING_REGISTER 2586 vmov d0, r0, r1 @ Put result r0:r1 into floating point return register. 2587 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2 2588END art_quick_invoke_polymorphic 2589 2590.extern artInvokeCustom 2591ENTRY art_quick_invoke_custom 2592 SETUP_SAVE_REFS_AND_ARGS_FRAME r1 2593 @ r0 := call_site_idx 2594 mov r1, rSELF @ r1 := Thread::Current 2595 mov r2, sp @ r2 := SP 2596 bl artInvokeCustom @ artInvokeCustom(call_site_idx, Thread*, SP) 2597 str r1, [sp, #72] @ Save r1 to context (r0:r1 = result) 2598 RESTORE_SAVE_REFS_AND_ARGS_FRAME 2599 REFRESH_MARKING_REGISTER 2600 vmov d0, r0, r1 @ Put result r0:r1 into floating point return register. 2601 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2 2602END art_quick_invoke_custom 2603 2604// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding. 2605// Argument 0: r0: The context pointer for ExecuteSwitchImpl. 2606// Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call. 2607// Argument 2: r2: The value of DEX PC (memory address of the methods bytecode). 2608ENTRY ExecuteSwitchImplAsm 2609 push {r4, lr} // 2 words of callee saves. 2610 .cfi_adjust_cfa_offset 8 2611 .cfi_rel_offset r4, 0 2612 .cfi_rel_offset lr, 4 2613 mov r4, r2 // r4 = DEX PC 2614 CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0) 2615 blx r1 // Call the wrapped method. 2616 pop {r4, pc} 2617END ExecuteSwitchImplAsm 2618 2619// r0 contains the class, r4 contains the inline cache. We can use ip as temporary. 
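// Informal sketch of the slot-claiming logic below (illustrative only). The INLINE_CACHE_SIZE
// class slots start at [r4, #INLINE_CACHE_CLASSES_OFFSET]:
//
//   for each of the first INLINE_CACHE_SIZE - 1 slots:
//     c = slot
//     if (c == r0)               -> done                 // Class already recorded.
//     if (c != null)             -> try the next slot    // Occupied by a different class.
//     c = ldrex(slot)                                    // Empty slot: try to claim it.
//     if (c != null)             -> re-examine this slot
//     if (strex(slot, r0) fails) -> done                 // Another thread is updating the cache.
//     otherwise                  -> re-examine this slot (exits via the "c == r0" check above)
//   last slot = r0                                       // All slots in use: cache is megamorphic.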
2620ENTRY art_quick_update_inline_cache 2621#if (INLINE_CACHE_SIZE != 5) 2622#error "INLINE_CACHE_SIZE not as expected." 2623#endif 2624#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2625 // Don't update the cache if we are marking. 2626 cmp rMR, #0 2627 bne .Ldone 2628#endif 2629.Lentry1: 2630 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET] 2631 cmp ip, r0 2632 beq .Ldone 2633 cmp ip, #0 2634 bne .Lentry2 2635 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET] 2636 cmp ip, #0 2637 bne .Lentry1 2638 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET] 2639 cmp ip, #0 2640 bne .Ldone 2641 b .Lentry1 2642.Lentry2: 2643 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4] 2644 cmp ip, r0 2645 beq .Ldone 2646 cmp ip, #0 2647 bne .Lentry3 2648 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4] 2649 cmp ip, #0 2650 bne .Lentry2 2651 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+4] 2652 cmp ip, #0 2653 bne .Ldone 2654 b .Lentry2 2655.Lentry3: 2656 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8] 2657 cmp ip, r0 2658 beq .Ldone 2659 cmp ip, #0 2660 bne .Lentry4 2661 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8] 2662 cmp ip, #0 2663 bne .Lentry3 2664 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+8] 2665 cmp ip, #0 2666 bne .Ldone 2667 b .Lentry3 2668.Lentry4: 2669 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12] 2670 cmp ip, r0 2671 beq .Ldone 2672 cmp ip, #0 2673 bne .Lentry5 2674 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12] 2675 cmp ip, #0 2676 bne .Lentry4 2677 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+12] 2678 cmp ip, #0 2679 bne .Ldone 2680 b .Lentry4 2681.Lentry5: 2682 // Unconditionally store, the inline cache is megamorphic. 2683 str r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+16] 2684.Ldone: 2685 blx lr 2686END art_quick_update_inline_cache 2687 2688// On entry, method is at the bottom of the stack. 2689ENTRY art_quick_compile_optimized 2690 SETUP_SAVE_EVERYTHING_FRAME r0 2691 ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod 2692 mov r1, rSELF @ pass Thread::Current 2693 bl artCompileOptimized @ (ArtMethod*, Thread*) 2694 RESTORE_SAVE_EVERYTHING_FRAME 2695 // We don't need to restore the marking register here, as 2696 // artCompileOptimized doesn't allow thread suspension. 2697 blx lr 2698END art_quick_compile_optimized 2699