/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingException

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                           @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                              @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]   @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                                  @ Store ArtMethod* to bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]   @ Place sp in Thread::Current()->top_quick_frame.
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                                    @ 32 words, 2 for each of the 16 saved doubles.
    .cfi_adjust_cfa_offset 128
    sub sp, #8                                        @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    RUNTIME_CURRENT1 \rTemp                           @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #\runtime_method_offset]
    str \rTemp, [sp, #0]                              @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]   @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    push {r0-r12, lr}                                 @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add sp, #8                                        @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                                  @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add sp, #8                                        @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add sp, #4                                        @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                                   @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                                  @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz r0, 1f                                       @ result non-zero branch over
    bx lr                                             @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz r0, 1f                                        @ result zero branch over
    bx lr                                             @ return
1:
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0              @ save all registers as basis for long jump context
    mov r0, rSELF                                     @ pass Thread::Current
    bl \cxx_name                                      @ \cxx_name(Thread*)
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0                    @ save all registers as basis for long jump context
    mov r0, rSELF                                     @ pass Thread::Current
    bl \cxx_name                                      @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1              @ save all registers as basis for long jump context
    mov r1, rSELF                                     @ pass Thread::Current
    bl \cxx_name                                      @ \cxx_name(Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2                    @ save all registers as basis for long jump context
    mov r2, rSELF                                     @ pass Thread::Current
    bl \cxx_name                                      @ \cxx_name(Thread*)
END \c_name
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros taking advantage of code similarities for downcalls.
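// As an illustrative sketch (the names are taken from an instantiation that appears further
// below in this file), an invocation such as
//   ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
// expands roughly to:
//   ENTRY art_quick_get32_static
//       SETUP_SAVE_REFS_ONLY_FRAME r1                @ save callee saves in case of GC
//       mov r1, rSELF                                @ pass Thread::Current
//       bl artGet32StaticFromCompiledCode            @ (uint32_t field_idx, Thread*)
//       RESTORE_SAVE_REFS_ONLY_FRAME
//       REFRESH_MARKING_REGISTER
//       RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
//   END art_quick_get32_static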
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1                     @ save callee saves in case of GC
    mov r1, rSELF                                     @ pass Thread::Current
    bl \entrypoint                                    @ (uint32_t field_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2                     @ save callee saves in case of GC
    mov r2, rSELF                                     @ pass Thread::Current
    bl \entrypoint                                    @ (field_idx, Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3                     @ save callee saves in case of GC
    mov r3, rSELF                                     @ pass Thread::Current
    bl \entrypoint                                    @ (field_idx, Object*, new_val, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                      @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                                     @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                                        @ pass the fault address stored in LR by the fault handler.
    mov r1, rSELF                                     @ pass Thread::Current
    bl artThrowNullPointerExceptionFromSignal         @ (fault_address, Thread*)
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2                 @ save callee saves in case allocation triggers GC
    mov r2, rSELF                                     @ pass Thread::Current
    mov r3, sp
    bl \cxx_name                                      @ (method_idx, this, Thread*, SP)
    mov r12, r1                                       @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz r0, 1f                                        @ did we find the target? if not go to exception delivery
    bx r12                                            @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   Jvalue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r9             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  |  uint32_t out[n-1]      |
     *  |    :      :             |        Outs
     *  |  uint32_t out[0]        |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS                        @ spill regs (9)
    mov r11, sp                                       @ save the stack pointer
    .cfi_def_cfa_register r11

    mov r9, r3                                        @ move managed thread pointer into r9

    add r4, r2, #4                                    @ create space for method pointer in frame
    sub r4, sp, r4                                    @ reserve & align *stack* to 16 bytes: native calling
    and r4, #0xFFFFFFF0                               @ convention only aligns to 8B, so we have to ensure ART
    mov sp, r4                                        @ 16B alignment ourselves.

    mov r4, r0                                        @ save method*
    add r0, sp, #4                                    @ pass stack pointer + method ptr as dest for memcpy
    bl memcpy                                         @ memcpy (dest, src, bytes)
    mov ip, #0                                        @ set ip to 0
    str ip, [sp]                                      @ store null for method* at bottom of frame

    ldr ip, [r11, #48]                                @ load fp register argument array pointer
    vldm ip, {s0-s15}                                 @ copy s0 - s15

    ldr ip, [r11, #44]                                @ load core register argument array pointer
    mov r0, r4                                        @ restore method*
    add ip, ip, #4                                    @ skip r0
    ldm ip, {r1-r3}                                   @ copy r1 - r3

    REFRESH_MARKING_REGISTER

    ldr ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]    @ get pointer to the code
    blx ip                                            @ call the method

    mov sp, r11                                       @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr r4, [sp, #40]                                 @ load result_in_float
    ldr r9, [sp, #36]                                 @ load the result pointer
    cmp r4, #0
    ite eq
    strdeq r0, [r9]                                   @ store r0/r1 into result pointer
    vstrne d0, [r9]                                   @ store s0-s1/d0 into result pointer

    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}        @ restore spill regs
END art_quick_invoke_stub_internal

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS                        @ Spill regs (9)
    vpush {s16-s31}                                   @ Spill fp-regs (16)
    .cfi_adjust_cfa_offset 64
    SAVE_SIZE=(9*4+16*4)
    mov r11, sp                                       @ Save the stack pointer
    .cfi_def_cfa r11, SAVE_SIZE                       @ CFA = r11 + SAVE_SIZE
    .cfi_remember_state
    mov r10, r1                                       @ Save size of stack
    ldr r9, [r11, #(SAVE_SIZE+4)]                     @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov r6, r2                                        @ Save the pc to call
    sub r7, sp, #12                                   @ Reserve space for stack pointer,
                                                      @ JValue* result, and ArtMethod* slot.
    and r7, #0xFFFFFFF0                               @ Align stack pointer
    mov sp, r7                                        @ Update stack pointer
    str r11, [sp, #4]                                 @ Save old stack pointer
    str r3, [sp, #8]                                  @ Save JValue* result
    mov ip, #0
    str ip, [sp]                                      @ Store null for ArtMethod* at bottom of frame
    // r11 isn't properly spilled in the osr method, so we need to use a DWARF expression.
    // NB: the CFI must be before the call since this is the address gdb will look up.
    // NB: gdb expects that cfa_expression returns the CFA value (not address to it).
    .cfi_escape /* CFA = [sp + 4] + SAVE_SIZE */ \
      0x0f, 6,          /* DW_CFA_def_cfa_expression(len) */ \
      0x92, 13, 4,      /* DW_OP_bregx(reg,offset) */ \
      0x06,             /* DW_OP_deref */ \
      0x23, SAVE_SIZE   /* DW_OP_plus_uconst(val) */
    bl .Losr_entry                                    @ Call the method
    ldr r10, [sp, #8]                                 @ Restore JValue* result
    ldr sp, [sp, #4]                                  @ Restore saved stack pointer
    .cfi_def_cfa sp, SAVE_SIZE                        @ CFA = sp + SAVE_SIZE
    strd r0, [r10]                                    @ Store r0/r1 into result pointer
    vpop {s16-s31}
    .cfi_adjust_cfa_offset -64
    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    .cfi_restore_state
    .cfi_def_cfa r11, SAVE_SIZE                       @ CFA = r11 + SAVE_SIZE
    sub sp, sp, r10                                   @ Reserve space for callee stack
    sub r10, r10, #4
    str lr, [sp, r10]                                 @ Store link register per the compiler ABI
    mov r2, r10
    mov r1, r0
    mov r0, sp
    bl memcpy                                         @ memcpy (dest r0, src r1, bytes r2)
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
     * Both must reside on the stack, between current SP and target SP.
     * The r12 (IP) shall be clobbered rather than retrieved from gprs_.
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}                                 @ Load all fprs from argument fprs_.
    mov sp, r0                                        @ Make SP point to gprs_.
                                                      @ Do not access fprs_ from now, they may be below SP.
    ldm sp, {r0-r11}                                  @ load r0-r11 from gprs_.
    ldr r12, [sp, #60]                                @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12).
    ldr lr, [sp, #56]                                 @ Load LR from gprs_, 56 = 4 * 14.
    ldr sp, [sp, #52]                                 @ Load SP from gprs_ 52 = 4 * 13.
                                                      @ Do not access gprs_ from now, they are below SP.
    REFRESH_MARKING_REGISTER
    bx r12                                            @ Do long jump.
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    ldr r1, [rSELF, #THREAD_ID_OFFSET]
    cbz r0, .Lslow_lock
.Lretry_lock:
    ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    eor r3, r2, r1                                    @ Prepare the value to store if unlocked
                                                      @ (thread id, count of 0 and preserved read barrier bits),
                                                      @ or prepare to compare thread id for recursive lock check
                                                      @ (lock_word.ThreadId() ^ self->ThreadId()).
    ands ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED   @ Test the non-gc bits.
    bne .Lnot_unlocked                                @ Check if unlocked.
    @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits.
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz r2, .Llock_strex_fail                        @ If store failed, retry.
    dmb ish                                           @ Full (LoadLoad|LoadStore) memory barrier.
    bx lr
.Lnot_unlocked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
    @ Check lock word state and thread id together.
    bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
    cbnz r3, .Lslow_lock                              @ if either of the top two bits are set, or the lock word's
                                                      @ thread id did not match, go slow path.
    add r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE        @ Increment the recursive lock count.
    @ Extract the new thin lock count for overflow check.
    ubfx r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
    cbz r2, .Lslow_lock                               @ Zero as the new count indicates overflow, go slow path.
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]   @ strex necessary for read barrier bits.
    cbnz r2, .Llock_strex_fail                        @ If strex failed, retry.
    bx lr
.Llock_strex_fail:
    b .Lretry_lock                                    @ retry
// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object. Note that we
    // need a local label, the assembler complains about target being out of
    // range if we try to jump to `art_quick_lock_object_no_inline`.
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME r1                     @ save callee saves in case we block
    mov r1, rSELF                                     @ pass Thread::Current
    bl artLockObjectFromCode                          @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    ldr r1, [rSELF, #THREAD_ID_OFFSET]
    cbz r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    @ Need to use atomic instructions for read barrier.
    ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#endif
    eor r3, r2, r1                                    @ Prepare the value to store if simply locked
                                                      @ (mostly 0s, and preserved read barrier bits),
                                                      @ or prepare to compare thread id for recursive lock check
                                                      @ (lock_word.ThreadId() ^ self->ThreadId()).
    ands ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED   @ Test the non-gc bits.
    bne .Lnot_simply_locked                           @ Locked recursively or by other thread?
    @ Transition to unlocked.
    dmb ish                                           @ Full (LoadStore|StoreStore) memory barrier.
#ifndef USE_READ_BARRIER
    str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]   @ strex necessary for read barrier bits
    cbnz r2, .Lunlock_strex_fail                      @ If the store failed, retry.
#endif
    bx lr
.Lnot_simply_locked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
    @ Check lock word state and thread id together.
    bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
    cbnz r3, .Lslow_unlock                            @ if either of the top two bits are set, or the lock word's
                                                      @ thread id did not match, go slow path.
    sub r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE        @ Decrement recursive lock count.
#ifndef USE_READ_BARRIER
    str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]   @ strex necessary for read barrier bits.
    cbnz r2, .Lunlock_strex_fail                      @ If the store failed, retry.
#endif
    bx lr
.Lunlock_strex_fail:
    b .Lretry_unlock                                  @ retry
// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object. Note that we
    // need a local label, the assembler complains about target being out of
    // range if we try to jump to `art_quick_unlock_object_no_inline`.
.Lslow_unlock:
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov r1, rSELF                                     @ pass Thread::Current
    bl artUnlockObjectFromCode                        @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check

    push {r0-r2, lr}                                  @ save arguments, padding (r2) and link register
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    bl artInstanceOfFromCode
    cbz r0, .Lthrow_class_cast_exception
    pop {r0-r2, pc}

.Lthrow_class_cast_exception:
    pop {r0-r2, lr}
    .cfi_adjust_cfa_offset -16
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2              @ save all registers as basis for long jump context
    mov r2, rSELF                                     @ pass Thread::Current
    bl artThrowClassCastExceptionForObject            @ (Object*, Class*, Thread*)
    bkpt
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]                     @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset].
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]                         @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}                              @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                                        @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                                   @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj                                 @ pass rObj
    .endif
    mov r2, #\offset                                  @ pass offset
    bl artReadBarrierSlow                             @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0                                @ save return value in rDest
    .endif
    add sp, #8                                        @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest                          @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                                          @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    cmp r3, ip                                        @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Ldo_aput_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}                                  @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, rSELF                                     @ pass Thread::Current
    bl artThrowArrayStoreException                    @ (Class*, Class*, Thread*)
    bkpt                                              @ unreached
END art_quick_aput_obj

// Macro to facilitate adding new allocation entrypoints.
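// Note: these *_DOWNCALL macros are the building blocks used by the
// GENERATE_ALLOC_ENTRYPOINTS_* invocations further down (see arch/quick_alloc_entrypoints.S,
// included at the top of this file). Each expansion follows the same shape: set up a
// kSaveRefsOnly frame, pass Thread::Current as the last argument, call the C++ entrypoint,
// restore the frame, and either return the result or deliver the pending exception.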
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1                     @ save callee saves in case of GC
    mov r1, rSELF                                     @ pass Thread::Current
    bl \entrypoint                                    @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2                     @ save callee saves in case of GC
    mov r2, rSELF                                     @ pass Thread::Current
    bl \entrypoint                                    @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new array allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3                     @ save callee saves in case of GC
    mov r3, rSELF                                     @ pass Thread::Current
    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12                    @ save callee saves in case of GC
    str rSELF, [sp, #-16]!                            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl \entrypoint
    add sp, #16                                       @ strip the extra frame
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset   @ save everything in case of GC
    mov r1, rSELF                                     @ pass Thread::Current
    bl \entrypoint                                    @ (uint32_t index, Thread*)
    cbz r0, 1f                                        @ If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    REFRESH_MARKING_REGISTER
    bx lr
    .cfi_restore_state
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

    /*
     * Called by managed code to resolve a static field and load a non-wide value.
     */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve a static field and load a 64-bit primitive value.
     */
    .extern artGet64StaticFromCompiledCode
ENTRY art_quick_get64_static
    SETUP_SAVE_REFS_ONLY_FRAME r2                     @ save callee saves in case of GC
    mov r1, rSELF                                     @ pass Thread::Current
    bl artGet64StaticFromCompiledCode                 @ (uint32_t field_idx, Thread*)
    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]         @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz r2, 1f                                       @ if there is a pending exception, go deliver it
    bx lr                                             @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_static

    /*
     * Called by managed code to resolve an instance field and load a non-wide value.
     */
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
     */
    .extern artGet64InstanceFromCompiledCode
ENTRY art_quick_get64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r2                     @ save callee saves in case of GC
    mov r2, rSELF                                     @ pass Thread::Current
    bl artGet64InstanceFromCompiledCode               @ (field_idx, Object*, Thread*)
    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]         @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz r2, 1f                                       @ if there is a pending exception, go deliver it
    bx lr                                             @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_instance

    /*
     * Called by managed code to resolve a static field and store a value.
     */
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a non-wide value.
     */
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a wide value.
     */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12                    @ save callee saves in case of GC
                                                      @ r2:r3 contain the wide argument
    str rSELF, [sp, #-16]!                            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl artSet64InstanceFromCompiledCode               @ (field_idx, Object*, new_val, Thread*)
    add sp, #16                                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME                      @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_instance

    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12                    @ save callee saves in case of GC
                                                      @ r2:r3 contain the wide argument
    str rSELF, [sp, #-16]!                            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl artSet64StaticFromCompiledCode                 @ (field_idx, new_val, Thread*)
    add sp, #16                                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME                      @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm specific asm.
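// (The allocators commented out below have hand-written ARM fast paths later in this file,
// provided via the GENERATE_ALLOC_OBJECT_RESOLVED_TLAB and GENERATE_ALLOC_ARRAY_TLAB macros.)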
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // r0: type/return value, rSELF (r9): Thread::Current
    // r1, r2, r3, r12: free.
    ldr r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]    // Check if the thread local
                                                             // allocation stack has room.
                                                             // TODO: consider using ldrd.
    ldr r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp r3, r12
    bhs .Lslow_path\c_name

    ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]   // Load the object size (r3)
    cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE          // Check if the size is for a thread
                                                             // local allocation.
    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bhs .Lslow_path\c_name
                                                             // Compute the rosalloc bracket index
                                                             // from the size. Since the size is
                                                             // already aligned we can combine the
                                                             // two shifts together.
    add r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                                             // Subtract pointer size since there
                                                             // are no runs for 0 byte allocations
                                                             // and the size is already aligned.
                                                             // Load the rosalloc run (r12)
    ldr r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                                             // Load the free list head (r3). This
                                                             // will be the return val.
    ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz r3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]                 // Load the next pointer of the head
                                                             // and update the list head with the
                                                             // next pointer.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                             // Store the class pointer in the
                                                             // header. This also overwrites the
                                                             // next pointer. The offsets are
                                                             // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF r0
    str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                             // Push the new object onto the thread
                                                             // local allocation stack and
                                                             // increment the thread local
                                                             // allocation stack top.
    ldr r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str r3, [r1], #COMPRESSED_REFERENCE_SIZE                 // (Increment r1 as a side effect.)
    str r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                             // Decrement the size of the free list

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //    (The stack initial state is "null" pointers).
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // Such an object is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not-cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub r1, #1
                                                             // TODO: consider combining this store
                                                             // and the list head store above using
                                                             // strd.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov r0, r3                                               // Set the return value and return.
    // No barrier. The class is already observably initialized (otherwise the fast
    // path size check above would fail) and new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    bx lr

.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME r2                     @ save callee saves in case of GC
    mov r1, rSELF                                     @ pass Thread::Current
    bl \cxx_name                                      @ (mirror::Class* cls, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free.
// Need to preserve r0 to the slow path.
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
                                                             // Load thread_local_pos (r12) and
                                                             // thread_local_end (r3) with ldrd.
                                                             // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
#endif
    ldrd r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    sub r12, r3, r12                                         // Compute the remaining buf size.
    ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]   // Load the object size (r3).
    cmp r3, r12                                              // Check if it fits.
    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bhi \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
                                                             // Reload old thread_local_pos (r0)
                                                             // for the return value.
    ldr r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    add r1, r2, r3
    str r1, [rSELF, #THREAD_LOCAL_POS_OFFSET]                // Store new thread_local_pos.
    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the object. It can be either:
    // 1) A partially valid object, with a null class pointer
    //    (because the initial state of TLAB buffers is all 0s/nulls).
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // Such an object is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by checking that the object's class pointer is non-null.
    // Also, unlike rosalloc, the object can never be observed as null).
    ldr r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]            // Increment thread_local_objects.
    add r1, r1, #1
    str r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF r0
    str r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]                // Store the class pointer.
                                                             // Fence. This is "ish" not "ishst" so
                                                             // that the code after this allocation
                                                             // site will see the right values in
                                                             // the fields of the class.
    mov r0, r2
    // No barrier. The class is already observably initialized (otherwise the fast
    // path size check above would fail) and new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    bx lr
.endm

// The common code for art_quick_alloc_object_*region_tlab
// Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up.
// Caller must execute a constructor fence after this.
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path tlab allocation.
    // r0: type, rSELF (r9): Thread::Current
    // r1, r2, r3, r12: free.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME r2                     // Save callee saves in case of GC.
    mov r1, rSELF                                     // Pass Thread::Current.
    bl \entrypoint                                    // (mirror::Class* klass, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1


// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
// and art_quick_alloc_array_resolved/initialized_region_tlab.
//
// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free.
// Need to preserve r0 and r1 to the slow path.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
    and r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED               // Apply alignment mask
                                                             // (addr + 7) & ~7.

                                                             // Load thread_local_pos (r3) and
                                                             // thread_local_end (r12) with ldrd.
                                                             // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
#endif
    ldrd r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    sub r12, r12, r3                                         // Compute the remaining buf size.
    cmp r2, r12                                              // Check if the total_size fits.
    // The array class is always initialized here. Unlike new-instance,
    // this does not act as a double test.
    bhi \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    add r2, r2, r3
    str r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]                // Store new thread_local_pos.
    ldr r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]            // Increment thread_local_objects.
    add r2, r2, #1
    str r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF r0
    str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]                // Store the class pointer.
    str r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]                // Store the array length.
                                                             // Fence. This is "ish" not "ishst" so
                                                             // that the code after this allocation
                                                             // site will see the right values in
                                                             // the fields of the class.
    mov r0, r3
// new-array is special. The class is loaded and immediately goes to the Initialized state
// before it is published. Therefore the only fence needed is for the publication of the object.
// See ClassLinker::CreateArrayClass() for more details.

// For publication of the new array, we don't need a 'dmb ishst' here.
// The compiler generates 'dmb ishst' for all new-array insts.
    bx lr
.endm

// Caller must execute a constructor fence after this.
.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
    // Fast path array allocation for region tlab allocation.
    // r0: mirror::Class* type
    // r1: int32_t component_count
    // rSELF (r9): thread
    // r2, r3, r12: free.
    \size_setup .Lslow_path\name
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
.Lslow_path\name:
    // r0: mirror::Class* klass
    // r1: int32_t component_count
    // r2: Thread* self
    SETUP_SAVE_REFS_ONLY_FRAME r2                     // save callee saves in case of GC
    mov r2, rSELF                                     // pass Thread::Current
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
    cmp r1, r2
    bhi \slow_path
                                                             // Array classes are never finalizable
                                                             // or uninitialized, no need to check.
    ldr r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]        // Load component type
    UNPOISON_HEAP_REF r3
    ldr r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
    lsr r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT             // Component size shift is in high 16
                                                             // bits.
    lsl r2, r1, r3                                           // Calculate data size
    // Add array data offset and alignment.
    add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    add r3, r3, #1                                           // Add 4 to the length only if the
                                                             // component size shift is 3
                                                             // (for 64 bit alignment).
    and r3, r3, #4
    add r2, r2, r3
.endm

.macro COMPUTE_ARRAY_SIZE_8 slow_path
    // Possibly a large object, go slow.
    // Also does negative array size check.
    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
    cmp r1, r2
    bhi \slow_path
    // Add array data offset and alignment.
    add r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_16 slow_path
    // Possibly a large object, go slow.
    // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
    cmp r1, r2
    bhi \slow_path
    lsl r2, r1, #1
    // Add array data offset and alignment.
    add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_32 slow_path
    // Possibly a large object, go slow.
    // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
    cmp r1, r2
    bhi \slow_path
    lsl r2, r1, #2
    // Add array data offset and alignment.
    add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_64 slow_path
    // Possibly a large object, go slow.
    // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
    cmp r1, r2
    bhi \slow_path
    lsl r2, r1, #3
    // Add array data offset and alignment.
    add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

    /*
     * Called by managed code when the value in rSUSPEND has been decremented to 0.
     */
    .extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  @ save everything for GC stack crawl
    mov r0, rSELF
    bl artTestSuspendFromCode                         @ (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    bx lr
END art_quick_test_suspend

ENTRY art_quick_implicit_suspend
    mov r0, rSELF
    SETUP_SAVE_REFS_ONLY_FRAME r1                     @ save callee saves for stack crawl
    bl artTestSuspendFromCode                         @ (Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    bx lr
END art_quick_implicit_suspend

    /*
     * Called by managed code that is attempting to call a method on a proxy class. On entry
     * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
     * frame size of the invoked proxy method agrees with a ref and args callee save frame.
     */
    .extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    mov r2, rSELF                                     @ pass Thread::Current
    mov r3, sp                                        @ pass SP
    blx artQuickProxyInvokeHandler                    @ (Method* proxy method, receiver, Thread*, SP)
    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]         @ load Thread::Current()->exception_
    // Tear down the callee-save frame. Skip arg registers.
1372 add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1373 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1374 RESTORE_SAVE_REFS_ONLY_FRAME 1375 REFRESH_MARKING_REGISTER 1376 cbnz r2, 1f @ success if no exception is pending 1377 vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... 1378 bx lr @ return on success 13791: 1380 DELIVER_PENDING_EXCEPTION 1381END art_quick_proxy_invoke_handler 1382 1383 /* 1384 * Called to resolve an imt conflict. 1385 * r0 is the conflict ArtMethod. 1386 * r12 is a hidden argument that holds the target interface method. 1387 * 1388 * Note that this stub writes to r0, r4, and r12. 1389 */ 1390ENTRY art_quick_imt_conflict_trampoline 1391 ldr r0, [r0, #ART_METHOD_JNI_OFFSET_32] // Load ImtConflictTable 1392 ldr r4, [r0] // Load first entry in ImtConflictTable. 1393.Limt_table_iterate: 1394 cmp r4, r12 1395 // Branch if found. Benchmarks have shown doing a branch here is better. 1396 beq .Limt_table_found 1397 // If the entry is null, the interface method is not in the ImtConflictTable. 1398 cbz r4, .Lconflict_trampoline 1399 // Iterate over the entries of the ImtConflictTable. 1400 ldr r4, [r0, #(2 * __SIZEOF_POINTER__)]! 1401 b .Limt_table_iterate 1402.Limt_table_found: 1403 // We successfully hit an entry in the table. Load the target method 1404 // and jump to it. 1405 ldr r0, [r0, #__SIZEOF_POINTER__] 1406 ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32] 1407.Lconflict_trampoline: 1408 // Pass interface method to the trampoline. 1409 mov r0, r12 1410 INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline 1411END art_quick_imt_conflict_trampoline 1412 1413 .extern artQuickResolutionTrampoline 1414ENTRY art_quick_resolution_trampoline 1415 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 1416 mov r2, rSELF @ pass Thread::Current 1417 mov r3, sp @ pass SP 1418 blx artQuickResolutionTrampoline @ (Method* called, receiver, Thread*, SP) 1419 cbz r0, 1f @ is code pointer null? goto exception 1420 mov r12, r0 1421 ldr r0, [sp, #0] @ load resolved method in r0 1422 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1423 REFRESH_MARKING_REGISTER 1424 bx r12 @ tail-call into actual code 14251: 1426 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1427 DELIVER_PENDING_EXCEPTION 1428END art_quick_resolution_trampoline 1429 1430 /* 1431 * Called to do a generic JNI down-call 1432 */ 1433ENTRY art_quick_generic_jni_trampoline 1434 SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0 1435 1436 // Save rSELF 1437 mov r11, rSELF 1438 // Save SP , so we can have static CFI info. r10 is saved in ref_and_args. 1439 mov r10, sp 1440 .cfi_def_cfa_register r10 1441 1442 sub sp, sp, #5120 1443 1444 // prepare for artQuickGenericJniTrampoline call 1445 // (Thread*, managed_sp, reserved_area) 1446 // r0 r1 r2 <= C calling convention 1447 // rSELF r10 sp <= where they are 1448 1449 mov r0, rSELF // Thread* 1450 mov r1, r10 // SP for the managed frame. 1451 mov r2, sp // reserved area for arguments and other saved data (up to managed frame) 1452 blx artQuickGenericJniTrampoline // (Thread*, managed_sp, reserved_area) 1453 1454 // The C call will have registered the complete save-frame on success. 1455 // The result of the call is: 1456 // r0: pointer to native code, 0 on error. 1457 // The bottom of the reserved area contains values for arg registers, 1458 // hidden arg register and SP for out args for the call. 1459 1460 // Check for error (class init check or locking for synchronized native method can throw). 
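    // (Editorial sketch, hedged, not authoritative.) The protocol around this
    // point is roughly the following C-like pseudocode, using only the runtime
    // entrypoints already named in this trampoline:
    //
    //   void* code = artQuickGenericJniTrampoline(self, managed_sp, reserved_area);
    //   if (code == nullptr) goto exception_in_native;   // class init or locking threw
    //   ... pop arg registers, hidden arg and out-args SP from the reserved area ...
    //   result = call code;                              // the native call below
    //   artQuickGenericJniEndTrampoline(self, result, result_f);
    //   if (self->exception_ != nullptr) goto exception_in_native;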
1461 cbz r0, .Lexception_in_native 1462 1463 // Save the code pointer 1464 mov lr, r0 1465 1466 // Load parameters from frame into registers r0-r3 (soft-float), 1467 // hidden arg (r4) for @CriticalNative and SP for out args. 1468 pop {r0-r3, r4, ip} 1469 1470 // Apply the new SP for out args, releasing unneeded reserved area. 1471 mov sp, ip 1472 1473 // Softfloat. 1474 // TODO: Change to hardfloat when supported. 1475 1476 blx lr // native call. 1477 1478 // result sign extension is handled in C code 1479 // prepare for artQuickGenericJniEndTrampoline call 1480 // (Thread*, result, result_f) 1481 // r0 r2,r3 stack <= C calling convention 1482 // r11 r0,r1 r0,r1 <= where they are 1483 sub sp, sp, #8 // Stack alignment. 1484 1485 push {r0-r1} 1486 mov r3, r1 1487 mov r2, r0 1488 mov r0, r11 1489 1490 blx artQuickGenericJniEndTrampoline 1491 1492 // Restore self pointer. 1493 mov rSELF, r11 1494 1495 // Pending exceptions possible. 1496 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1497 cbnz r2, .Lexception_in_native 1498 1499 // Tear down the alloca. 1500 mov sp, r10 1501 .cfi_remember_state 1502 .cfi_def_cfa_register sp 1503 1504 // Tear down the callee-save frame. Skip arg registers. 1505 add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY 1506 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY) 1507 RESTORE_SAVE_REFS_ONLY_FRAME 1508 REFRESH_MARKING_REGISTER 1509 1510 // store into fpr, for when it's a fpr return... 1511 vmov d0, r0, r1 1512 bx lr // ret 1513 1514 // Undo the unwinding information from above since it doesn't apply below. 1515 .cfi_restore_state 1516 .cfi_def_cfa r10, FRAME_SIZE_SAVE_REFS_AND_ARGS 1517.Lexception_in_native: 1518 ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] 1519 add ip, ip, #-1 // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE. 1520 mov sp, ip 1521 bl art_deliver_pending_exception 1522END art_quick_generic_jni_trampoline 1523 1524ENTRY art_deliver_pending_exception 1525 # This will create a new save-all frame, required by the runtime. 1526 DELIVER_PENDING_EXCEPTION 1527END art_deliver_pending_exception 1528 1529 .extern artQuickToInterpreterBridge 1530ENTRY art_quick_to_interpreter_bridge 1531 SETUP_SAVE_REFS_AND_ARGS_FRAME r1 1532 mov r1, rSELF @ pass Thread::Current 1533 mov r2, sp @ pass SP 1534 blx artQuickToInterpreterBridge @ (Method* method, Thread*, SP) 1535 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1536 // Tear down the callee-save frame. Skip arg registers. 1537 add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1538 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1539 RESTORE_SAVE_REFS_ONLY_FRAME 1540 REFRESH_MARKING_REGISTER 1541 cbnz r2, 1f @ success if no exception is pending 1542 vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... 1543 bx lr @ return on success 15441: 1545 DELIVER_PENDING_EXCEPTION 1546END art_quick_to_interpreter_bridge 1547 1548/* 1549 * Called to attempt to execute an obsolete method. 1550 */ 1551ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod 1552 1553 /* 1554 * Routine that intercepts method calls and returns. 
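     *
     * (Editorial sketch, hedged.) The entry stub below behaves roughly like:
     *
     *   const void* code =
     *       artInstrumentationMethodEntryFromCode(method, receiver, self, sp);
     *   if (code == nullptr) deliver_pending_exception();
     *   lr = art_quick_instrumentation_exit;   // intercept the return as well
     *   branch to code;
     *
     * and the exit stub calls artInstrumentationMethodExitFromCode() to obtain
     * either the original return address or a request to deoptimize.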
1555 */ 1556 .extern artInstrumentationMethodEntryFromCode 1557 .extern artInstrumentationMethodExitFromCode 1558ENTRY art_quick_instrumentation_entry 1559 @ Make stack crawlable and clobber r2 and r3 (post saving) 1560 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 1561 @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs. 1562 str r0, [sp, #4] 1563 mov r2, rSELF @ pass Thread::Current 1564 mov r3, sp @ pass SP 1565 blx artInstrumentationMethodEntryFromCode @ (Method*, Object*, Thread*, SP) 1566 cbz r0, .Ldeliver_instrumentation_entry_exception 1567 @ Deliver exception if we got nullptr as function. 1568 mov r12, r0 @ r12 holds reference to code 1569 ldr r0, [sp, #4] @ restore r0 1570 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1571 adr lr, art_quick_instrumentation_exit + /* thumb mode */ 1 1572 @ load art_quick_instrumentation_exit into lr in thumb mode 1573 REFRESH_MARKING_REGISTER 1574 bx r12 @ call method with lr set to art_quick_instrumentation_exit 1575.Ldeliver_instrumentation_entry_exception: 1576 @ Deliver exception for art_quick_instrumentation_entry placed after 1577 @ art_quick_instrumentation_exit so that the fallthrough works. 1578 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1579 DELIVER_PENDING_EXCEPTION 1580END art_quick_instrumentation_entry 1581 1582ENTRY art_quick_instrumentation_exit 1583 mov lr, #0 @ link register is to here, so clobber with 0 for later checks 1584 SETUP_SAVE_EVERYTHING_FRAME r2 1585 1586 add r3, sp, #8 @ store fpr_res pointer, in kSaveEverything frame 1587 add r2, sp, #136 @ store gpr_res pointer, in kSaveEverything frame 1588 mov r1, sp @ pass SP 1589 mov r0, rSELF @ pass Thread::Current 1590 blx artInstrumentationMethodExitFromCode @ (Thread*, SP, gpr_res*, fpr_res*) 1591 1592 cbz r0, .Ldo_deliver_instrumentation_exception 1593 @ Deliver exception if we got nullptr as function. 1594 cbnz r1, .Ldeoptimize 1595 // Normal return. 1596 str r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4] 1597 @ Set return pc. 1598 RESTORE_SAVE_EVERYTHING_FRAME 1599 REFRESH_MARKING_REGISTER 1600 bx lr 1601.Ldo_deliver_instrumentation_exception: 1602 DELIVER_PENDING_EXCEPTION_FRAME_READY 1603.Ldeoptimize: 1604 str r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4] 1605 @ Set return pc. 1606 RESTORE_SAVE_EVERYTHING_FRAME 1607 // Jump to art_quick_deoptimize. 1608 b art_quick_deoptimize 1609END art_quick_instrumentation_exit 1610 1611 /* 1612 * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization 1613 * will long jump to the upcall with a special exception of -1. 1614 */ 1615 .extern artDeoptimize 1616ENTRY art_quick_deoptimize 1617 SETUP_SAVE_EVERYTHING_FRAME r0 1618 mov r0, rSELF @ pass Thread::Current 1619 blx artDeoptimize @ (Thread*) 1620END art_quick_deoptimize 1621 1622 /* 1623 * Compiled code has requested that we deoptimize into the interpreter. The deoptimization 1624 * will long jump to the interpreter bridge. 1625 */ 1626 .extern artDeoptimizeFromCompiledCode 1627ENTRY art_quick_deoptimize_from_compiled_code 1628 SETUP_SAVE_EVERYTHING_FRAME r1 1629 mov r1, rSELF @ pass Thread::Current 1630 blx artDeoptimizeFromCompiledCode @ (DeoptimizationKind, Thread*) 1631END art_quick_deoptimize_from_compiled_code 1632 1633 /* 1634 * Signed 64-bit integer multiply. 1635 * 1636 * Consider WXxYZ (r1r0 x r3r2) with a long multiply: 1637 * WX 1638 * x YZ 1639 * -------- 1640 * ZW ZX 1641 * YW YX 1642 * 1643 * The low word of the result holds ZX, the high word holds 1644 * (ZW+YX) + (the high overflow from ZX). 
     * YW doesn't matter because it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     */
    /* mul-long vAA, vBB, vCC */
ENTRY art_quick_mul_long
    push    {r9-r10}
    .cfi_adjust_cfa_offset 8
    .cfi_rel_offset r9, 0
    .cfi_rel_offset r10, 4
    mul     ip, r2, r1                  @ ip<- ZxW
    umull   r9, r10, r2, r0             @ r9/r10 <- ZxX
    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
    add     r10, r2, r10                @ r10<- r10 + low(ZxW + (YxX))
    mov     r0, r9
    mov     r1, r10
    pop     {r9-r10}
    .cfi_adjust_cfa_offset -8
    .cfi_restore r9
    .cfi_restore r10
    bx      lr
END art_quick_mul_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* shl-long vAA, vBB, vCC */
ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r1, r1, asl r2              @ r1<- r1 << r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
    mov     r0, r0, asl r2              @ r0<- r0 << r2
    bx      lr
END art_quick_shl_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* shr-long vAA, vBB, vCC */
ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
    mov     r1, r1, asr r2              @ r1<- r1 >> r2
    bx      lr
END art_quick_shr_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* ushr-long vAA, vBB, vCC */
ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
    bx      lr
END art_quick_ushr_long

    /*
     * String's indexOf.
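     *
     * (Editorial sketch, hedged.) Ignoring the compressed-string case, the fast
     * path below is roughly equivalent to the following C, with illustrative names:
     *
     *   int32_t indexof(const uint16_t* chars, int32_t count, uint16_t ch, int32_t start) {
     *     if (start < 0) start = 0;
     *     if (start > count) start = count;
     *     for (int32_t i = start; i < count; ++i) {   // the assembly unrolls this 4x
     *       if (chars[i] == ch) return i;
     *     }
     *     return -1;
     *   }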
1738 * 1739 * On entry: 1740 * r0: string object (known non-null) 1741 * r1: char to match (known <= 0xFFFF) 1742 * r2: Starting offset in string data 1743 */ 1744ENTRY art_quick_indexof 1745 push {r4, r10-r11, lr} @ 4 words of callee saves 1746 .cfi_adjust_cfa_offset 16 1747 .cfi_rel_offset r4, 0 1748 .cfi_rel_offset r10, 4 1749 .cfi_rel_offset r11, 8 1750 .cfi_rel_offset lr, 12 1751#if (STRING_COMPRESSION_FEATURE) 1752 ldr r4, [r0, #MIRROR_STRING_COUNT_OFFSET] 1753#else 1754 ldr r3, [r0, #MIRROR_STRING_COUNT_OFFSET] 1755#endif 1756 add r0, #MIRROR_STRING_VALUE_OFFSET 1757#if (STRING_COMPRESSION_FEATURE) 1758 /* r4 count (with flag) and r3 holds actual length */ 1759 lsr r3, r4, #1 1760#endif 1761 /* Clamp start to [0..count] */ 1762 cmp r2, #0 1763 it lt 1764 movlt r2, #0 1765 cmp r2, r3 1766 it gt 1767 movgt r2, r3 1768 1769 /* Save a copy in r12 to later compute result */ 1770 mov r12, r0 1771 1772 /* Build pointer to start of data to compare and pre-bias */ 1773#if (STRING_COMPRESSION_FEATURE) 1774 lsrs r4, r4, #1 1775 bcc .Lstring_indexof_compressed 1776#endif 1777 add r0, r0, r2, lsl #1 1778 sub r0, #2 1779 1780 /* Compute iteration count */ 1781 sub r2, r3, r2 1782 1783 /* 1784 * At this point we have: 1785 * r0: start of data to test 1786 * r1: char to compare 1787 * r2: iteration count 1788 * r4: compression style (used temporarily) 1789 * r12: original start of string data 1790 * r3, r4, r10, r11 available for loading string data 1791 */ 1792 1793 subs r2, #4 1794 blt .Lindexof_remainder 1795 1796.Lindexof_loop4: 1797 ldrh r3, [r0, #2]! 1798 ldrh r4, [r0, #2]! 1799 ldrh r10, [r0, #2]! 1800 ldrh r11, [r0, #2]! 1801 cmp r3, r1 1802 beq .Lmatch_0 1803 cmp r4, r1 1804 beq .Lmatch_1 1805 cmp r10, r1 1806 beq .Lmatch_2 1807 cmp r11, r1 1808 beq .Lmatch_3 1809 subs r2, #4 1810 bge .Lindexof_loop4 1811 1812.Lindexof_remainder: 1813 adds r2, #4 1814 beq .Lindexof_nomatch 1815 1816.Lindexof_loop1: 1817 ldrh r3, [r0, #2]! 1818 cmp r3, r1 1819 beq .Lmatch_3 1820 subs r2, #1 1821 bne .Lindexof_loop1 1822 1823.Lindexof_nomatch: 1824 mov r0, #-1 1825 pop {r4, r10-r11, pc} 1826 1827.Lmatch_0: 1828 sub r0, #6 1829 sub r0, r12 1830 asr r0, r0, #1 1831 pop {r4, r10-r11, pc} 1832.Lmatch_1: 1833 sub r0, #4 1834 sub r0, r12 1835 asr r0, r0, #1 1836 pop {r4, r10-r11, pc} 1837.Lmatch_2: 1838 sub r0, #2 1839 sub r0, r12 1840 asr r0, r0, #1 1841 pop {r4, r10-r11, pc} 1842.Lmatch_3: 1843 sub r0, r12 1844 asr r0, r0, #1 1845 pop {r4, r10-r11, pc} 1846#if (STRING_COMPRESSION_FEATURE) 1847.Lstring_indexof_compressed: 1848 add r0, r0, r2 1849 sub r0, #1 1850 sub r2, r3, r2 1851.Lstring_indexof_compressed_loop: 1852 subs r2, #1 1853 blt .Lindexof_nomatch 1854 ldrb r3, [r0, #1]! 1855 cmp r3, r1 1856 beq .Lstring_indexof_compressed_matched 1857 b .Lstring_indexof_compressed_loop 1858.Lstring_indexof_compressed_matched: 1859 sub r0, r12 1860 pop {r4, r10-r11, pc} 1861#endif 1862END art_quick_indexof 1863 1864 /* Assembly routines used to handle ABI differences. 
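     *
     * (Editorial note, hedged.) Managed code on this target keeps floating-point
     * values in VFP registers (s0/d0), while the C helpers called below (fmod,
     * fmodf, art_l2f, ...) use the soft-float AAPCS and take/return values in
     * core registers; hence the vmov shuffles around the calls. Conceptually,
     * for example:
     *
     *   double art_quick_fmod(double a, double b) {  // a arrives in d0, b in d1
     *     return fmod(a, b);                         // called via r0-r3, result in r0/r1
     *   }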
     */

    /* double fmod(double a, double b) */
    .extern fmod
ENTRY art_quick_fmod
    push   {lr}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset lr, 0
    sub    sp, #4
    .cfi_adjust_cfa_offset 4
    vmov   r0, r1, d0
    vmov   r2, r3, d1
    bl     fmod
    vmov   d0, r0, r1
    add    sp, #4
    .cfi_adjust_cfa_offset -4
    pop    {pc}
END art_quick_fmod

    /* float fmodf(float a, float b) */
    .extern fmodf
ENTRY art_quick_fmodf
    push   {lr}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset lr, 0
    sub    sp, #4
    .cfi_adjust_cfa_offset 4
    vmov   r0, r1, d0
    bl     fmodf
    vmov   s0, r0
    add    sp, #4
    .cfi_adjust_cfa_offset -4
    pop    {pc}
END art_quick_fmodf

    /* int64_t art_d2l(double d) */
    .extern art_d2l
ENTRY art_quick_d2l
    vmov   r0, r1, d0
    b      art_d2l
END art_quick_d2l

    /* int64_t art_f2l(float f) */
    .extern art_f2l
ENTRY art_quick_f2l
    vmov   r0, s0
    b      art_f2l
END art_quick_f2l

    /* float art_l2f(int64_t l) */
    .extern art_l2f
ENTRY art_quick_l2f
    push   {lr}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset lr, 0
    sub    sp, #4
    .cfi_adjust_cfa_offset 4
    bl     art_l2f
    vmov   s0, r0
    add    sp, #4
    .cfi_adjust_cfa_offset -4
    pop    {pc}
END art_quick_l2f

    .extern artStringBuilderAppend
ENTRY art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
    add    r1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  @ pass args
    mov    r2, rSELF                    @ pass Thread::Current
    bl     artStringBuilderAppend       @ (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END art_quick_string_builder_append

    /*
     * Create a function `name` calling the ReadBarrier::Mark routine,
     * getting its argument and returning its result through register
     * `reg`, saving and restoring all caller-save registers.
     *
     * IP is clobbered; `reg` must not be IP.
     *
     * If `reg` is different from `r0`, the generated function follows a
     * non-standard runtime calling convention:
     * - register `reg` is used to pass the (sole) argument of this
     *   function (instead of R0);
     * - register `reg` is used to return the result of this function
     *   (instead of R0);
     * - R0 is treated like a normal (non-argument) caller-save register;
     * - everything else is the same as in the standard runtime calling
     *   convention (e.g. standard callee-save registers are preserved).
     */
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
    // Null check so that we can load the lock word.
    SMART_CBZ \reg, .Lret_rb_\name
    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
    beq .Lnot_marked_rb_\name
    // Already marked, return right away.
.Lret_rb_\name:
    bx lr

.Lnot_marked_rb_\name:
    // Test that both the forwarding state bits are 1.
#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
    // the highest bits and the "forwarding address" state to have all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
1974#endif 1975 cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) 1976 bhs .Lret_forwarding_address\name 1977 1978.Lslow_rb_\name: 1979 // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to 1980 // make a tail call here. Currently, it serves only for stack alignment but 1981 // we may reintroduce kSaveEverything calls here in the future. 1982 push {r0-r4, r9, ip, lr} @ save return address, core caller-save registers and ip 1983 .cfi_adjust_cfa_offset 32 1984 .cfi_rel_offset r0, 0 1985 .cfi_rel_offset r1, 4 1986 .cfi_rel_offset r2, 8 1987 .cfi_rel_offset r3, 12 1988 .cfi_rel_offset r4, 16 1989 .cfi_rel_offset r9, 20 1990 .cfi_rel_offset ip, 24 1991 .cfi_rel_offset lr, 28 1992 1993 .ifnc \reg, r0 1994 mov r0, \reg @ pass arg1 - obj from `reg` 1995 .endif 1996 1997 vpush {s0-s15} @ save floating-point caller-save registers 1998 .cfi_adjust_cfa_offset 64 1999 bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) 2000 vpop {s0-s15} @ restore floating-point registers 2001 .cfi_adjust_cfa_offset -64 2002 2003 .ifc \reg, r0 @ Save result to the stack slot or destination register. 2004 str r0, [sp, #0] 2005 .else 2006 .ifc \reg, r1 2007 str r0, [sp, #4] 2008 .else 2009 .ifc \reg, r2 2010 str r0, [sp, #8] 2011 .else 2012 .ifc \reg, r3 2013 str r0, [sp, #12] 2014 .else 2015 .ifc \reg, r4 2016 str r0, [sp, #16] 2017 .else 2018 .ifc \reg, r9 2019 str r0, [sp, #20] 2020 .else 2021 mov \reg, r0 2022 .endif 2023 .endif 2024 .endif 2025 .endif 2026 .endif 2027 .endif 2028 2029 pop {r0-r4, r9, ip, lr} @ restore caller-save registers 2030 .cfi_adjust_cfa_offset -32 2031 .cfi_restore r0 2032 .cfi_restore r1 2033 .cfi_restore r2 2034 .cfi_restore r3 2035 .cfi_restore r4 2036 .cfi_restore r9 2037 .cfi_restore ip 2038 .cfi_restore lr 2039 bx lr 2040.Lret_forwarding_address\name: 2041 // Shift left by the forwarding address shift. This clears out the state bits since they are 2042 // in the top 2 bits of the lock word. 2043 lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 2044 bx lr 2045END \name 2046.endm 2047 2048READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0 2049READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1 2050READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2 2051READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3 2052READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4 2053READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5 2054READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6 2055READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7 2056READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8 2057READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 2058READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 2059READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 2060 2061// Helper macros for Baker CC read barrier mark introspection (BRBMI). 2062.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register 2063 \macro_for_register r0 2064 \macro_for_register r1 2065 \macro_for_register r2 2066 \macro_for_register r3 2067 \macro_for_register r4 2068 \macro_for_register r5 2069 \macro_for_register r6 2070 \macro_for_register r7 2071 \macro_for_reserved_register // r8 (rMR) is the marking register. 2072 \macro_for_register r9 2073 \macro_for_register r10 2074 \macro_for_register r11 2075 \macro_for_reserved_register // IP is reserved. 2076 \macro_for_reserved_register // SP is reserved. 2077 \macro_for_reserved_register // LR is reserved. 
2078 \macro_for_reserved_register // PC is reserved. 2079.endm 2080 2081.macro BRBMI_RETURN_SWITCH_CASE reg 2082 .balign 8 2083.Lmark_introspection_return_switch_case_\reg: 2084 mov rMR, #1 2085 mov \reg, ip 2086 bx lr 2087.endm 2088 2089.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg 2090 .byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2 2091.endm 2092 2093.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET 2094 .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 2095.endm 2096 2097#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET 2098#error "Array and field introspection code sharing requires same LDR offset." 2099#endif 2100.macro BRBMI_ARRAY_LOAD index_reg 2101 ldr ip, [ip, \index_reg, lsl #2] // 4 bytes. 2102 b art_quick_read_barrier_mark_introspection // Should be 2 bytes, encoding T2. 2103 .balign 8 // Add padding to 8 bytes. 2104.endm 2105 2106.macro BRBMI_BKPT_FILL_4B 2107 bkpt 0 2108 bkpt 0 2109.endm 2110 2111.macro BRBMI_BKPT_FILL_8B 2112 BRBMI_BKPT_FILL_4B 2113 BRBMI_BKPT_FILL_4B 2114.endm 2115 2116.macro BRBMI_RUNTIME_CALL 2117 // Note: This macro generates exactly 22 bytes of code. The core register 2118 // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions. 2119 2120 push {r0-r3, r7, lr} // Save return address and caller-save registers. 2121 .cfi_adjust_cfa_offset 24 2122 .cfi_rel_offset r0, 0 2123 .cfi_rel_offset r1, 4 2124 .cfi_rel_offset r2, 8 2125 .cfi_rel_offset r3, 12 2126 .cfi_rel_offset r7, 16 2127 .cfi_rel_offset lr, 20 2128 2129 mov r0, ip // Pass the reference. 2130 vpush {s0-s15} // save floating-point caller-save registers 2131 .cfi_adjust_cfa_offset 64 2132 bl artReadBarrierMark // r0 <- artReadBarrierMark(obj) 2133 vpop {s0-s15} // restore floating-point registers 2134 .cfi_adjust_cfa_offset -64 2135 mov ip, r0 // Move reference to ip in preparation for return switch. 2136 2137 pop {r0-r3, r7, lr} // Restore registers. 2138 .cfi_adjust_cfa_offset -24 2139 .cfi_restore r0 2140 .cfi_restore r1 2141 .cfi_restore r2 2142 .cfi_restore r3 2143 .cfi_restore r7 2144 .cfi_restore lr 2145.endm 2146 2147.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix 2148 // If reference is null, just return it in the right register. 2149 cmp ip, #0 2150 beq .Lmark_introspection_return\label_suffix 2151 // Use rMR as temp and check the mark bit of the reference. 2152 ldr rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 2153 tst rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED 2154 beq .Lmark_introspection_unmarked\label_suffix 2155.Lmark_introspection_return\label_suffix: 2156.endm 2157 2158.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix 2159.Lmark_introspection_unmarked\label_suffix: 2160 // Check if the top two bits are one, if this is the case it is a forwarding address. 2161#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) 2162 // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in 2163 // the highest bits and the "forwarding address" state to have all bits set. 2164#error "Unexpected lock word state shift or forwarding address state value." 2165#endif 2166 cmp rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) 2167 bhs .Lmark_introspection_forwarding_address\label_suffix 2168.endm 2169 2170.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix 2171.Lmark_introspection_forwarding_address\label_suffix: 2172 // Note: This macro generates exactly 22 bytes of code, the branch is near. 
2173 2174 // Shift left by the forwarding address shift. This clears out the state bits since they are 2175 // in the top 2 bits of the lock word. 2176 lsl ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 2177 b .Lmark_introspection_return\label_suffix 2178.endm 2179 2180.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset 2181 // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. 2182 ldrh rMR, [lr, #(-1 + \ldr_offset + 2)] 2183.endm 2184 2185.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset 2186 // Load the 16-bit instruction. Adjust for the thumb state in LR. 2187 ldrh rMR, [lr, #(-1 + \ldr_offset)] 2188.endm 2189 2190.macro BRBMI_EXTRACT_RETURN_REG_wide 2191 lsr rMR, rMR, #12 // Extract `ref_reg`. 2192.endm 2193 2194.macro BRBMI_EXTRACT_RETURN_REG_narrow 2195 and rMR, rMR, #7 // Extract `ref_reg`. 2196.endm 2197 2198.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix 2199 BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset 2200 BRBMI_EXTRACT_RETURN_REG\label_suffix 2201.endm 2202 2203.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix 2204 .balign 32 2205 .thumb_func 2206 .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function 2207 .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix 2208 .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix 2209art_quick_read_barrier_mark_introspection_gc_roots\label_suffix: 2210 BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix 2211.endm 2212 2213.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix 2214 .balign 16 2215 // Note: Generates exactly 16 bytes of code. 2216 BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix 2217 BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix 2218 b .Lmark_introspection_runtime_call 2219.endm 2220 2221 /* 2222 * Use introspection to load a reference from the same address as the LDR 2223 * instruction in generated code would load (unless loaded by the thunk, 2224 * see below), call ReadBarrier::Mark() with that reference if needed 2225 * and return it in the same register as the LDR instruction would load. 2226 * 2227 * The entrypoint is called through a thunk that differs across load kinds. 2228 * For field and array loads the LDR instruction in generated code follows 2229 * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning) 2230 * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where 2231 * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk 2232 * knows the holder and performs the gray bit check, returning to the LDR 2233 * instruction if the object is not gray, so this entrypoint no longer 2234 * needs to know anything about the holder. For GC root loads, the LDR 2235 * instruction in generated code precedes the branch to the thunk, i.e. the 2236 * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1) 2237 * where the -1 is again the Thumb mode bit adjustment, and the thunk does 2238 * not do the gray bit check. 2239 * 2240 * For field accesses and array loads with a constant index the thunk loads 2241 * the reference into IP using introspection and calls the main entrypoint 2242 * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or 2243 * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known 2244 * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET) 2245 * from the main entrypoint and the thunk adjusts the entrypoint pointer. 
2246 * With heap poisoning enabled, the passed reference is poisoned. 2247 * 2248 * For array accesses with non-constant index, the thunk inserts the bits 2249 * 0-5 of the LDR instruction to the entrypoint address, effectively 2250 * calculating a switch case label based on the index register (bits 0-3) 2251 * and adding an extra offset (bits 4-5 hold the shift which is always 2 2252 * for reference loads) to differentiate from the main entrypoint, then 2253 * moves the base register to IP and jumps to the switch case. Therefore 2254 * we need to align the main entrypoint to 512 bytes, accounting for 2255 * a 256-byte offset followed by 16 array entrypoints starting at 2256 * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR 2257 * (register) and a branch to the main entrypoint. 2258 * 2259 * For GC root accesses we cannot use the main entrypoint because of the 2260 * different offset where the LDR instruction in generated code is located. 2261 * (And even with heap poisoning enabled, GC roots are not poisoned.) 2262 * To re-use the same entrypoint pointer in generated code, we make sure 2263 * that the gc root entrypoint (a copy of the entrypoint with a different 2264 * offset for introspection loads) is located at a known offset (0xc0/0xe0 2265 * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/ 2266 * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the 2267 * main entrypoint and the GC root thunk adjusts the entrypoint pointer, 2268 * moves the root register to IP and jumps to the customized entrypoint, 2269 * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}. 2270 * The thunk also performs all the fast-path checks, so we need just the 2271 * slow path. 2272 * 2273 * Intrinsic CAS operations (VarHandle*CompareAnd{Set,Exchange}* and 2274 * UnsafeCASObject) use similar code to the GC roots wide load but using 2275 * MOV (register, T3) instead of the LDR (immediate, T3), with destination 2276 * register in bits 8-11 rather than 12-15. Therefore they have their own 2277 * entrypoint, art_quick_read_barrier_mark_introspection_intrinsic_cas 2278 * at the offset BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET. 2279 * This is used only for high registers, low registers reuse the GC roots 2280 * narrow load entrypoint as the low 3 bits of the destination register 2281 * for MOV (register) encoding T1 match the LDR (immediate) encoding T1. 2282 * 2283 * The code structure is 2284 * art_quick_read_barrier_mark_introspection: // @0x00 2285 * Up to 32 bytes code for main entrypoint fast-path code for fields 2286 * (and array elements with constant offset) with LDR encoding T3; 2287 * jumps to the switch in the "narrow" entrypoint. 2288 * art_quick_read_barrier_mark_introspection_narrow: // @0x20 2289 * Up to 48 bytes code for fast path code for fields (and array 2290 * elements with constant offset) with LDR encoding T1, ending in the 2291 * return switch instruction TBB and the table with switch offsets. 2292 * .Lmark_introspection_return_switch_case_r0: // @0x50 2293 * Exactly 88 bytes of code for the return switch cases (8 bytes per 2294 * case, 11 cases; no code for reserved registers). 2295 * .Lmark_introspection_forwarding_address_narrow: // @0xa8 2296 * Exactly 6 bytes to extract the forwarding address and jump to the 2297 * "narrow" entrypoint fast path. 2298 * .Lmark_introspection_return_switch_case_bad: // @0xae 2299 * Exactly 2 bytes, bkpt for unexpected return register. 
2300 * .Lmark_introspection_unmarked_narrow: // @0xb0 2301 * Exactly 16 bytes for "narrow" entrypoint slow path. 2302 * art_quick_read_barrier_mark_introspection_gc_roots_wide: // @0xc0 2303 * GC root entrypoint code for LDR encoding T3 (10 bytes); loads and 2304 * extracts the return register and jumps to the runtime call. 2305 * .Lmark_introspection_forwarding_address_wide: // @0xca 2306 * Exactly 6 bytes to extract the forwarding address and jump to the 2307 * "wide" entrypoint fast path. 2308 * .Lmark_introspection_unmarked_wide: // @0xd0 2309 * Exactly 16 bytes for "wide" entrypoint slow path. 2310 * art_quick_read_barrier_mark_introspection_gc_roots_narrow: // @0xe0 2311 * GC root entrypoint code for LDR encoding T1 (8 bytes); loads and 2312 * extracts the return register and falls through to the runtime call. 2313 * .Lmark_introspection_runtime_call: // @0xe8 2314 * Exactly 24 bytes for the runtime call to MarkReg() and jump to the 2315 * return switch. 2316 * art_quick_read_barrier_mark_introspection_arrays: // @0x100 2317 * Exactly 128 bytes for array load switch cases (16x2 instructions). 2318 * art_quick_read_barrier_mark_introspection_intrinsic_cas: // @0x180 2319 * Intrinsic CAS entrypoint for MOV (register) encoding T3 (6 bytes). 2320 * Loads the return register and jumps to the runtime call. 2321 */ 2322#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2323ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512 2324 // At this point, IP contains the reference, rMR is clobbered by the thunk 2325 // and can be freely used as it will be set back to 1 before returning. 2326 // For heap poisoning, the reference is poisoned, so unpoison it first. 2327 UNPOISON_HEAP_REF ip 2328 // Check for null or marked, lock word is loaded into rMR. 2329 BRBMI_CHECK_NULL_AND_MARKED _wide 2330 // Load and extract the return register from the instruction. 2331 BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide 2332 b .Lmark_introspection_return_switch 2333 2334 .balign 32 2335 .thumb_func 2336 .type art_quick_read_barrier_mark_introspection_narrow, #function 2337 .hidden art_quick_read_barrier_mark_introspection_narrow 2338 .global art_quick_read_barrier_mark_introspection_narrow 2339art_quick_read_barrier_mark_introspection_narrow: 2340 // At this point, IP contains the reference, rMR is clobbered by the thunk 2341 // and can be freely used as it will be set back to 1 before returning. 2342 // For heap poisoning, the reference is poisoned, so unpoison it first. 2343 UNPOISON_HEAP_REF ip 2344 // Check for null or marked, lock word is loaded into rMR. 2345 BRBMI_CHECK_NULL_AND_MARKED _narrow 2346 // Load and extract the return register from the instruction. 2347 BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow 2348.Lmark_introspection_return_switch: 2349 tbb [pc, rMR] // Jump to the switch case. 2350.Lmark_introspection_return_table: 2351 BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET 2352 BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */ 2353 2354 .balign 8 2355 BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow // 6 bytes 2356.Lmark_introspection_return_switch_case_bad: 2357 bkpt // 2 bytes 2358 2359 BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow 2360 2361 // 8 bytes for the loading and extracting of the return register. 
2362 BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide 2363 // 2 bytes for near branch to the runtime call. 2364 b .Lmark_introspection_runtime_call 2365 2366 BRBMI_EXTRACT_FORWARDING_ADDRESS _wide // Not even 4-byte aligned. 2367 2368 BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide 2369 2370 // 8 bytes for the loading and extracting of the return register. 2371 BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow 2372 // And the runtime call and branch to the switch taking exactly 24 bytes 2373 // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch) 2374 // shall take the rest of the 32-byte section (within a cache line). 2375.Lmark_introspection_runtime_call: 2376 BRBMI_RUNTIME_CALL 2377 b .Lmark_introspection_return_switch 2378 2379 .balign 256 2380 .thumb_func 2381 .type art_quick_read_barrier_mark_introspection_arrays, #function 2382 .hidden art_quick_read_barrier_mark_introspection_arrays 2383 .global art_quick_read_barrier_mark_introspection_arrays 2384art_quick_read_barrier_mark_introspection_arrays: 2385 BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B 2386 2387 .balign 8 2388 .thumb_func 2389 .type art_quick_read_barrier_mark_introspection_intrinsic_cas, #function 2390 .hidden art_quick_read_barrier_mark_introspection_intrinsic_cas 2391 .global art_quick_read_barrier_mark_introspection_intrinsic_cas 2392art_quick_read_barrier_mark_introspection_intrinsic_cas: 2393 // Load the byte of the MOV instruction that contains Rd. Adjust for the thumb state in LR. 2394 // The MOV (register, T3) is |11101010010|S|1111|(0)000|Rd|0000|Rm|, so the byte we read 2395 // here, i.e. |(0)000|Rd|, contains only the register number, the top 4 bits are 0. 2396 ldrb rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET + 3)] 2397 b .Lmark_introspection_runtime_call 2398END art_quick_read_barrier_mark_introspection 2399#else // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2400ENTRY art_quick_read_barrier_mark_introspection 2401 bkpt // Unreachable. 2402END art_quick_read_barrier_mark_introspection 2403#endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2404 2405.extern artInvokePolymorphic 2406ENTRY art_quick_invoke_polymorphic 2407 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 2408 mov r0, r1 @ r0 := receiver 2409 mov r1, rSELF @ r1 := Thread::Current 2410 mov r2, sp @ r2 := SP 2411 bl artInvokePolymorphic @ artInvokePolymorphic(receiver, Thread*, SP) 2412 str r1, [sp, 72] @ r0:r1 := Result. Copy r1 to context. 2413 RESTORE_SAVE_REFS_AND_ARGS_FRAME 2414 REFRESH_MARKING_REGISTER 2415 vmov d0, r0, r1 @ Put result r0:r1 into floating point return register. 2416 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2 2417END art_quick_invoke_polymorphic 2418 2419.extern artInvokeCustom 2420ENTRY art_quick_invoke_custom 2421 SETUP_SAVE_REFS_AND_ARGS_FRAME r1 2422 @ r0 := call_site_idx 2423 mov r1, rSELF @ r1 := Thread::Current 2424 mov r2, sp @ r2 := SP 2425 bl artInvokeCustom @ artInvokeCustom(call_site_idx, Thread*, SP) 2426 str r1, [sp, #72] @ Save r1 to context (r0:r1 = result) 2427 RESTORE_SAVE_REFS_AND_ARGS_FRAME 2428 REFRESH_MARKING_REGISTER 2429 vmov d0, r0, r1 @ Put result r0:r1 into floating point return register. 2430 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2 2431END art_quick_invoke_custom 2432 2433// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding. 2434// Argument 0: r0: The context pointer for ExecuteSwitchImpl. 
2435// Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call. 2436// Argument 2: r2: The value of DEX PC (memory address of the methods bytecode). 2437ENTRY ExecuteSwitchImplAsm 2438 push {r4, lr} // 2 words of callee saves. 2439 .cfi_adjust_cfa_offset 8 2440 .cfi_rel_offset r4, 0 2441 .cfi_rel_offset lr, 4 2442 mov r4, r2 // r4 = DEX PC 2443 CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0) 2444 blx r1 // Call the wrapped method. 2445 pop {r4, pc} 2446END ExecuteSwitchImplAsm 2447 2448// r0 contains the class, r4 contains the inline cache. We can use ip as temporary. 2449ENTRY art_quick_update_inline_cache 2450#if (INLINE_CACHE_SIZE != 5) 2451#error "INLINE_CACHE_SIZE not as expected." 2452#endif 2453#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2454 // Don't update the cache if we are marking. 2455 cmp rMR, #0 2456 bne .Ldone 2457#endif 2458.Lentry1: 2459 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET] 2460 cmp ip, r0 2461 beq .Ldone 2462 cmp ip, #0 2463 bne .Lentry2 2464 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET] 2465 cmp ip, #0 2466 bne .Lentry1 2467 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET] 2468 cmp ip, #0 2469 bne .Ldone 2470 b .Lentry1 2471.Lentry2: 2472 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4] 2473 cmp ip, r0 2474 beq .Ldone 2475 cmp ip, #0 2476 bne .Lentry3 2477 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4] 2478 cmp ip, #0 2479 bne .Lentry2 2480 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+4] 2481 cmp ip, #0 2482 bne .Ldone 2483 b .Lentry2 2484.Lentry3: 2485 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8] 2486 cmp ip, r0 2487 beq .Ldone 2488 cmp ip, #0 2489 bne .Lentry4 2490 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8] 2491 cmp ip, #0 2492 bne .Lentry3 2493 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+8] 2494 cmp ip, #0 2495 bne .Ldone 2496 b .Lentry3 2497.Lentry4: 2498 ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12] 2499 cmp ip, r0 2500 beq .Ldone 2501 cmp ip, #0 2502 bne .Lentry5 2503 ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12] 2504 cmp ip, #0 2505 bne .Lentry4 2506 strex ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+12] 2507 cmp ip, #0 2508 bne .Ldone 2509 b .Lentry4 2510.Lentry5: 2511 // Unconditionally store, the inline cache is megamorphic. 2512 str r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+16] 2513.Ldone: 2514 blx lr 2515END art_quick_update_inline_cache 2516 2517// On entry, method is at the bottom of the stack. 2518ENTRY art_quick_compile_optimized 2519 SETUP_SAVE_EVERYTHING_FRAME r0 2520 ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod 2521 mov r1, rSELF @ pass Thread::Current 2522 bl artCompileOptimized @ (ArtMethod*, Thread*) 2523 RESTORE_SAVE_EVERYTHING_FRAME 2524 // We don't need to restore the marking register here, as 2525 // artCompileOptimized doesn't allow thread suspension. 2526 blx lr 2527END art_quick_compile_optimized 2528
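
// (Editorial note, hedged.) art_quick_update_inline_cache above is roughly the
// following C-like logic, with illustrative names: it does nothing while the GC
// is marking, returns early if the class is already cached, claims the first
// empty slot with an LDREX/STREX compare-and-swap, and otherwise marks the
// cache megamorphic by unconditionally writing the last slot.
//
//   void update_inline_cache(mirror::Class* cls, InlineCache* cache) {
//     for (int i = 0; i < INLINE_CACHE_SIZE - 1; ++i) {
//       while (true) {
//         mirror::Class* entry = cache->classes[i];
//         if (entry == cls) return;                    // already recorded
//         if (entry != nullptr) break;                 // slot taken, try the next one
//         if (compare_and_swap(&cache->classes[i], nullptr, cls)) return;
//       }
//     }
//     cache->classes[INLINE_CACHE_SIZE - 1] = cls;     // megamorphic
//   }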