/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingException

    /*
     * Macro to spill the GPRs.
     */
.macro SPILL_ALL_CALLEE_SAVE_GPRS
    push {r4-r11, lr}                         @ 9 words (36 bytes) of callee saves.
    .cfi_adjust_cfa_offset 36
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset r6, 8
    .cfi_rel_offset r7, 12
    .cfi_rel_offset r8, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset r10, 24
    .cfi_rel_offset r11, 28
    .cfi_rel_offset lr, 32
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
    SPILL_ALL_CALLEE_SAVE_GPRS                @ 9 words (36 bytes) of callee saves.
    vpush {s16-s31}                           @ 16 words (64 bytes) of floats.
    .cfi_adjust_cfa_offset 64
    sub sp, #12                               @ 3 words of space, bottom word will hold Method*
    .cfi_adjust_cfa_offset 12
    RUNTIME_CURRENT1 \rTemp                   @ Load Runtime::Current into rTemp.
    @ Load kSaveAllCalleeSaves Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
    str \rTemp, [sp, #0]                      @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r5-r8, r10-r11, lr}                 @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp                   @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsOnly Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
    str \rTemp, [sp, #0]                      @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
#endif
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    add sp, #4                                @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r5-r8, r10-r11, lr}                  @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r1-r3, r5-r8, r10-r11, lr}          @ 10 words of callee saves and args.
    .cfi_adjust_cfa_offset 40
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    .cfi_rel_offset r3, 8
    .cfi_rel_offset r5, 12
    .cfi_rel_offset r6, 16
    .cfi_rel_offset r7, 20
    .cfi_rel_offset r8, 24
    .cfi_rel_offset r10, 28
    .cfi_rel_offset r11, 32
    .cfi_rel_offset lr, 36
    vpush {s0-s15}                            @ 16 words of float args.
    .cfi_adjust_cfa_offset 64
    sub sp, #8                                @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
#endif
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                   @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                      @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                          @ Store ArtMethod* to bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    add sp, #8                                @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {s0-s15}
    .cfi_adjust_cfa_offset -64
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r1-r3, r5-r8, r10-r11, lr}           @ 10 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -40
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                            @ 32 words, 2 for each of the 16 saved doubles.
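    @ 16 doubles * 8 bytes = 128 bytes, matching the .cfi_adjust_cfa_offset below.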
    .cfi_adjust_cfa_offset 128
    sub sp, #8                                @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    RUNTIME_CURRENT1 \rTemp                   @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #\runtime_method_offset]
    str \rTemp, [sp, #0]                      @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    push {r0-r12, lr}                         @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add sp, #8                                @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                          @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add sp, #8                                @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add sp, #4                                @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                           @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                          @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

// Macro to refresh the Marking Register (R8).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz r0, 1f                               @ result non-zero branch over
    bx lr                                     @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz r0, 1f                                @ result zero branch over
    bx lr                                     @ return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov r0, rSELF                             @ pass Thread::Current
    bl artDeliverPendingExceptionFromCode     @ artDeliverPendingExceptionFromCode(Thread*)
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0      @ save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0      @ save all registers as basis for long jump context
    mov r0, rSELF                             @ pass Thread::Current
    bl \cxx_name                              @ \cxx_name(Thread*)
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0            @ save all registers as basis for long jump context
    mov r0, rSELF                             @ pass Thread::Current
    bl \cxx_name                              @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1      @ save all registers as basis for long jump context
    mov r1, rSELF                             @ pass Thread::Current
    bl \cxx_name                              @ \cxx_name(arg0, Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2            @ save all registers as basis for long jump context
    mov r2, rSELF                             @ pass Thread::Current
    bl \cxx_name                              @ \cxx_name(arg0, arg1, Thread*)
END \c_name
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ Get exception field.
    cbnz \reg, 1f
    bx lr
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros that exploit the code similarities between the downcalls below.
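// Each *_REF_DOWNCALL forwards the original managed-code arguments unchanged, appends
// Thread::Current() as the last argument to the C++ entrypoint, and then expands the
// \return macro argument to either return or deliver a pending exception.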
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves in case of GC
    mov r1, rSELF                             @ pass Thread::Current
    bl \entrypoint                            @ (uint32_t field_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2             @ save callee saves in case of GC
    mov r2, rSELF                             @ pass Thread::Current
    bl \entrypoint                            @ (field_idx, Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3             @ save callee saves in case of GC
    mov r3, rSELF                             @ pass Thread::Current
    bl \entrypoint                            @ (field_idx, Object*, new_val, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME              @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode,
     * which will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                             @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                                @ pass the fault address stored in LR by the fault handler.
    mov r1, rSELF                             @ pass Thread::Current
    bl artThrowNullPointerExceptionFromSignal @ (fault_address, Thread*)
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2         @ save callee saves in case allocation triggers GC
    mov r2, rSELF                             @ pass Thread::Current
    mov r3, sp
    bl \cxx_name                              @ (method_idx, this, Thread*, SP)
    mov r12, r1                               @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz r0, 1f                                @ did we find the target? if not go to exception delivery
    bx r12                                    @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   JValue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r9             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :       :            |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS                @ spill regs (9)
    mov r11, sp                               @ save the stack pointer
    .cfi_def_cfa_register r11

    mov r9, r3                                @ move managed thread pointer into r9

    add r4, r2, #4                            @ create space for method pointer in frame
    sub r4, sp, r4                            @ reserve & align *stack* to 16 bytes: native calling
    and r4, #0xFFFFFFF0                       @ convention only aligns to 8B, so we have to ensure ART
    mov sp, r4                                @ 16B alignment ourselves.

    mov r4, r0                                @ save method*
    add r0, sp, #4                            @ pass stack pointer + method ptr as dest for memcpy
    bl memcpy                                 @ memcpy (dest, src, bytes)
    mov ip, #0                                @ set ip to 0
    str ip, [sp]                              @ store null for method* at bottom of frame

    ldr ip, [r11, #48]                        @ load fp register argument array pointer
    vldm ip, {s0-s15}                         @ copy s0 - s15

    ldr ip, [r11, #44]                        @ load core register argument array pointer
    mov r0, r4                                @ restore method*
    add ip, ip, #4                            @ skip r0
    ldm ip, {r1-r3}                           @ copy r1 - r3

    REFRESH_MARKING_REGISTER

    ldr ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx ip                                    @ call the method

    mov sp, r11                               @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr r4, [sp, #40]                         @ load result_in_float
    ldr r9, [sp, #36]                         @ load the result pointer
    cmp r4, #0
    ite eq
    strdeq r0, [r9]                           @ store r0/r1 into result pointer
    vstrne d0, [r9]                           @ store s0-s1/d0 into result pointer

    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}  @ restore spill regs
END art_quick_invoke_stub_internal

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS                @ Spill regs (9)
    vpush {s16-s31}                           @ Spill fp-regs (16)
    .cfi_adjust_cfa_offset 64
    SAVE_SIZE=(9*4+16*4)
    mov r11, sp                               @ Save the stack pointer
    .cfi_def_cfa r11, SAVE_SIZE               @ CFA = r11 + SAVE_SIZE
    .cfi_remember_state
    mov r10, r1                               @ Save size of stack
    ldr r9, [r11, #(SAVE_SIZE+4)]             @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov r6, r2                                @ Save the pc to call
    sub r7, sp, #12                           @ Reserve space for stack pointer,
                                              @ JValue* result, and ArtMethod* slot.
    and r7, #0xFFFFFFF0                       @ Align stack pointer
    mov sp, r7                                @ Update stack pointer
    str r11, [sp, #4]                         @ Save old stack pointer
    str r3, [sp, #8]                          @ Save JValue* result
    mov ip, #0
    str ip, [sp]                              @ Store null for ArtMethod* at bottom of frame
    // r11 isn't properly spilled in the osr method, so we need to use a DWARF expression.
    // NB: the CFI must be before the call since this is the address gdb will look up.
    // NB: gdb expects that cfa_expression returns the CFA value (not address to it).
    .cfi_escape                               /* CFA = [sp + 4] + SAVE_SIZE */ \
      0x0f, 6,                                /* DW_CFA_def_cfa_expression(len) */ \
      0x92, 13, 4,                            /* DW_OP_bregx(reg,offset) */ \
      0x06,                                   /* DW_OP_deref */ \
      0x23, SAVE_SIZE                         /* DW_OP_plus_uconst(val) */
    bl .Losr_entry                            @ Call the method
    ldr r10, [sp, #8]                         @ Restore JValue* result
    ldr sp, [sp, #4]                          @ Restore saved stack pointer
    .cfi_def_cfa sp, SAVE_SIZE                @ CFA = sp + SAVE_SIZE
    ldr r4, [sp, #SAVE_SIZE]                  @ load shorty
    ldrb r4, [r4, #0]                         @ load return type
    cmp r4, #68                               @ Test if result type char == 'D'.
    beq .Losr_fp_result
    cmp r4, #70                               @ Test if result type char == 'F'.
    beq .Losr_fp_result
    strd r0, [r10]                            @ Store r0/r1 into result pointer
    b .Losr_exit
.Losr_fp_result:
    vstr d0, [r10]                            @ Store s0-s1/d0 into result pointer
.Losr_exit:
    vpop {s16-s31}
    .cfi_adjust_cfa_offset -64
    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    .cfi_restore_state
    .cfi_def_cfa r11, SAVE_SIZE               @ CFA = r11 + SAVE_SIZE
    sub sp, sp, r10                           @ Reserve space for callee stack
    sub r10, r10, #4
    str lr, [sp, r10]                         @ Store link register per the compiler ABI
    mov r2, r10
    mov r1, r0
    mov r0, sp
    bl memcpy                                 @ memcpy (dest r0, src r1, bytes r2)
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
     * Both must reside on the stack, between current SP and target SP.
     * The r12 (IP) shall be clobbered rather than retrieved from gprs_.
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}                         @ Load all fprs from argument fprs_.
    mov sp, r0                                @ Make SP point to gprs_.
                                              @ Do not access fprs_ from now, they may be below SP.
    ldm sp, {r0-r11}                          @ load r0-r11 from gprs_.
    ldr r12, [sp, #60]                        @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12).
    ldr lr, [sp, #56]                         @ Load LR from gprs_, 56 = 4 * 14.
    ldr sp, [sp, #52]                         @ Load SP from gprs_ 52 = 4 * 13.
                                              @ Do not access gprs_ from now, they are below SP.
    REFRESH_MARKING_REGISTER
    bx r12                                    @ Do long jump.
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    ldr r1, [rSELF, #THREAD_ID_OFFSET]
    cbz r0, .Lslow_lock
.Lretry_lock:
    ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    eor r3, r2, r1                            @ Prepare the value to store if unlocked
                                              @ (thread id, count of 0 and preserved read barrier bits),
                                              @ or prepare to compare thread id for recursive lock check
                                              @ (lock_word.ThreadId() ^ self->ThreadId()).
    ands ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
    bne .Lnot_unlocked                        @ Check if unlocked.
    @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits.
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz r2, .Llock_strex_fail                @ If store failed, retry.
    dmb ish                                   @ Full (LoadLoad|LoadStore) memory barrier.
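    @ I.e. acquire ordering: later loads and stores in the critical section must not be
    @ reordered before the lock word update above.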
    bx lr
.Lnot_unlocked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
    @ Check lock word state and thread id together,
    bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
    cbnz r3, .Lslow_lock                      @ if either of the top two bits are set, or the lock word's
                                              @ thread id did not match, go slow path.
    add r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Increment the recursive lock count.
    @ Extract the new thin lock count for overflow check.
    ubfx r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
    cbz r2, .Lslow_lock                       @ Zero as the new count indicates overflow, go slow path.
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
    cbnz r2, .Llock_strex_fail                @ If strex failed, retry.
    bx lr
.Llock_strex_fail:
    b .Lretry_lock                            @ retry
// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object. Note that we
    // need a local label, the assembler complains about target being out of
    // range if we try to jump to `art_quick_lock_object_no_inline`.
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves in case we block
    mov r1, rSELF                             @ pass Thread::Current
    bl artLockObjectFromCode                  @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    ldr r1, [rSELF, #THREAD_ID_OFFSET]
    cbz r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    @ Need to use atomic instructions for read barrier.
    ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#endif
    eor r3, r2, r1                            @ Prepare the value to store if simply locked
                                              @ (mostly 0s, and preserved read barrier bits),
                                              @ or prepare to compare thread id for recursive lock check
                                              @ (lock_word.ThreadId() ^ self->ThreadId()).
    ands ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
    bne .Lnot_simply_locked                   @ Locked recursively or by other thread?
    @ Transition to unlocked.
    dmb ish                                   @ Full (LoadStore|StoreStore) memory barrier.
#ifndef USE_READ_BARRIER
    str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz r2, .Lunlock_strex_fail              @ If the store failed, retry.
#endif
    bx lr
.Lnot_simply_locked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
    @ Check lock word state and thread id together,
    bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
    cbnz r3, .Lslow_unlock                    @ if either of the top two bits are set, or the lock word's
                                              @ thread id did not match, go slow path.
    sub r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Decrement recursive lock count.
#ifndef USE_READ_BARRIER
    str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
    cbnz r2, .Lunlock_strex_fail              @ If the store failed, retry.
#endif
    bx lr
.Lunlock_strex_fail:
    b .Lretry_unlock                          @ retry
// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object. Note that we
    // need a local label, the assembler complains about target being out of
    // range if we try to jump to `art_quick_unlock_object_no_inline`.
.Lslow_unlock:
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov r1, rSELF                             @ pass Thread::Current
    bl artUnlockObjectFromCode                @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check

    push {r0-r2, lr}                          @ save arguments, padding (r2) and link register
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    bl artInstanceOfFromCode
    cbz r0, .Lthrow_class_cast_exception
    pop {r0-r2, pc}

.Lthrow_class_cast_exception:
    pop {r0-r2, lr}
    .cfi_adjust_cfa_offset -16
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2      @ save all registers as basis for long jump context
    mov r2, rSELF                             @ pass Thread::Current
    bl artThrowClassCastExceptionForObject    @ (Object*, Class*, Thread*)
    bkpt
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
    ldr \rReg, [sp, #\offset]                 @ restore rReg
    .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset].
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]                 @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}                      @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                                @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                           @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
    mov r1, \rObj                             @ pass rObj
    .endif
    mov r2, #\offset                          @ pass offset
    bl artReadBarrierSlow                     @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
    mov \rDest, r0                            @ save return value in rDest
    .endif
    add sp, #8                                @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest                  @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                                  @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    cmp r3, ip                                @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Ldo_aput_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}                          @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, rSELF                             @ pass Thread::Current
    bl artThrowArrayStoreException            @ (Class*, Class*, Thread*)
    bkpt                                      @ unreached
END art_quick_aput_obj

// Macro to facilitate adding new allocation entrypoints.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves in case of GC
    mov r1, rSELF                             @ pass Thread::Current
    bl \entrypoint                            @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2             @ save callee saves in case of GC
    mov r2, rSELF                             @ pass Thread::Current
    bl \entrypoint                            @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new array allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3             @ save callee saves in case of GC
    mov r3, rSELF                             @ pass Thread::Current
    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12            @ save callee saves in case of GC
    str rSELF, [sp, #-16]!                    @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl \entrypoint
    add sp, #16                               @ strip the extra frame
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset  @ save everything in case of GC
    mov r1, rSELF                             @ pass Thread::Current
    bl \entrypoint                            @ (uint32_t index, Thread*)
    cbz r0, 1f                                @ If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    REFRESH_MARKING_REGISTER
    bx lr
    .cfi_restore_state
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

    /*
     * Called by managed code to resolve a static field and load a non-wide value.
     */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve a static field and load a 64-bit primitive value.
     */
    .extern artGet64StaticFromCompiledCode
ENTRY art_quick_get64_static
    SETUP_SAVE_REFS_ONLY_FRAME r2             @ save callee saves in case of GC
    mov r1, rSELF                             @ pass Thread::Current
    bl artGet64StaticFromCompiledCode         @ (uint32_t field_idx, Thread*)
    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz r2, 1f                               @ deliver the pending exception if there is one
    bx lr                                     @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_static

    /*
     * Called by managed code to resolve an instance field and load a non-wide value.
     */
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
     */
    .extern artGet64InstanceFromCompiledCode
ENTRY art_quick_get64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r2             @ save callee saves in case of GC
    mov r2, rSELF                             @ pass Thread::Current
    bl artGet64InstanceFromCompiledCode       @ (field_idx, Object*, Thread*)
    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz r2, 1f                               @ deliver the pending exception if there is one
    bx lr                                     @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_instance

    /*
     * Called by managed code to resolve a static field and store a value.
     */
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a non-wide value.
     */
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a wide value.
     */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12            @ save callee saves in case of GC
    @ r2:r3 contain the wide argument
    str rSELF, [sp, #-16]!                    @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl artSet64InstanceFromCompiledCode       @ (field_idx, Object*, new_val, Thread*)
    add sp, #16                               @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME              @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_instance

    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12            @ save callee saves in case of GC
    @ r2:r3 contain the wide argument
    str rSELF, [sp, #-16]!                    @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl artSet64StaticFromCompiledCode         @ (field_idx, new_val, Thread*)
    add sp, #16                               @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME              @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm specific asm.
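// The entrypoints commented out below are instead provided by the hand-written fast-path
// implementations later in this file.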
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // r0: type/return value, rSELF (r9): Thread::Current
    // r1, r2, r3, r12: free.
    ldr r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]      // Check if the thread local
                                                               // allocation stack has room.
                                                               // TODO: consider using ldrd.
    ldr r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp r3, r12
    bhs .Lslow_path\c_name

    ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
    cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE            // Check if the size is for a thread
                                                               // local allocation. Also does the
                                                               // initialized and finalizable checks.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhs .Lslow_path\c_name
                                                               // Compute the rosalloc bracket index
                                                               // from the size. Since the size is
                                                               // already aligned we can combine the
                                                               // two shifts together.
    add r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                                               // Subtract pointer size since there
                                                               // are no runs for 0 byte allocations
                                                               // and the size is already aligned.
                                                               // Load the rosalloc run (r12)
    ldr r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                                               // Load the free list head (r3). This
                                                               // will be the return val.
    ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz r3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]                   // Load the next pointer of the head
                                                               // and update the list head with the
                                                               // next pointer.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                               // Store the class pointer in the
                                                               // header. This also overwrites the
                                                               // next pointer. The offsets are
                                                               // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF r0
    str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                               // Push the new object onto the thread
                                                               // local allocation stack and
                                                               // increment the thread local
                                                               // allocation stack top.
    ldr r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str r3, [r1], #COMPRESSED_REFERENCE_SIZE                   // (Increment r1 as a side effect.)
    str r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                               // Decrement the size of the free list

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //    (The stack initial state is "null" pointers).
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // The object is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not-cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub r1, #1
                                                               // TODO: consider combining this store
                                                               // and the list head store above using
                                                               // strd.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov r0, r3                                                 // Set the return value and return.
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    bx lr

.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME r2             @ save callee saves in case of GC
    mov r1, rSELF                             @ pass Thread::Current
    bl \cxx_name                              @ (mirror::Class* cls, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free.
// Need to preserve r0 to the slow path.
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
                                                               // Load thread_local_pos (r12) and
                                                               // thread_local_end (r3) with ldrd.
                                                               // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
#endif
    ldrd r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    sub r12, r3, r12                                           // Compute the remaining buf size.
    ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
    cmp r3, r12                                                // Check if it fits.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhi \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
                                                               // Reload old thread_local_pos (r2)
                                                               // for the return value.
    ldr r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    add r1, r2, r3
    str r1, [rSELF, #THREAD_LOCAL_POS_OFFSET]                  // Store new thread_local_pos.
    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the object. It can be either:
    // 1) A partially valid object, with a null class pointer
    //    (because the initial state of TLAB buffers is all 0s/nulls).
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // The object is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by checking that the object's class pointer is non-null.
    // Also, unlike rosalloc, the object can never be observed as null).
    ldr r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]              // Increment thread_local_objects.
    add r1, r1, #1
    str r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF r0
    str r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]                  // Store the class pointer.
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that the code after this allocation
                                                               // site will see the right values in
                                                               // the fields of the class.
    mov r0, r2
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove dmb for class initialization checks (b/36692143)
.endif
    bx lr
.endm

// The common code for art_quick_alloc_object_*region_tlab
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path tlab allocation.
    // r0: type, rSELF (r9): Thread::Current
    // r1, r2, r3, r12: free.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME r2             // Save callee saves in case of GC.
    mov r1, rSELF                             // Pass Thread::Current.
    bl \entrypoint                            // (mirror::Class* klass, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1


// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
// and art_quick_alloc_array_resolved/initialized_region_tlab.
//
// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free.
// Need to preserve r0 and r1 to the slow path.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
    and r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED                 // Apply alignment mask
                                                               // (addr + 7) & ~7.

                                                               // Load thread_local_pos (r3) and
                                                               // thread_local_end (r12) with ldrd.
                                                               // Check constraints for ldrd.
1413#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0)) 1414#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance" 1415#endif 1416 ldrd r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET] 1417 sub r12, r12, r3 // Compute the remaining buf size. 1418 cmp r2, r12 // Check if the total_size fits. 1419 // The array class is always initialized here. Unlike new-instance, 1420 // this does not act as a double test. 1421 bhi \slowPathLabel 1422 // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. 1423 add r2, r2, r3 1424 str r2, [rSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. 1425 ldr r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. 1426 add r2, r2, #1 1427 str r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET] 1428 POISON_HEAP_REF r0 1429 str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. 1430 str r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET] // Store the array length. 1431 // Fence. This is "ish" not "ishst" so 1432 // that the code after this allocation 1433 // site will see the right values in 1434 // the fields of the class. 1435 mov r0, r3 1436// new-array is special. The class is loaded and immediately goes to the Initialized state 1437// before it is published. Therefore the only fence needed is for the publication of the object. 1438// See ClassLinker::CreateArrayClass() for more details. 1439 1440// For publication of the new array, we don't need a 'dmb ishst' here. 1441// The compiler generates 'dmb ishst' for all new-array insts. 1442 bx lr 1443.endm 1444 1445.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup 1446ENTRY \name 1447 // Fast path array allocation for region tlab allocation. 1448 // r0: mirror::Class* type 1449 // r1: int32_t component_count 1450 // rSELF (r9): thread 1451 // r2, r3, r12: free. 1452 \size_setup .Lslow_path\name 1453 ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name 1454.Lslow_path\name: 1455 // r0: mirror::Class* klass 1456 // r1: int32_t component_count 1457 // r2: Thread* self 1458 SETUP_SAVE_REFS_ONLY_FRAME r2 // save callee saves in case of GC 1459 mov r2, rSELF // pass Thread::Current 1460 bl \entrypoint 1461 RESTORE_SAVE_REFS_ONLY_FRAME 1462 REFRESH_MARKING_REGISTER 1463 RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER 1464END \name 1465.endm 1466 1467.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path 1468 bkpt // We should never enter here. 1469 // Code below is for reference. 1470 // Possibly a large object, go slow. 1471 // Also does negative array size check. 1472 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8) 1473 cmp r1, r2 1474 bhi \slow_path 1475 // Array classes are never finalizable 1476 // or uninitialized, no need to check. 1477 ldr r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type 1478 UNPOISON_HEAP_REF r3 1479 ldr r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET] 1480 lsr r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16 1481 // bits. 1482 lsl r2, r1, r3 // Calculate data size 1483 // Add array data offset and alignment. 1484 add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1485#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 1486#error Long array data offset must be 4 greater than int array data offset. 1487#endif 1488 1489 add r3, r3, #1 // Add 4 to the length only if the 1490 // component size shift is 3 1491 // (for 64 bit alignment). 
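    // (Illustrative: a component size shift of 3 makes r3 + 1 == 4 and the AND #4 below
    // keeps it, so 4 extra bytes are reserved to match the wide-array data offset;
    // shifts 0-2 give at most 3, so the AND yields 0 and nothing is added.)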
1492 and r3, r3, #4 1493 add r2, r2, r3 1494.endm 1495 1496.macro COMPUTE_ARRAY_SIZE_8 slow_path 1497 // Possibly a large object, go slow. 1498 // Also does negative array size check. 1499 movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) 1500 cmp r1, r2 1501 bhi \slow_path 1502 // Add array data offset and alignment. 1503 add r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1504.endm 1505 1506.macro COMPUTE_ARRAY_SIZE_16 slow_path 1507 // Possibly a large object, go slow. 1508 // Also does negative array size check. 1509 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2) 1510 cmp r1, r2 1511 bhi \slow_path 1512 lsl r2, r1, #1 1513 // Add array data offset and alignment. 1514 add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1515.endm 1516 1517.macro COMPUTE_ARRAY_SIZE_32 slow_path 1518 // Possibly a large object, go slow. 1519 // Also does negative array size check. 1520 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4) 1521 cmp r1, r2 1522 bhi \slow_path 1523 lsl r2, r1, #2 1524 // Add array data offset and alignment. 1525 add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1526.endm 1527 1528.macro COMPUTE_ARRAY_SIZE_64 slow_path 1529 // Possibly a large object, go slow. 1530 // Also does negative array size check. 1531 movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8) 1532 cmp r1, r2 1533 bhi \slow_path 1534 lsl r2, r1, #3 1535 // Add array data offset and alignment. 1536 add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) 1537.endm 1538 1539// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove 1540// the entrypoint once all backends have been updated to use the size variants. 1541GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN 1542GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8 1543GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16 1544GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32 1545GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64 1546GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN 1547GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8 1548GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16 1549GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32 1550GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64 1551 1552 /* 1553 * Called by managed code when the value in rSUSPEND has been decremented to 0. 
1554 */ 1555 .extern artTestSuspendFromCode 1556ENTRY art_quick_test_suspend 1557 SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl 1558 mov r0, rSELF 1559 bl artTestSuspendFromCode @ (Thread*) 1560 RESTORE_SAVE_EVERYTHING_FRAME 1561 REFRESH_MARKING_REGISTER 1562 bx lr 1563END art_quick_test_suspend 1564 1565ENTRY art_quick_implicit_suspend 1566 mov r0, rSELF 1567 SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves for stack crawl 1568 bl artTestSuspendFromCode @ (Thread*) 1569 RESTORE_SAVE_REFS_ONLY_FRAME 1570 REFRESH_MARKING_REGISTER 1571 bx lr 1572END art_quick_implicit_suspend 1573 1574 /* 1575 * Called by managed code that is attempting to call a method on a proxy class. On entry 1576 * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The 1577 * frame size of the invoked proxy method agrees with a ref and args callee save frame. 1578 */ 1579 .extern artQuickProxyInvokeHandler 1580ENTRY art_quick_proxy_invoke_handler 1581 SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0 1582 mov r2, rSELF @ pass Thread::Current 1583 mov r3, sp @ pass SP 1584 blx artQuickProxyInvokeHandler @ (Method* proxy method, receiver, Thread*, SP) 1585 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1586 // Tear down the callee-save frame. Skip arg registers. 1587 add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1588 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1589 RESTORE_SAVE_REFS_ONLY_FRAME 1590 REFRESH_MARKING_REGISTER 1591 cbnz r2, 1f @ success if no exception is pending 1592 vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... 1593 bx lr @ return on success 15941: 1595 DELIVER_PENDING_EXCEPTION 1596END art_quick_proxy_invoke_handler 1597 1598 /* 1599 * Called to resolve an imt conflict. 1600 * r0 is the conflict ArtMethod. 1601 * r12 is a hidden argument that holds the target interface method's dex method index. 1602 * 1603 * Note that this stub writes to r0, r4, and r12. 1604 */ 1605 .extern artLookupResolvedMethod 1606ENTRY art_quick_imt_conflict_trampoline 1607 push {r1-r2} 1608 .cfi_adjust_cfa_offset (2 * 4) 1609 .cfi_rel_offset r1, 0 1610 .cfi_rel_offset r2, 4 1611 ldr r4, [sp, #(2 * 4)] // Load referrer. 1612 ldr r2, [r0, #ART_METHOD_JNI_OFFSET_32] // Load ImtConflictTable 1613 // Load the declaring class (without read barrier) and access flags (for obsolete method check). 1614 // The obsolete flag is set with suspended threads, so we do not need an acquire operation here. 1615#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4 1616#error "Expecting declaring class and access flags to be consecutive for LDRD." 1617#endif 1618 ldrd r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET] 1619 // If the method is obsolete, just go through the dex cache miss slow path. 1620 lsrs r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1) 1621 bcs .Limt_conflict_trampoline_dex_cache_miss 1622 ldr r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET] // Load the DexCache (without read barrier). 1623 UNPOISON_HEAP_REF r4 1624 ubfx r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS // Calculate DexCache method slot index. 1625 ldr r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET] // Load the resolved methods. 1626 add r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1) // Load DexCache method slot address. 1627 1628// FIXME: Configure the build to use the faster code when appropriate. 1629// Currently we fall back to the slower version. 
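// (Explanatory note: the DexCache method slot read below is a 64-bit {ArtMethod*, index}
// pair that must be read atomically with respect to concurrent cache updates. Where plain
// LDRD is not single-copy atomic, the LDREXD/STREXD loop emulates an atomic 64-bit load by
// storing back the value just read and retrying if the exclusive store fails.)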
1630#if HAS_ATOMIC_LDRD 1631 ldrd r0, r1, [r4] 1632#else 1633 push {r3} 1634 .cfi_adjust_cfa_offset 4 1635 .cfi_rel_offset r3, 0 1636.Limt_conflict_trampoline_retry_load: 1637 ldrexd r0, r1, [r4] 1638 strexd r3, r0, r1, [r4] 1639 cmp r3, #0 1640 bne .Limt_conflict_trampoline_retry_load 1641 pop {r3} 1642 .cfi_adjust_cfa_offset -4 1643 .cfi_restore r3 1644#endif 1645 1646 ldr r4, [r2] // Load first entry in ImtConflictTable. 1647 cmp r1, r12 // Compare method index to see if we had a DexCache method hit. 1648 bne .Limt_conflict_trampoline_dex_cache_miss 1649.Limt_table_iterate: 1650 cmp r4, r0 1651 // Branch if found. Benchmarks have shown doing a branch here is better. 1652 beq .Limt_table_found 1653 // If the entry is null, the interface method is not in the ImtConflictTable. 1654 cbz r4, .Lconflict_trampoline 1655 // Iterate over the entries of the ImtConflictTable. 1656 ldr r4, [r2, #(2 * __SIZEOF_POINTER__)]! 1657 b .Limt_table_iterate 1658.Limt_table_found: 1659 // We successfully hit an entry in the table. Load the target method 1660 // and jump to it. 1661 ldr r0, [r2, #__SIZEOF_POINTER__] 1662 .cfi_remember_state 1663 pop {r1-r2} 1664 .cfi_adjust_cfa_offset -(2 * 4) 1665 .cfi_restore r1 1666 .cfi_restore r2 1667 ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32] 1668 .cfi_restore_state 1669.Lconflict_trampoline: 1670 // Call the runtime stub to populate the ImtConflictTable and jump to the 1671 // resolved method. 1672 .cfi_remember_state 1673 pop {r1-r2} 1674 .cfi_adjust_cfa_offset -(2 * 4) 1675 .cfi_restore r1 1676 .cfi_restore r2 1677 INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline 1678 .cfi_restore_state 1679.Limt_conflict_trampoline_dex_cache_miss: 1680 // We're not creating a proper runtime method frame here, 1681 // artLookupResolvedMethod() is not allowed to walk the stack. 1682 1683 // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr). 1684 push {r2-r4, lr} 1685 .cfi_adjust_cfa_offset (4 * 4) 1686 .cfi_rel_offset r3, 4 1687 .cfi_rel_offset lr, 12 1688 // Save FPR args. 1689 vpush {d0-d7} 1690 .cfi_adjust_cfa_offset (8 * 8) 1691 1692 mov r0, ip // Pass method index. 1693 ldr r1, [sp, #(8 * 8 + 6 * 4)] // Pass referrer. 1694 bl artLookupResolvedMethod // (uint32_t method_index, ArtMethod* referrer) 1695 1696 // Restore FPR args. 1697 vpop {d0-d7} 1698 .cfi_adjust_cfa_offset -(8 * 8) 1699 // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr). 1700 pop {r2-r4, lr} 1701 .cfi_adjust_cfa_offset -(4 * 4) 1702 .cfi_restore r3 1703 .cfi_restore lr 1704 1705 cmp r0, #0 // If the method wasn't resolved, 1706 beq .Lconflict_trampoline // skip the lookup and go to artInvokeInterfaceTrampoline(). 1707 b .Limt_table_iterate 1708END art_quick_imt_conflict_trampoline 1709 1710 .extern artQuickResolutionTrampoline 1711ENTRY art_quick_resolution_trampoline 1712 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 1713 mov r2, rSELF @ pass Thread::Current 1714 mov r3, sp @ pass SP 1715 blx artQuickResolutionTrampoline @ (Method* called, receiver, Thread*, SP) 1716 cbz r0, 1f @ is code pointer null? 
goto exception 1717 mov r12, r0 1718 ldr r0, [sp, #0] @ load resolved method in r0 1719 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1720 REFRESH_MARKING_REGISTER 1721 bx r12 @ tail-call into actual code 17221: 1723 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1724 DELIVER_PENDING_EXCEPTION 1725END art_quick_resolution_trampoline 1726 1727 /* 1728 * Called to do a generic JNI down-call 1729 */ 1730ENTRY art_quick_generic_jni_trampoline 1731 SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0 1732 1733 // Save rSELF 1734 mov r11, rSELF 1735 // Save SP , so we can have static CFI info. r10 is saved in ref_and_args. 1736 mov r10, sp 1737 .cfi_def_cfa_register r10 1738 1739 sub sp, sp, #5120 1740 1741 // prepare for artQuickGenericJniTrampoline call 1742 // (Thread*, SP) 1743 // r0 r1 <= C calling convention 1744 // rSELF r10 <= where they are 1745 1746 mov r0, rSELF // Thread* 1747 mov r1, r10 1748 blx artQuickGenericJniTrampoline // (Thread*, sp) 1749 1750 // The C call will have registered the complete save-frame on success. 1751 // The result of the call is: 1752 // r0: pointer to native code, 0 on error. 1753 // r1: pointer to the bottom of the used area of the alloca, can restore stack till there. 1754 1755 // Check for error = 0. 1756 cbz r0, .Lexception_in_native 1757 1758 // Release part of the alloca. 1759 mov sp, r1 1760 1761 // Save the code pointer 1762 mov r12, r0 1763 1764 // Load parameters from frame into registers. 1765 pop {r0-r3} 1766 1767 // Softfloat. 1768 // TODO: Change to hardfloat when supported. 1769 1770 blx r12 // native call. 1771 1772 // result sign extension is handled in C code 1773 // prepare for artQuickGenericJniEndTrampoline call 1774 // (Thread*, result, result_f) 1775 // r0 r2,r3 stack <= C calling convention 1776 // r11 r0,r1 r0,r1 <= where they are 1777 sub sp, sp, #8 // Stack alignment. 1778 1779 push {r0-r1} 1780 mov r3, r1 1781 mov r2, r0 1782 mov r0, r11 1783 1784 blx artQuickGenericJniEndTrampoline 1785 1786 // Restore self pointer. 1787 mov rSELF, r11 1788 1789 // Pending exceptions possible. 1790 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1791 cbnz r2, .Lexception_in_native 1792 1793 // Tear down the alloca. 1794 mov sp, r10 1795 .cfi_def_cfa_register sp 1796 1797 // Tear down the callee-save frame. Skip arg registers. 1798 add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY 1799 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY) 1800 RESTORE_SAVE_REFS_ONLY_FRAME 1801 REFRESH_MARKING_REGISTER 1802 1803 // store into fpr, for when it's a fpr return... 1804 vmov d0, r0, r1 1805 bx lr // ret 1806 // Undo the unwinding information from above since it doesn't apply below. 1807 .cfi_def_cfa_register r10 1808 .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY 1809 1810.Lexception_in_native: 1811 ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] 1812 add ip, ip, #-1 // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE. 1813 mov sp, ip 1814 .cfi_def_cfa_register sp 1815 # This will create a new save-all frame, required by the runtime. 
1816 DELIVER_PENDING_EXCEPTION 1817END art_quick_generic_jni_trampoline 1818 1819 .extern artQuickToInterpreterBridge 1820ENTRY art_quick_to_interpreter_bridge 1821 SETUP_SAVE_REFS_AND_ARGS_FRAME r1 1822 mov r1, rSELF @ pass Thread::Current 1823 mov r2, sp @ pass SP 1824 blx artQuickToInterpreterBridge @ (Method* method, Thread*, SP) 1825 ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ 1826 // Tear down the callee-save frame. Skip arg registers. 1827 add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1828 .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY) 1829 RESTORE_SAVE_REFS_ONLY_FRAME 1830 REFRESH_MARKING_REGISTER 1831 cbnz r2, 1f @ success if no exception is pending 1832 vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... 1833 bx lr @ return on success 18341: 1835 DELIVER_PENDING_EXCEPTION 1836END art_quick_to_interpreter_bridge 1837 1838/* 1839 * Called to attempt to execute an obsolete method. 1840 */ 1841ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod 1842 1843 /* 1844 * Routine that intercepts method calls and returns. 1845 */ 1846 .extern artInstrumentationMethodEntryFromCode 1847 .extern artInstrumentationMethodExitFromCode 1848ENTRY art_quick_instrumentation_entry 1849 @ Make stack crawlable and clobber r2 and r3 (post saving) 1850 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 1851 @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs. 1852 str r0, [sp, #4] 1853 mov r2, rSELF @ pass Thread::Current 1854 mov r3, sp @ pass SP 1855 blx artInstrumentationMethodEntryFromCode @ (Method*, Object*, Thread*, SP) 1856 cbz r0, .Ldeliver_instrumentation_entry_exception 1857 @ Deliver exception if we got nullptr as function. 1858 mov r12, r0 @ r12 holds reference to code 1859 ldr r0, [sp, #4] @ restore r0 1860 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1861 adr lr, art_quick_instrumentation_exit + /* thumb mode */ 1 1862 @ load art_quick_instrumentation_exit into lr in thumb mode 1863 REFRESH_MARKING_REGISTER 1864 bx r12 @ call method with lr set to art_quick_instrumentation_exit 1865.Ldeliver_instrumentation_entry_exception: 1866 @ Deliver exception for art_quick_instrumentation_entry placed after 1867 @ art_quick_instrumentation_exit so that the fallthrough works. 1868 RESTORE_SAVE_REFS_AND_ARGS_FRAME 1869 DELIVER_PENDING_EXCEPTION 1870END art_quick_instrumentation_entry 1871 1872ENTRY art_quick_instrumentation_exit 1873 mov lr, #0 @ link register is to here, so clobber with 0 for later checks 1874 SETUP_SAVE_EVERYTHING_FRAME r2 1875 1876 add r3, sp, #8 @ store fpr_res pointer, in kSaveEverything frame 1877 add r2, sp, #136 @ store gpr_res pointer, in kSaveEverything frame 1878 mov r1, sp @ pass SP 1879 mov r0, rSELF @ pass Thread::Current 1880 blx artInstrumentationMethodExitFromCode @ (Thread*, SP, gpr_res*, fpr_res*) 1881 1882 cbz r0, .Ldo_deliver_instrumentation_exception 1883 @ Deliver exception if we got nullptr as function. 1884 cbnz r1, .Ldeoptimize 1885 // Normal return. 1886 str r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4] 1887 @ Set return pc. 1888 RESTORE_SAVE_EVERYTHING_FRAME 1889 REFRESH_MARKING_REGISTER 1890 bx lr 1891.Ldo_deliver_instrumentation_exception: 1892 DELIVER_PENDING_EXCEPTION_FRAME_READY 1893.Ldeoptimize: 1894 str r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4] 1895 @ Set return pc. 1896 RESTORE_SAVE_EVERYTHING_FRAME 1897 // Jump to art_quick_deoptimize. 
    b art_quick_deoptimize
END art_quick_instrumentation_exit

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimize
ENTRY art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME r0
    mov    r0, rSELF                      @ pass Thread::Current
    blx    artDeoptimize                  @ (Thread*)
END art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the interpreter bridge.
     */
    .extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME r1
    mov    r1, rSELF                      @ pass Thread::Current
    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
END art_quick_deoptimize_from_compiled_code

    /*
     * Signed 64-bit integer multiply.
     *
     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
     *        WX
     *      x YZ
     *  --------
     *     ZW ZX
     *  YW YX
     *
     * The low word of the result holds ZX, the high word holds
     * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because
     * it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     */
    /* mul-long vAA, vBB, vCC */
ENTRY art_quick_mul_long
    push   {r9-r10}
    .cfi_adjust_cfa_offset 8
    .cfi_rel_offset r9, 0
    .cfi_rel_offset r10, 4
    mul    ip, r2, r1                     @ ip<- ZxW
    umull  r9, r10, r2, r0                @ r9/r10 <- ZxX
    mla    r2, r0, r3, ip                 @ r2<- YxX + (ZxW)
    add    r10, r2, r10                   @ r10<- r10 + low(ZxW + (YxX))
    mov    r0, r9
    mov    r1, r10
    pop    {r9-r10}
    .cfi_adjust_cfa_offset -8
    .cfi_restore r9
    .cfi_restore r10
    bx     lr
END art_quick_mul_long

    /*
     * Long integer shift. This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* shl-long vAA, vBB, vCC */
ARM_ENTRY art_quick_shl_long              @ ARM code as thumb code requires spills
    and    r2, r2, #63                    @ r2<- r2 & 0x3f
    mov    r1, r1, asl r2                 @ r1<- r1 << r2
    rsb    r3, r2, #32                    @ r3<- 32 - r2
    orr    r1, r1, r0, lsr r3             @ r1<- r1 | (r0 >> (32-r2))
    subs   ip, r2, #32                    @ ip<- r2 - 32
    movpl  r1, r0, asl ip                 @ if r2 >= 32, r1<- r0 << (r2-32)
    mov    r0, r0, asl r2                 @ r0<- r0 << r2
    bx     lr
END art_quick_shl_long

    /*
     * Long integer shift. This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low
     * 6 bits.
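     *
     * (Illustrative note: for a shift count n < 32 the low result word is
     * (low >> n) | (high << (32 - n)); for n >= 32 the SUBS/MOVPL pair in the
     * code replaces it with high >> (n - 32), and the high word becomes the
     * sign extension of the original high word.)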
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* shr-long vAA, vBB, vCC */
ARM_ENTRY art_quick_shr_long              @ ARM code as thumb code requires spills
    and    r2, r2, #63                    @ r2<- r2 & 0x3f
    mov    r0, r0, lsr r2                 @ r0<- r0 >> r2
    rsb    r3, r2, #32                    @ r3<- 32 - r2
    orr    r0, r0, r1, asl r3             @ r0<- r0 | (r1 << (32-r2))
    subs   ip, r2, #32                    @ ip<- r2 - 32
    movpl  r0, r1, asr ip                 @ if r2 >= 32, r0<- r1 >> (r2-32)
    mov    r1, r1, asr r2                 @ r1<- r1 >> r2
    bx     lr
END art_quick_shr_long

    /*
     * Long integer shift. This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* ushr-long vAA, vBB, vCC */
ARM_ENTRY art_quick_ushr_long             @ ARM code as thumb code requires spills
    and    r2, r2, #63                    @ r2<- r2 & 0x3f
    mov    r0, r0, lsr r2                 @ r0<- r0 >>> r2
    rsb    r3, r2, #32                    @ r3<- 32 - r2
    orr    r0, r0, r1, asl r3             @ r0<- r0 | (r1 << (32-r2))
    subs   ip, r2, #32                    @ ip<- r2 - 32
    movpl  r0, r1, lsr ip                 @ if r2 >= 32, r0<- r1 >>> (r2-32)
    mov    r1, r1, lsr r2                 @ r1<- r1 >>> r2
    bx     lr
END art_quick_ushr_long

    /*
     * String's indexOf.
     *
     * On entry:
     *    r0:   string object (known non-null)
     *    r1:   char to match (known <= 0xFFFF)
     *    r2:   Starting offset in string data
     */
ENTRY art_quick_indexof
    push   {r4, r10-r11, lr}              @ 4 words of callee saves
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r10, 4
    .cfi_rel_offset r11, 8
    .cfi_rel_offset lr, 12
#if (STRING_COMPRESSION_FEATURE)
    ldr    r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
#else
    ldr    r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
#endif
    add    r0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
    /* r4 count (with flag) and r3 holds actual length */
    lsr    r3, r4, #1
#endif
    /* Clamp start to [0..count] */
    cmp    r2, #0
    it     lt
    movlt  r2, #0
    cmp    r2, r3
    it     gt
    movgt  r2, r3

    /* Save a copy in r12 to later compute result */
    mov    r12, r0

    /* Build pointer to start of data to compare and pre-bias */
#if (STRING_COMPRESSION_FEATURE)
    lsrs   r4, r4, #1
    bcc    .Lstring_indexof_compressed
#endif
    add    r0, r0, r2, lsl #1
    sub    r0, #2

    /* Compute iteration count */
    sub    r2, r3, r2

    /*
     * At this point we have:
     *   r0: start of data to test
     *   r1: char to compare
     *   r2: iteration count
     *   r4: compression style (used temporarily)
     *   r12: original start of string data
     *   r3, r4, r10, r11 available for loading string data
     */

    subs   r2, #4
    blt    .Lindexof_remainder

.Lindexof_loop4:
    ldrh   r3, [r0, #2]!
    ldrh   r4, [r0, #2]!
    ldrh   r10, [r0, #2]!
    ldrh   r11, [r0, #2]!
    cmp    r3, r1
    beq    .Lmatch_0
    cmp    r4, r1
    beq    .Lmatch_1
    cmp    r10, r1
    beq    .Lmatch_2
    cmp    r11, r1
    beq    .Lmatch_3
    subs   r2, #4
    bge    .Lindexof_loop4

.Lindexof_remainder:
    adds   r2, #4
    beq    .Lindexof_nomatch

.Lindexof_loop1:
    ldrh   r3, [r0, #2]!
2108 cmp r3, r1 2109 beq .Lmatch_3 2110 subs r2, #1 2111 bne .Lindexof_loop1 2112 2113.Lindexof_nomatch: 2114 mov r0, #-1 2115 pop {r4, r10-r11, pc} 2116 2117.Lmatch_0: 2118 sub r0, #6 2119 sub r0, r12 2120 asr r0, r0, #1 2121 pop {r4, r10-r11, pc} 2122.Lmatch_1: 2123 sub r0, #4 2124 sub r0, r12 2125 asr r0, r0, #1 2126 pop {r4, r10-r11, pc} 2127.Lmatch_2: 2128 sub r0, #2 2129 sub r0, r12 2130 asr r0, r0, #1 2131 pop {r4, r10-r11, pc} 2132.Lmatch_3: 2133 sub r0, r12 2134 asr r0, r0, #1 2135 pop {r4, r10-r11, pc} 2136#if (STRING_COMPRESSION_FEATURE) 2137.Lstring_indexof_compressed: 2138 add r0, r0, r2 2139 sub r0, #1 2140 sub r2, r3, r2 2141.Lstring_indexof_compressed_loop: 2142 subs r2, #1 2143 blt .Lindexof_nomatch 2144 ldrb r3, [r0, #1]! 2145 cmp r3, r1 2146 beq .Lstring_indexof_compressed_matched 2147 b .Lstring_indexof_compressed_loop 2148.Lstring_indexof_compressed_matched: 2149 sub r0, r12 2150 pop {r4, r10-r11, pc} 2151#endif 2152END art_quick_indexof 2153 2154 /* Assembly routines used to handle ABI differences. */ 2155 2156 /* double fmod(double a, double b) */ 2157 .extern fmod 2158ENTRY art_quick_fmod 2159 push {lr} 2160 .cfi_adjust_cfa_offset 4 2161 .cfi_rel_offset lr, 0 2162 sub sp, #4 2163 .cfi_adjust_cfa_offset 4 2164 vmov r0, r1, d0 2165 vmov r2, r3, d1 2166 bl fmod 2167 vmov d0, r0, r1 2168 add sp, #4 2169 .cfi_adjust_cfa_offset -4 2170 pop {pc} 2171END art_quick_fmod 2172 2173 /* float fmodf(float a, float b) */ 2174 .extern fmodf 2175ENTRY art_quick_fmodf 2176 push {lr} 2177 .cfi_adjust_cfa_offset 4 2178 .cfi_rel_offset lr, 0 2179 sub sp, #4 2180 .cfi_adjust_cfa_offset 4 2181 vmov r0, r1, d0 2182 bl fmodf 2183 vmov s0, r0 2184 add sp, #4 2185 .cfi_adjust_cfa_offset -4 2186 pop {pc} 2187END art_quick_fmodf 2188 2189 /* int64_t art_d2l(double d) */ 2190 .extern art_d2l 2191ENTRY art_quick_d2l 2192 vmov r0, r1, d0 2193 b art_d2l 2194END art_quick_d2l 2195 2196 /* int64_t art_f2l(float f) */ 2197 .extern art_f2l 2198ENTRY art_quick_f2l 2199 vmov r0, s0 2200 b art_f2l 2201END art_quick_f2l 2202 2203 /* float art_l2f(int64_t l) */ 2204 .extern art_l2f 2205ENTRY art_quick_l2f 2206 push {lr} 2207 .cfi_adjust_cfa_offset 4 2208 .cfi_rel_offset lr, 0 2209 sub sp, #4 2210 .cfi_adjust_cfa_offset 4 2211 bl art_l2f 2212 vmov s0, r0 2213 add sp, #4 2214 .cfi_adjust_cfa_offset -4 2215 pop {pc} 2216END art_quick_l2f 2217 2218.macro CONDITIONAL_CBZ reg, reg_if, dest 2219.ifc \reg, \reg_if 2220 cbz \reg, \dest 2221.endif 2222.endm 2223 2224.macro CONDITIONAL_CMPBZ reg, reg_if, dest 2225.ifc \reg, \reg_if 2226 cmp \reg, #0 2227 beq \dest 2228.endif 2229.endm 2230 2231// Use CBZ if the register is in {r0, r7} otherwise compare and branch. 2232.macro SMART_CBZ reg, dest 2233 CONDITIONAL_CBZ \reg, r0, \dest 2234 CONDITIONAL_CBZ \reg, r1, \dest 2235 CONDITIONAL_CBZ \reg, r2, \dest 2236 CONDITIONAL_CBZ \reg, r3, \dest 2237 CONDITIONAL_CBZ \reg, r4, \dest 2238 CONDITIONAL_CBZ \reg, r5, \dest 2239 CONDITIONAL_CBZ \reg, r6, \dest 2240 CONDITIONAL_CBZ \reg, r7, \dest 2241 CONDITIONAL_CMPBZ \reg, r8, \dest 2242 CONDITIONAL_CMPBZ \reg, r9, \dest 2243 CONDITIONAL_CMPBZ \reg, r10, \dest 2244 CONDITIONAL_CMPBZ \reg, r11, \dest 2245 CONDITIONAL_CMPBZ \reg, r12, \dest 2246 CONDITIONAL_CMPBZ \reg, r13, \dest 2247 CONDITIONAL_CMPBZ \reg, r14, \dest 2248 CONDITIONAL_CMPBZ \reg, r15, \dest 2249.endm 2250 2251 /* 2252 * Create a function `name` calling the ReadBarrier::Mark routine, 2253 * getting its argument and returning its result through register 2254 * `reg`, saving and restoring all caller-save registers. 
2255 * 2256 * IP is clobbered; `reg` must not be IP. 2257 * 2258 * If `reg` is different from `r0`, the generated function follows a 2259 * non-standard runtime calling convention: 2260 * - register `reg` is used to pass the (sole) argument of this 2261 * function (instead of R0); 2262 * - register `reg` is used to return the result of this function 2263 * (instead of R0); 2264 * - R0 is treated like a normal (non-argument) caller-save register; 2265 * - everything else is the same as in the standard runtime calling 2266 * convention (e.g. standard callee-save registers are preserved). 2267 */ 2268.macro READ_BARRIER_MARK_REG name, reg 2269ENTRY \name 2270 // Null check so that we can load the lock word. 2271 SMART_CBZ \reg, .Lret_rb_\name 2272 // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked. 2273 ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] 2274 tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED 2275 beq .Lnot_marked_rb_\name 2276 // Already marked, return right away. 2277.Lret_rb_\name: 2278 bx lr 2279 2280.Lnot_marked_rb_\name: 2281 // Test that both the forwarding state bits are 1. 2282#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) 2283 // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in 2284 // the highest bits and the "forwarding address" state to have all bits set. 2285#error "Unexpected lock word state shift or forwarding address state value." 2286#endif 2287 cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) 2288 bhs .Lret_forwarding_address\name 2289 2290.Lslow_rb_\name: 2291 // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to 2292 // make a tail call here. Currently, it serves only for stack alignment but 2293 // we may reintroduce kSaveEverything calls here in the future. 2294 push {r0-r4, r9, ip, lr} @ save return address, core caller-save registers and ip 2295 .cfi_adjust_cfa_offset 32 2296 .cfi_rel_offset r0, 0 2297 .cfi_rel_offset r1, 4 2298 .cfi_rel_offset r2, 8 2299 .cfi_rel_offset r3, 12 2300 .cfi_rel_offset r4, 16 2301 .cfi_rel_offset r9, 20 2302 .cfi_rel_offset ip, 24 2303 .cfi_rel_offset lr, 28 2304 2305 .ifnc \reg, r0 2306 mov r0, \reg @ pass arg1 - obj from `reg` 2307 .endif 2308 2309 vpush {s0-s15} @ save floating-point caller-save registers 2310 .cfi_adjust_cfa_offset 64 2311 bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) 2312 vpop {s0-s15} @ restore floating-point registers 2313 .cfi_adjust_cfa_offset -64 2314 2315 .ifc \reg, r0 @ Save result to the stack slot or destination register. 2316 str r0, [sp, #0] 2317 .else 2318 .ifc \reg, r1 2319 str r0, [sp, #4] 2320 .else 2321 .ifc \reg, r2 2322 str r0, [sp, #8] 2323 .else 2324 .ifc \reg, r3 2325 str r0, [sp, #12] 2326 .else 2327 .ifc \reg, r4 2328 str r0, [sp, #16] 2329 .else 2330 .ifc \reg, r9 2331 str r0, [sp, #20] 2332 .else 2333 mov \reg, r0 2334 .endif 2335 .endif 2336 .endif 2337 .endif 2338 .endif 2339 .endif 2340 2341 pop {r0-r4, r9, ip, lr} @ restore caller-save registers 2342 .cfi_adjust_cfa_offset -32 2343 .cfi_restore r0 2344 .cfi_restore r1 2345 .cfi_restore r2 2346 .cfi_restore r3 2347 .cfi_restore r4 2348 .cfi_restore r9 2349 .cfi_restore ip 2350 .cfi_restore lr 2351 bx lr 2352.Lret_forwarding_address\name: 2353 // Shift left by the forwarding address shift. This clears out the state bits since they are 2354 // in the top 2 bits of the lock word. 
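    // (The forwarding address is stored right-shifted in the lock word, so this left shift
    // also restores the object-aligned address while the two state bits fall off the top.)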
2355 lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 2356 bx lr 2357END \name 2358.endm 2359 2360READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0 2361READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1 2362READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2 2363READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3 2364READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4 2365READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5 2366READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6 2367READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7 2368READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8 2369READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 2370READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 2371READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 2372 2373// Helper macros for Baker CC read barrier mark introspection (BRBMI). 2374.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register 2375 \macro_for_register r0 2376 \macro_for_register r1 2377 \macro_for_register r2 2378 \macro_for_register r3 2379 \macro_for_register r4 2380 \macro_for_register r5 2381 \macro_for_register r6 2382 \macro_for_register r7 2383 \macro_for_reserved_register // r8 (rMR) is the marking register. 2384 \macro_for_register r9 2385 \macro_for_register r10 2386 \macro_for_register r11 2387 \macro_for_reserved_register // IP is reserved. 2388 \macro_for_reserved_register // SP is reserved. 2389 \macro_for_reserved_register // LR is reserved. 2390 \macro_for_reserved_register // PC is reserved. 2391.endm 2392 2393.macro BRBMI_RETURN_SWITCH_CASE reg 2394 .balign 8 2395.Lmark_introspection_return_switch_case_\reg: 2396 mov rMR, #1 2397 mov \reg, ip 2398 bx lr 2399.endm 2400 2401.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg 2402 .byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2 2403.endm 2404 2405.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET 2406 .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 2407.endm 2408 2409#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET 2410#error "Array and field introspection code sharing requires same LDR offset." 2411#endif 2412.macro BRBMI_ARRAY_LOAD index_reg 2413 ldr ip, [ip, \index_reg, lsl #2] // 4 bytes. 2414 b art_quick_read_barrier_mark_introspection // Should be 2 bytes, encoding T2. 2415 .balign 8 // Add padding to 8 bytes. 2416.endm 2417 2418.macro BRBMI_BKPT_FILL_4B 2419 bkpt 0 2420 bkpt 0 2421.endm 2422 2423.macro BRBMI_BKPT_FILL_8B 2424 BRBMI_BKPT_FILL_4B 2425 BRBMI_BKPT_FILL_4B 2426.endm 2427 2428.macro BRBMI_RUNTIME_CALL 2429 // Note: This macro generates exactly 22 bytes of code. The core register 2430 // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions. 2431 2432 push {r0-r3, r7, lr} // Save return address and caller-save registers. 2433 .cfi_adjust_cfa_offset 24 2434 .cfi_rel_offset r0, 0 2435 .cfi_rel_offset r1, 4 2436 .cfi_rel_offset r2, 8 2437 .cfi_rel_offset r3, 12 2438 .cfi_rel_offset r7, 16 2439 .cfi_rel_offset lr, 20 2440 2441 mov r0, ip // Pass the reference. 2442 vpush {s0-s15} // save floating-point caller-save registers 2443 .cfi_adjust_cfa_offset 64 2444 bl artReadBarrierMark // r0 <- artReadBarrierMark(obj) 2445 vpop {s0-s15} // restore floating-point registers 2446 .cfi_adjust_cfa_offset -64 2447 mov ip, r0 // Move reference to ip in preparation for return switch. 
2448 2449 pop {r0-r3, r7, lr} // Restore registers. 2450 .cfi_adjust_cfa_offset -24 2451 .cfi_restore r0 2452 .cfi_restore r1 2453 .cfi_restore r2 2454 .cfi_restore r3 2455 .cfi_restore r7 2456 .cfi_restore lr 2457.endm 2458 2459.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix 2460 // If reference is null, just return it in the right register. 2461 cmp ip, #0 2462 beq .Lmark_introspection_return\label_suffix 2463 // Use rMR as temp and check the mark bit of the reference. 2464 ldr rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 2465 tst rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED 2466 beq .Lmark_introspection_unmarked\label_suffix 2467.Lmark_introspection_return\label_suffix: 2468.endm 2469 2470.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix 2471.Lmark_introspection_unmarked\label_suffix: 2472 // Check if the top two bits are one, if this is the case it is a forwarding address. 2473#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) 2474 // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in 2475 // the highest bits and the "forwarding address" state to have all bits set. 2476#error "Unexpected lock word state shift or forwarding address state value." 2477#endif 2478 cmp rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) 2479 bhs .Lmark_introspection_forwarding_address\label_suffix 2480.endm 2481 2482.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix 2483.Lmark_introspection_forwarding_address\label_suffix: 2484 // Note: This macro generates exactly 22 bytes of code, the branch is near. 2485 2486 // Shift left by the forwarding address shift. This clears out the state bits since they are 2487 // in the top 2 bits of the lock word. 2488 lsl ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 2489 b .Lmark_introspection_return\label_suffix 2490.endm 2491 2492.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset 2493 // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. 2494 ldrh rMR, [lr, #(-1 + \ldr_offset + 2)] 2495.endm 2496 2497.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset 2498 // Load the 16-bit instruction. Adjust for the thumb state in LR. 2499 ldrh rMR, [lr, #(-1 + \ldr_offset)] 2500.endm 2501 2502.macro BRBMI_EXTRACT_RETURN_REG_wide 2503 lsr rMR, rMR, #12 // Extract `ref_reg`. 2504.endm 2505 2506.macro BRBMI_EXTRACT_RETURN_REG_narrow 2507 and rMR, rMR, #7 // Extract `ref_reg`. 2508.endm 2509 2510.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix 2511 BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset 2512 BRBMI_EXTRACT_RETURN_REG\label_suffix 2513.endm 2514 2515.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix 2516 .balign 32 2517 .thumb_func 2518 .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function 2519 .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix 2520 .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix 2521art_quick_read_barrier_mark_introspection_gc_roots\label_suffix: 2522 BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix 2523.endm 2524 2525.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix 2526 .balign 16 2527 // Note: Generates exactly 16 bytes of code. 
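    // (Descriptive note: checks for a forwarding address, otherwise recovers the destination
    // register number from the LDR instruction and branches to the shared runtime call.)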
2528 BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix 2529 BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix 2530 b .Lmark_introspection_runtime_call 2531.endm 2532 2533 /* 2534 * Use introspection to load a reference from the same address as the LDR 2535 * instruction in generated code would load (unless loaded by the thunk, 2536 * see below), call ReadBarrier::Mark() with that reference if needed 2537 * and return it in the same register as the LDR instruction would load. 2538 * 2539 * The entrypoint is called through a thunk that differs across load kinds. 2540 * For field and array loads the LDR instruction in generated code follows 2541 * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning) 2542 * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where 2543 * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk 2544 * knows the holder and performs the gray bit check, returning to the LDR 2545 * instruction if the object is not gray, so this entrypoint no longer 2546 * needs to know anything about the holder. For GC root loads, the LDR 2547 * instruction in generated code precedes the branch to the thunk, i.e. the 2548 * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1) 2549 * where the -1 is again the Thumb mode bit adjustment, and the thunk does 2550 * not do the gray bit check. 2551 * 2552 * For field accesses and array loads with a constant index the thunk loads 2553 * the reference into IP using introspection and calls the main entrypoint 2554 * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or 2555 * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known 2556 * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET) 2557 * from the main entrypoint and the thunk adjusts the entrypoint pointer. 2558 * With heap poisoning enabled, the passed reference is poisoned. 2559 * 2560 * For array accesses with non-constant index, the thunk inserts the bits 2561 * 0-5 of the LDR instruction to the entrypoint address, effectively 2562 * calculating a switch case label based on the index register (bits 0-3) 2563 * and adding an extra offset (bits 4-5 hold the shift which is always 2 2564 * for reference loads) to differentiate from the main entrypoint, then 2565 * moves the base register to IP and jumps to the switch case. Therefore 2566 * we need to align the main entrypoint to 512 bytes, accounting for 2567 * a 256-byte offset followed by 16 array entrypoints starting at 2568 * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR 2569 * (register) and a branch to the main entrypoint. 2570 * 2571 * For GC root accesses we cannot use the main entrypoint because of the 2572 * different offset where the LDR instruction in generated code is located. 2573 * (And even with heap poisoning enabled, GC roots are not poisoned.) 2574 * To re-use the same entrypoint pointer in generated code, we make sure 2575 * that the gc root entrypoint (a copy of the entrypoint with a different 2576 * offset for introspection loads) is located at a known offset (0xc0/0xe0 2577 * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/ 2578 * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the 2579 * main entrypoint and the GC root thunk adjusts the entrypoint pointer, 2580 * moves the root register to IP and jumps to the customized entrypoint, 2581 * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}. 
2582 * The thunk also performs all the fast-path checks, so we need just the 2583 * slow path. 2584 * 2585 * The UnsafeCASObject intrinsic is similar to the GC roots wide approach 2586 * but using ADD (register, T3) instead of the LDR (immediate, T3), so the 2587 * destination register is in bits 8-11 rather than 12-15. Therefore it has 2588 * its own entrypoint, art_quick_read_barrier_mark_introspection_unsafe_cas 2589 * at the offset BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET. 2590 * 2591 * The code structure is 2592 * art_quick_read_barrier_mark_introspection: // @0x00 2593 * Up to 32 bytes code for main entrypoint fast-path code for fields 2594 * (and array elements with constant offset) with LDR encoding T3; 2595 * jumps to the switch in the "narrow" entrypoint. 2596 * art_quick_read_barrier_mark_introspection_narrow: // @0x20 2597 * Up to 48 bytes code for fast path code for fields (and array 2598 * elements with constant offset) with LDR encoding T1, ending in the 2599 * return switch instruction TBB and the table with switch offsets. 2600 * .Lmark_introspection_return_switch_case_r0: // @0x50 2601 * Exactly 88 bytes of code for the return switch cases (8 bytes per 2602 * case, 11 cases; no code for reserved registers). 2603 * .Lmark_introspection_forwarding_address_narrow: // @0xa8 2604 * Exactly 6 bytes to extract the forwarding address and jump to the 2605 * "narrow" entrypoint fast path. 2606 * .Lmark_introspection_return_switch_case_bad: // @0xae 2607 * Exactly 2 bytes, bkpt for unexpected return register. 2608 * .Lmark_introspection_unmarked_narrow: // @0xb0 2609 * Exactly 16 bytes for "narrow" entrypoint slow path. 2610 * art_quick_read_barrier_mark_introspection_gc_roots_wide: // @0xc0 2611 * GC root entrypoint code for LDR encoding T3 (10 bytes); loads and 2612 * extracts the return register and jumps to the runtime call. 2613 * .Lmark_introspection_forwarding_address_wide: // @0xca 2614 * Exactly 6 bytes to extract the forwarding address and jump to the 2615 * "wide" entrypoint fast path. 2616 * .Lmark_introspection_unmarked_wide: // @0xd0 2617 * Exactly 16 bytes for "wide" entrypoint slow path. 2618 * art_quick_read_barrier_mark_introspection_gc_roots_narrow: // @0xe0 2619 * GC root entrypoint code for LDR encoding T1 (8 bytes); loads and 2620 * extracts the return register and falls through to the runtime call. 2621 * .Lmark_introspection_runtime_call: // @0xe8 2622 * Exactly 24 bytes for the runtime call to MarkReg() and jump to the 2623 * return switch. 2624 * art_quick_read_barrier_mark_introspection_arrays: // @0x100 2625 * Exactly 128 bytes for array load switch cases (16x2 instructions). 2626 * art_quick_read_barrier_mark_introspection_unsafe_cas: // @0x180 2627 * UnsafeCASObject intrinsic entrypoint for ADD (register) encoding T3 2628 * (6 bytes). Loads the return register and jumps to the runtime call. 2629 */ 2630#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2631 .balign 512 2632ENTRY art_quick_read_barrier_mark_introspection 2633 // At this point, IP contains the reference, rMR is clobbered by the thunk 2634 // and can be freely used as it will be set back to 1 before returning. 2635 // For heap poisoning, the reference is poisoned, so unpoison it first. 2636 UNPOISON_HEAP_REF ip 2637 // Check for null or marked, lock word is loaded into rMR. 2638 BRBMI_CHECK_NULL_AND_MARKED _wide 2639 // Load and extract the return register from the instruction. 
2640 BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide 2641 b .Lmark_introspection_return_switch 2642 2643 .balign 32 2644 .thumb_func 2645 .type art_quick_read_barrier_mark_introspection_narrow, #function 2646 .hidden art_quick_read_barrier_mark_introspection_narrow 2647 .global art_quick_read_barrier_mark_introspection_narrow 2648art_quick_read_barrier_mark_introspection_narrow: 2649 // At this point, IP contains the reference, rMR is clobbered by the thunk 2650 // and can be freely used as it will be set back to 1 before returning. 2651 // For heap poisoning, the reference is poisoned, so unpoison it first. 2652 UNPOISON_HEAP_REF ip 2653 // Check for null or marked, lock word is loaded into rMR. 2654 BRBMI_CHECK_NULL_AND_MARKED _narrow 2655 // Load and extract the return register from the instruction. 2656 BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow 2657.Lmark_introspection_return_switch: 2658 tbb [pc, rMR] // Jump to the switch case. 2659.Lmark_introspection_return_table: 2660 BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET 2661 BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */ 2662 2663 .balign 8 2664 BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow // 6 bytes 2665.Lmark_introspection_return_switch_case_bad: 2666 bkpt // 2 bytes 2667 2668 BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow 2669 2670 // 8 bytes for the loading and extracting of the return register. 2671 BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide 2672 // 2 bytes for near branch to the runtime call. 2673 b .Lmark_introspection_runtime_call 2674 2675 BRBMI_EXTRACT_FORWARDING_ADDRESS _wide // Not even 4-byte aligned. 2676 2677 BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide 2678 2679 // 8 bytes for the loading and extracting of the return register. 2680 BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow 2681 // And the runtime call and branch to the switch taking exactly 24 bytes 2682 // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch) 2683 // shall take the rest of the 32-byte section (within a cache line). 2684.Lmark_introspection_runtime_call: 2685 BRBMI_RUNTIME_CALL 2686 b .Lmark_introspection_return_switch 2687 2688 .balign 256 2689 .thumb_func 2690 .type art_quick_read_barrier_mark_introspection_arrays, #function 2691 .hidden art_quick_read_barrier_mark_introspection_arrays 2692 .global art_quick_read_barrier_mark_introspection_arrays 2693art_quick_read_barrier_mark_introspection_arrays: 2694 BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B 2695 2696 .balign 8 2697 .thumb_func 2698 .type art_quick_read_barrier_mark_introspection_unsafe_cas, #function 2699 .hidden art_quick_read_barrier_mark_introspection_unsafe_cas 2700 .global art_quick_read_barrier_mark_introspection_unsafe_cas 2701art_quick_read_barrier_mark_introspection_unsafe_cas: 2702 // Load the byte of the ADD instruction that contains Rd. Adjust for the thumb state in LR. 2703 // The ADD (register, T3) is |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm| and we're using 2704 // no shift (type=0, imm2=0, imm3=0), so the byte we read here, i.e. |(0)imm3|Rd|, 2705 // contains only the register number, the top 4 bits are 0. 
2706 ldrb rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET + 3)] 2707 b .Lmark_introspection_runtime_call 2708END art_quick_read_barrier_mark_introspection 2709#else // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2710ENTRY art_quick_read_barrier_mark_introspection 2711 bkpt // Unreachable. 2712END art_quick_read_barrier_mark_introspection 2713#endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) 2714 2715.extern artInvokePolymorphic 2716ENTRY art_quick_invoke_polymorphic 2717 SETUP_SAVE_REFS_AND_ARGS_FRAME r2 2718 mov r0, r1 @ r0 := receiver 2719 mov r1, rSELF @ r1 := Thread::Current 2720 mov r2, sp @ r2 := SP 2721 bl artInvokePolymorphic @ artInvokePolymorphic(receiver, Thread*, SP) 2722 str r1, [sp, 72] @ r0:r1 := Result. Copy r1 to context. 2723 RESTORE_SAVE_REFS_AND_ARGS_FRAME 2724 REFRESH_MARKING_REGISTER 2725 vmov d0, r0, r1 @ Put result r0:r1 into floating point return register. 2726 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2 2727END art_quick_invoke_polymorphic 2728 2729.extern artInvokeCustom 2730ENTRY art_quick_invoke_custom 2731 SETUP_SAVE_REFS_AND_ARGS_FRAME r1 2732 @ r0 := call_site_idx 2733 mov r1, rSELF @ r1 := Thread::Current 2734 mov r2, sp @ r2 := SP 2735 bl artInvokeCustom @ artInvokeCustom(call_site_idx, Thread*, SP) 2736 str r1, [sp, #72] @ Save r1 to context (r0:r1 = result) 2737 RESTORE_SAVE_REFS_AND_ARGS_FRAME 2738 REFRESH_MARKING_REGISTER 2739 vmov d0, r0, r1 @ Put result r0:r1 into floating point return register. 2740 RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2 2741END art_quick_invoke_custom 2742 2743// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding. 2744// Argument 0: r0: The context pointer for ExecuteSwitchImpl. 2745// Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call. 2746// Argument 2: r2: The value of DEX PC (memory address of the methods bytecode). 2747ENTRY ExecuteSwitchImplAsm 2748 push {r4, lr} // 2 words of callee saves. 2749 .cfi_adjust_cfa_offset 8 2750 .cfi_rel_offset r4, 0 2751 .cfi_rel_offset lr, 4 2752 mov r4, r2 // r4 = DEX PC 2753 CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0) 2754 blx r1 // Call the wrapped method. 2755 pop {r4, pc} 2756END ExecuteSwitchImplAsm 2757