/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm64.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"


.macro INCREASE_FRAME frame_adjustment
    sub sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset (\frame_adjustment)
.endm

.macro DECREASE_FRAME frame_adjustment
    add sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_REG reg, offset
    str \reg, [sp, #(\offset)]
    .cfi_rel_offset \reg, (\offset)
.endm

.macro RESTORE_REG reg, offset
    ldr \reg, [sp, #(\offset)]
    .cfi_restore \reg
.endm

.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
    str \reg, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg, 0
.endm

.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
    ldr \reg, [sp], #(\frame_adjustment)
    .cfi_restore \reg
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm

.macro RESTORE_TWO_REGS reg1, reg2, offset
    ldp \reg1, \reg2, [sp, #(\offset)]
    .cfi_restore \reg1
    .cfi_restore \reg2
.endm

.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg1, 0
    .cfi_rel_offset \reg2, 8
.endm

.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
    .cfi_restore \reg1
    .cfi_restore \reg2
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves).
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime* xIP0 = art::Runtime::instance_;
    // Our registers aren't intermixed - just spill in order.
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    INCREASE_FRAME 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP callee-saves.
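    // Frame layout sketch, derived from the stores below (176 bytes total):
    //   [sp, #0]   ArtMethod* (kSaveAllCalleeSaves runtime method)
    //   [sp, #8]   alignment filler
    //   [sp, #16]  d8-d15 (four stp pairs)
    //   [sp, #80]  x19-x28, x29, LR (six pairs, last at [sp, #160])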
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves
    SAVE_TWO_REGS x19, x20, 80
    SAVE_TWO_REGS x21, x22, 96
    SAVE_TWO_REGS x23, x24, 112
    SAVE_TWO_REGS x25, x26, 128
    SAVE_TWO_REGS x27, x28, 144
    SAVE_TWO_REGS x29, xLR, 160

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime* xIP0 = art::Runtime::instance_;
    // Our registers aren't intermixed - just spill in order.
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    INCREASE_FRAME 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    SAVE_TWO_REGS x21, x22, 16
    SAVE_TWO_REGS x23, x24, 32
    SAVE_TWO_REGS x25, x26, 48
    SAVE_TWO_REGS x27, x28, 64
    SAVE_TWO_REGS x29, xLR, 80

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_REG x20, 8
    RESTORE_TWO_REGS x21, x22, 16
    RESTORE_TWO_REGS x23, x24, 32
    RESTORE_TWO_REGS x25, x26, 48
    RESTORE_TWO_REGS x27, x28, 64
    RESTORE_TWO_REGS x29, xLR, 80

    DECREASE_FRAME 96
.endm

.macro POP_SAVE_REFS_ONLY_FRAME
    DECREASE_FRAME 96
.endm


.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    INCREASE_FRAME 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args.
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    SAVE_TWO_REGS x1, x2, 80
    SAVE_TWO_REGS x3, x4, 96
    SAVE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
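    // (x7 is the last GPR argument and x20 the first callee-save spilled in
    // this frame; pairing them keeps everything in 16-byte stp pairs.)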
    SAVE_TWO_REGS x7, x20, 128
    SAVE_TWO_REGS x21, x22, 144
    SAVE_TWO_REGS x23, x24, 160
    SAVE_TWO_REGS x25, x26, 176
    SAVE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    SAVE_TWO_REGS x29, xLR, 208

.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     *
     * TODO This is probably too conservative - saving FP & LR.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime* xIP0 = art::Runtime::instance_;
    // Our registers aren't intermixed - just spill in order.
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]  // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    RESTORE_TWO_REGS x1, x2, 80
    RESTORE_TWO_REGS x3, x4, 96
    RESTORE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_TWO_REGS x7, x20, 128
    RESTORE_TWO_REGS x21, x22, 144
    RESTORE_TWO_REGS x23, x24, 160
    RESTORE_TWO_REGS x25, x26, 176
    RESTORE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    RESTORE_TWO_REGS x29, xLR, 208

    DECREASE_FRAME 224
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
     * and saving registers x29 and LR is handled elsewhere.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Save FP registers.
    stp d0, d1,   [sp, #16]
    stp d2, d3,   [sp, #32]
    stp d4, d5,   [sp, #48]
    stp d6, d7,   [sp, #64]
    stp d8, d9,   [sp, #80]
    stp d10, d11, [sp, #96]
    stp d12, d13, [sp, #112]
    stp d14, d15, [sp, #128]
    stp d16, d17, [sp, #144]
    stp d18, d19, [sp, #160]
    stp d20, d21, [sp, #176]
    stp d22, d23, [sp, #192]
    stp d24, d25, [sp, #208]
    stp d26, d27, [sp, #224]
    stp d28, d29, [sp, #240]
    stp d30, d31, [sp, #256]

    // Save core registers.
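    // (x18 is the arm64 platform register and deliberately gets no slot: x19
    // follows x17 directly, so the GPR area runs contiguously from #272 to the
    // x29/LR pair at #496, filling the 512-byte frame exactly.)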
    SAVE_TWO_REGS x0, x1,   272
    SAVE_TWO_REGS x2, x3,   288
    SAVE_TWO_REGS x4, x5,   304
    SAVE_TWO_REGS x6, x7,   320
    SAVE_TWO_REGS x8, x9,   336
    SAVE_TWO_REGS x10, x11, 352
    SAVE_TWO_REGS x12, x13, 368
    SAVE_TWO_REGS x14, x15, 384
    SAVE_TWO_REGS x16, x17, 400  // Do not save the platform register.
    SAVE_TWO_REGS x19, x20, 416
    SAVE_TWO_REGS x21, x22, 432
    SAVE_TWO_REGS x23, x24, 448
    SAVE_TWO_REGS x25, x26, 464
    SAVE_TWO_REGS x27, x28, 480

    // art::Runtime* xIP0 = art::Runtime::instance_;
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    ldr xIP0, [xIP0, \runtime_method_offset]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
     */
.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    INCREASE_FRAME 512
    SAVE_TWO_REGS x29, xLR, 496
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    // Restore FP registers.
    ldp d0, d1,   [sp, #16]
    ldp d2, d3,   [sp, #32]
    ldp d4, d5,   [sp, #48]
    ldp d6, d7,   [sp, #64]
    ldp d8, d9,   [sp, #80]
    ldp d10, d11, [sp, #96]
    ldp d12, d13, [sp, #112]
    ldp d14, d15, [sp, #128]
    ldp d16, d17, [sp, #144]
    ldp d18, d19, [sp, #160]
    ldp d20, d21, [sp, #176]
    ldp d22, d23, [sp, #192]
    ldp d24, d25, [sp, #208]
    ldp d26, d27, [sp, #224]
    ldp d28, d29, [sp, #240]
    ldp d30, d31, [sp, #256]

    // Restore core registers, except x0.
    RESTORE_REG x1, 280
    RESTORE_TWO_REGS x2, x3,   288
    RESTORE_TWO_REGS x4, x5,   304
    RESTORE_TWO_REGS x6, x7,   320
    RESTORE_TWO_REGS x8, x9,   336
    RESTORE_TWO_REGS x10, x11, 352
    RESTORE_TWO_REGS x12, x13, 368
    RESTORE_TWO_REGS x14, x15, 384
    RESTORE_TWO_REGS x16, x17, 400  // Do not restore the platform register.
    RESTORE_TWO_REGS x19, x20, 416
    RESTORE_TWO_REGS x21, x22, 432
    RESTORE_TWO_REGS x23, x24, 448
    RESTORE_TWO_REGS x25, x26, 464
    RESTORE_TWO_REGS x27, x28, 480
    RESTORE_TWO_REGS x29, xLR, 496

    DECREASE_FRAME 512
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    RESTORE_REG x0, 272
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
.endm

// Macro to refresh the Marking Register (W20).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr wMR, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz x0, 1f  // result non-zero branch over
    ret          // return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz x0, 1f   // result zero branch over
    ret          // return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov x0, xSELF

    // Point of no return.
    bl artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0                                  // Unreached
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]  // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm

// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm

.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f  // result non-zero branch over
    ret          // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    mov x0, xSELF                      // pass Thread::Current
    bl \cxx_name                       // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME  // save all registers as basis for long jump context
    mov x0, xSELF                // pass Thread::Current
    bl \cxx_name                 // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context.
    mov x1, xSELF                      // pass Thread::Current.
    bl \cxx_name                       // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME  // save all registers as basis for long jump context
    mov x2, xSELF                // pass Thread::Current
    bl \cxx_name                 // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    // Save all registers as basis for long jump context.
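    // (The fault handler already pushed one pointer, so growing the frame by
    // FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__ completes a full
    // SaveEverything frame.)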
    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
    mov x0, lr     // pass the fault address stored in LR by the fault handler.
    mov x1, xSELF  // pass Thread::Current.
    bl artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    brk 0
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
     * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/x1.
     *
     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
     * of the target Method* in x0 and method->code_ in x1.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Adapted from ARM32 code.
     *
     * Clobbers xIP0.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    mov x2, xSELF  // pass Thread::Current
    mov x3, sp
    bl \cxx_name   // (method_idx, this, Thread*, SP)
    mov xIP0, x1   // save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz x0, 1f     // did we find the target? if not go to exception delivery
    br xIP0        // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


.macro INVOKE_STUB_CREATE_FRAME
SAVE_SIZE=6*8  // x4, x5, x19, x20, FP, LR saved.
    SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS xFP, xLR, 32

    mov xFP, sp  // Use xFP for frame pointer, as it's callee-saved.
    .cfi_def_cfa_register xFP

    add x10, x2, #(__SIZEOF_POINTER__ + 0xf)  // Reserve space for ArtMethod*, arguments and
    and x10, x10, # ~0xf                      // round up for 16-byte stack alignment.
    sub sp, sp, x10                           // Adjust SP for ArtMethod*, args and alignment padding.

    mov xSELF, x3  // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8  // Destination address is bottom of stack + null.

    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
1:
    cbz w2, 2f
    sub w2, w2, #4  // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]
    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

.macro INVOKE_STUB_CALL_AND_RETURN

    REFRESH_MARKING_REGISTER

    // load method-> METHOD_QUICK_CODE_OFFSET
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Pop the ArtMethod* (null), arguments and alignment padding from the stack.
    mov sp, xFP
    .cfi_def_cfa_register sp

    // Restore saved registers including value address and shorty address.
    RESTORE_TWO_REGS x19, x20, 16
    RESTORE_TWO_REGS xFP, xLR, 32
    RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 1f

    // Is it a double?
    cmp w10, #'D'
    beq 2f

    // Is it a float?
    cmp w10, #'F'
    beq 3f

    // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

1:  // Finish up.
    ret

2:  // Store double.
    str d0, [x4]
    ret

3:  // Store float.
    str s0, [x4]
    ret

.endm


/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 *  +----------------------+
 *  |                      |
 *  |  C/C++ frame         |
 *  |       LR''           |
 *  |       FP''           | <- SP'
 *  +----------------------+
 *  +----------------------+
 *  |        x28           | <- TODO: Remove callee-saves.
 *  |         :            |
 *  |        x19           |
 *  |        SP'           |
 *  |        X5            |
 *  |        X4            |        Saved registers
 *  |        LR'           |
 *  |        FP'           | <- FP
 *  +----------------------+
 *  | uint32_t out[n-1]    |
 *  |    :      :          |        Outs
 *  | uint32_t out[0]      |
 *  | ArtMethod*           | <- SP  value=null
 *  +----------------------+
 *
 * Outgoing registers:
 *  x0    - Method*
 *  x1-x7 - integer parameters.
 *  d0-d7 - Floating point parameters.
 *  xSELF = self
 *  SP = & of ArtMethod*
 *  x1 = "this" pointer.
 *
 */
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr x11, .LstoreW2
    adr x12, .LstoreX2
    adr x13, .LstoreS0
    adr x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1   // Load shorty address, plus one to skip return value.
    ldr w1, [x9], #4  // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1      // Load next character in signature, and increment.
    cbz w17, .LcallFunction  // Exit at end of signature. Shorty 0 terminated.

    cmp w17, #'F'  // is this a float?
    bne .LisDouble

    cmp x15, # 8*12  // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15  // Calculate subroutine to jump to.
    br x17

.LisDouble:
    cmp w17, #'D'  // is this a double?
    bne .LisLong

    cmp x15, # 8*12  // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15  // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'  // is this a long?
    bne .LisOther

    cmp x8, # 6*12  // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8  // Calculate subroutine to jump to.
    br x17

.LisOther:  // Everything else takes one vReg.
    cmp x8, # 6*12  // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8  // Calculate subroutine to jump to.
    br x17

.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
.macro LOADREG counter size register return
    ldr \register, [x9], #\size
    add \counter, \counter, 12
    b \return
.endm

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
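// (Each LOADREG entry assembles to three 4-byte instructions, i.e. 12 bytes,
// so the x8/x15 counters double as byte offsets into these tables; the 6*12
// and 8*12 comparisons above are the "all registers used" caps.)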
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr x11, .LstoreW1_2
    adr x12, .LstoreX1_2
    adr x13, .LstoreS0_2
    adr x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1  // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
.LfillRegisters2:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction2  // Exit at end of signature. Shorty 0 terminated.

    cmp w17, #'F'  // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12  // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15  // Calculate subroutine to jump to.
    br x17

.LisDouble2:
    cmp w17, #'D'  // is this a double?
    bne .LisLong2

    cmp x15, # 8*12  // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15  // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'  // is this a long?
    bne .LisOther2

    cmp x8, # 7*12  // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8  // Calculate subroutine to jump to.
    br x17

.LisOther2:  // Everything else takes one vReg.
    cmp x8, # 7*12  // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8  // Calculate subroutine to jump to.
    br x17

.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
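// (Unlike the instance stub, the static stub also fills w1/x1 with an
// argument, since no "this" occupies x1; hence the 7*12 integer cap above
// instead of 6*12.)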
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub



/*  extern"C" void art_quick_osr_stub(void** stack,                x0
 *                                    size_t stack_size_in_bytes,  x1
 *                                    const uint8_t* native_pc,    x2
 *                                    JValue *result,              x3
 *                                    char   *shorty,              x4
 *                                    Thread *self)                x5
 */
ENTRY art_quick_osr_stub
    SAVE_SIZE=22*8
    SAVE_TWO_REGS_INCREASE_FRAME x3, x4, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS x21, x22, 32
    SAVE_TWO_REGS x23, x24, 48
    SAVE_TWO_REGS x25, x26, 64
    SAVE_TWO_REGS x27, x28, 80
    SAVE_TWO_REGS xFP, xLR, 96
    stp d8, d9,   [sp, #112]
    stp d10, d11, [sp, #128]
    stp d12, d13, [sp, #144]
    stp d14, d15, [sp, #160]

    mov xSELF, x5  // Move thread pointer into SELF register.
    REFRESH_MARKING_REGISTER

    INCREASE_FRAME 16
    str xzr, [sp]  // Store null for ArtMethod* slot
    // Branch to stub.
    bl .Losr_entry
    .cfi_remember_state
    DECREASE_FRAME 16

    // Restore saved registers including value address and shorty address.
    ldp d8, d9,   [sp, #112]
    ldp d10, d11, [sp, #128]
    ldp d12, d13, [sp, #144]
    ldp d14, d15, [sp, #160]
    RESTORE_TWO_REGS x19, x20, 16
    RESTORE_TWO_REGS x21, x22, 32
    RESTORE_TWO_REGS x23, x24, 48
    RESTORE_TWO_REGS x25, x26, 64
    RESTORE_TWO_REGS x27, x28, 80
    RESTORE_TWO_REGS xFP, xLR, 96
    RESTORE_TWO_REGS_DECREASE_FRAME x3, x4, SAVE_SIZE

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit
    // Is it a double?
    cmp w10, #'D'
    beq .Losr_return_double
    // Is it a float?
    cmp w10, #'F'
    beq .Losr_return_float
    // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]
.Losr_exit:
    ret
.Losr_return_double:
    str d0, [x3]
    ret
.Losr_return_float:
    str s0, [x3]
    ret

.Losr_entry:
    .cfi_restore_state                  // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset (SAVE_SIZE+16)  // workaround for clang bug: 31975598

    mov x9, sp  // Save stack pointer.
    .cfi_def_cfa_register x9

    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
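    // (Rough C sketch of the copy loop below, assuming the length in w1 is a
    // multiple of 4:
    //   while (len != 0) { len -= 4; *(uint32_t*)(sp + len) = *(uint32_t*)(stack + len); } )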
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
.Losr_loop_entry:
    cbz w1, .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2

END art_quick_osr_stub

    /*
     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_.
     * Both must reside on the stack, between current SP and target SP.
     * IP0 and IP1 shall be clobbered rather than retrieved from gprs_.
     */

ENTRY art_quick_do_long_jump
    // Load FPRs
    ldp d0, d1,   [x1, #0]
    ldp d2, d3,   [x1, #16]
    ldp d4, d5,   [x1, #32]
    ldp d6, d7,   [x1, #48]
    ldp d8, d9,   [x1, #64]
    ldp d10, d11, [x1, #80]
    ldp d12, d13, [x1, #96]
    ldp d14, d15, [x1, #112]
    ldp d16, d17, [x1, #128]
    ldp d18, d19, [x1, #144]
    ldp d20, d21, [x1, #160]
    ldp d22, d23, [x1, #176]
    ldp d24, d25, [x1, #192]
    ldp d26, d27, [x1, #208]
    ldp d28, d29, [x1, #224]
    ldp d30, d31, [x1, #240]

    // Load GPRs. Delay loading x0, x1 because x0 is used as gprs_.
    ldp x2, x3,   [x0, #16]
    ldp x4, x5,   [x0, #32]
    ldp x6, x7,   [x0, #48]
    ldp x8, x9,   [x0, #64]
    ldp x10, x11, [x0, #80]
    ldp x12, x13, [x0, #96]
    ldp x14, x15, [x0, #112]
    // Do not load IP0 (x16) and IP1 (x17), these shall be clobbered below.
    // Don't load the platform register (x18) either.
    ldr x19,      [x0, #152]  // xSELF.
    ldp x20, x21, [x0, #160]  // For Baker RB, wMR (w20) is reloaded below.
    ldp x22, x23, [x0, #176]
    ldp x24, x25, [x0, #192]
    ldp x26, x27, [x0, #208]
    ldp x28, x29, [x0, #224]
    ldp x30, xIP0, [x0, #240]  // LR and SP, load SP to IP0.

    // Load PC to IP1, it's at the end (after the space for the unused XZR).
    ldr xIP1, [x0, #33*8]

    // Load x0, x1.
    ldp x0, x1, [x0, #0]

    // Set SP. Do not access fprs_ and gprs_ from now, they are below SP.
    mov sp, xIP0

    REFRESH_MARKING_REGISTER

    br xIP1
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
     * possibly null object to lock.
     *
     * Derived from arm32 code.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    ldr w1, [xSELF, #THREAD_ID_OFFSET]
    cbz w0, art_quick_lock_object_no_inline
    // Exclusive load/store has no immediate anymore.
    add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
.Lretry_lock:
    ldaxr w2, [x4]  // Acquire needed only in most common case.
    eor w3, w2, w1  // Prepare the value to store if unlocked
                    //   (thread id, count of 0 and preserved read barrier bits),
                    // or prepare to compare thread id for recursive lock check
                    //   (lock_word.ThreadId() ^ self->ThreadId()).
    tst w2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
    b.ne .Lnot_unlocked  // Check if unlocked.
    // unlocked case - store w3: original lock word plus thread id, preserved read barrier bits.
    stxr w2, w3, [x4]
    cbnz w2, .Lretry_lock  // If the store failed, retry.
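    // (No extra barrier before the ret: the ldaxr above already provided
    // acquire semantics for the successful fast-path lock.)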
    ret
.Lnot_unlocked:  // w2: original lock word, w1: thread id, w3: w2 ^ w1
    // Check lock word state and thread id together.
    tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
    b.ne art_quick_lock_object_no_inline
    add w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE       // Increment the recursive lock count.
    tst w3, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED  // Test the new thin lock count.
    b.eq art_quick_lock_object_no_inline  // Zero as the new count indicates overflow, go slow path.
    stxr w2, w3, [x4]
    cbnz w2, .Lretry_lock  // If the store failed, retry.
    ret
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object.
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case we block
    mov x1, xSELF               // pass Thread::Current
    bl artLockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * x0 holds the possibly null object to unlock.
     *
     * Derived from arm32 code.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    ldr w1, [xSELF, #THREAD_ID_OFFSET]
    cbz x0, art_quick_unlock_object_no_inline
    // Exclusive load/store has no immediate anymore.
    add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr w2, [x4]
#else
    ldxr w2, [x4]   // Need to use atomic instructions for read barrier.
#endif
    eor w3, w2, w1  // Prepare the value to store if simply locked
                    //   (mostly 0s, and preserved read barrier bits),
                    // or prepare to compare thread id for recursive lock check
                    //   (lock_word.ThreadId() ^ self->ThreadId()).
    tst w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
    b.ne .Lnot_simply_locked  // Locked recursively or by other thread?
    // Transition to unlocked.
#ifndef USE_READ_BARRIER
    stlr w3, [x4]
#else
    stlxr w2, w3, [x4]       // Need to use atomic instructions for read barrier.
    cbnz w2, .Lretry_unlock  // If the store failed, retry.
#endif
    ret
.Lnot_simply_locked:
    // Check lock word state and thread id together.
    tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
    b.ne art_quick_unlock_object_no_inline
    sub w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str w3, [x4]
#else
    stxr w2, w3, [x4]        // Need to use atomic instructions for read barrier.
    cbnz w2, .Lretry_unlock  // If the store failed, retry.
#endif
    ret
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object.
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case exception allocation triggers GC
    mov x1, xSELF               // pass Thread::Current
    bl artUnlockObjectFromCode  // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
1250 */ 1251 .extern artInstanceOfFromCode 1252 .extern artThrowClassCastExceptionForObject 1253ENTRY art_quick_check_instance_of 1254 // Type check using the bit string passes null as the target class. In that case just throw. 1255 cbz x1, .Lthrow_class_cast_exception_for_bitstring_check 1256 1257 // Store arguments and link register 1258 // Stack needs to be 16B aligned on calls. 1259 SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32 1260 SAVE_REG xLR, 24 1261 1262 // Call runtime code 1263 bl artInstanceOfFromCode 1264 1265 // Restore LR. 1266 RESTORE_REG xLR, 24 1267 1268 // Check for exception 1269 cbz x0, .Lthrow_class_cast_exception 1270 1271 // Restore and return 1272 .cfi_remember_state 1273 RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32 1274 ret 1275 .cfi_restore_state // Reset unwind info so following code unwinds. 1276 .cfi_def_cfa_offset 32 // workaround for clang bug: 31975598 1277 1278.Lthrow_class_cast_exception: 1279 // Restore 1280 RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32 1281 1282.Lthrow_class_cast_exception_for_bitstring_check: 1283 SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context 1284 mov x2, xSELF // pass Thread::Current 1285 bl artThrowClassCastExceptionForObject // (Object*, Class*, Thread*) 1286 brk 0 // We should not return here... 1287END art_quick_check_instance_of 1288 1289// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude. 1290.macro POP_REG_NE xReg, offset, xExclude 1291 .ifnc \xReg, \xExclude 1292 ldr \xReg, [sp, #\offset] // restore xReg 1293 .cfi_restore \xReg 1294 .endif 1295.endm 1296 1297// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude. 1298// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude. 1299.macro POP_REGS_NE xReg1, xReg2, offset, xExclude 1300 .ifc \xReg1, \xExclude 1301 ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2 1302 .else 1303 .ifc \xReg2, \xExclude 1304 ldr \xReg1, [sp, #\offset] // restore xReg1 1305 .else 1306 ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2 1307 .endif 1308 .endif 1309 .cfi_restore \xReg1 1310 .cfi_restore \xReg2 1311.endm 1312 1313 /* 1314 * Macro to insert read barrier, only used in art_quick_aput_obj. 1315 * xDest, wDest and xObj are registers, offset is a defined literal such as 1316 * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle 1317 * name mismatch between instructions. This macro uses the lower 32b of register when possible. 1318 * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path. 1319 */ 1320.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number 1321#ifdef USE_READ_BARRIER 1322# ifdef USE_BAKER_READ_BARRIER 1323 ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 1324 tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number 1325 // False dependency to avoid needing load/load fence. 1326 add \xObj, \xObj, \xTemp, lsr #32 1327 ldr \wDest, [\xObj, #\offset] // Heap reference = 32b. This also zero-extends to \xDest. 1328 UNPOISON_HEAP_REF \wDest 1329 b .Lrb_exit\number 1330# endif // USE_BAKER_READ_BARRIER 1331.Lrb_slowpath\number: 1332 // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned. 
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, xLR, 32

    // mov x0, \xRef         // pass ref in x0 (no-op for now since parameter ref is unused)
    .ifnc \xObj, x1
        mov x1, \xObj        // pass xObj
    .endif
    mov w2, #\offset         // pass offset
    bl artReadBarrierSlow    // artReadBarrierSlow(ref, xObj, offset)
    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \wDest, w0
        mov \wDest, w0       // save return value in wDest
    .endif

    // Conditionally restore saved registers
    POP_REG_NE x0, 0, \xDest
    POP_REG_NE x1, 8, \xDest
    POP_REG_NE x2, 16, \xDest
    POP_REG_NE x3, 24, \xDest
    POP_REG_NE x4, 32, \xDest
    RESTORE_REG xLR, 40
    DECREASE_FRAME 48
.Lrb_exit\number:
#else
    ldr \wDest, [\xObj, #\offset]  // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
ENTRY art_quick_aput_obj
    cbz x2, .Ldo_aput_null
    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0          // Heap reference = 32b
                                                                            // This also zero-extends to x3
    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1  // Heap reference = 32b
                                                                            // This also zero-extends to x3
    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2          // Heap reference = 32b
                                                                            // This also zero-extends to x4
    cmp w3, w4  // value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]  // Heap reference = 32b
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
    strb w3, [x3, x0]
    ret
.Ldo_aput_null:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    // "Compress" = do nothing
    str w2, [x3, x1, lsl #2]  // Heap reference = 32b
    ret
.Lcheck_assignability:
    // Store arguments and link register
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
    SAVE_TWO_REGS x2, xLR, 16

    // Call runtime code
    mov x0, x3  // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    mov x1, x4  // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    bl artIsAssignableFromCode

    // Check for exception
    cbz x0, .Lthrow_array_store_exception

    // Restore
    .cfi_remember_state
    RESTORE_TWO_REGS x2, xLR, 16
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]  // Heap reference = 32b
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
    strb w3, [x3, x0]
    ret
    .cfi_restore_state      // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset 32  // workaround for clang bug: 31975598
.Lthrow_array_store_exception:
    RESTORE_TWO_REGS x2, xLR, 16
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov x1, x2                      // Pass value.
    mov x2, xSELF                   // Pass Thread::Current.
    bl artThrowArrayStoreException  // (Object*, Object*, Thread*).
    brk 0                           // Unreached.
END art_quick_aput_obj

// Macro to facilitate adding new allocation entrypoints.
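// For reference, a typical instantiation of the downcall macros defined next
// looks like (hypothetical entrypoint name, sketch only):
//   ONE_ARG_DOWNCALL art_quick_example, artExampleFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
// i.e. save the refs-only frame, pass Thread::Current in the next free argument
// register, call the entrypoint, restore the frame, refresh the marking
// register, and finish with the \return macro applied to the result in w0/x0.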
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x1, xSELF               // pass Thread::Current
    bl \entrypoint              // (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x2, xSELF               // pass Thread::Current
    bl \entrypoint              // (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x3, xSELF               // pass Thread::Current
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x4, xSELF               // pass Thread::Current
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macros that take advantage of code similarities between the downcalls.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x1, xSELF               // pass Thread::Current
    bl \entrypoint              // (uint32_t type_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x2, xSELF               // pass Thread::Current
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME  // save callee saves in case of GC
    mov x3, xSELF               // pass Thread::Current
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for stack crawl
    mov x1, xSELF   // pass Thread::Current
    bl \entrypoint  // (int32_t index, Thread* self)
    cbz w0, 1f      // If result is null, deliver the OOME.
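    // Success path: w0/x0 holds the resolved object, so the restore below
    // deliberately keeps x0 live (RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0).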
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    REFRESH_MARKING_REGISTER
    ret  // return
    .cfi_restore_state
    .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    cbz w0, 1f  // result zero branch over
    ret         // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code when static storage is uninitialized; this stub will run the class
     * initializer and deliver the exception on error. On success the static storage base is
     * returned.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
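// (Convention in the lists below: getters return the value in w0/x0 and check
// for a pending exception via x1 (RETURN_OR_DELIVER_PENDING_EXCEPTION_X1);
// setters return 0 in w0 on success (RETURN_IF_W0_IS_ZERO_OR_DELIVER).)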

ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm64 specific asm.
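// (The commented-out generators below are instead provided by the hand-written
// arm64 fast paths later in this file.)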
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // x0: type, xSELF(x19): Thread::Current
    // x1-x7: free.
    ldr x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                           // allocation stack has room.
    // ldp won't work due to large offset.
    ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp x3, x4
    bhs .Lslow_path\c_name
    ldr w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
    cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE  // Check if the size is for a thread
                                                     // local allocation. Also does the
                                                     // finalizable and initialization
                                                     // checks.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhs .Lslow_path\c_name
    // Compute the rosalloc bracket index from the size. Since the size is
    // already aligned we can combine the two shifts together.
    add x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
    // Subtract pointer size since there are no runs for 0 byte allocations
    // and the size is already aligned.
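    // (In effect: x4 = self + (size / bracket_quantum) * sizeof(void*), folded
    // into a single shifted add; the pointer-size subtraction happens via the
    // load offset below.)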
    ldr x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                                              // Load the free list head (x3). This
                                                              // will be the return val.
    ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz x3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
    ldr x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]                  // Load the next pointer of the head
                                                              // and update the list head with the
                                                              // next pointer.
    str x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                              // Store the class pointer in the
                                                              // header. This also overwrites the
                                                              // next pointer. The offsets are
                                                              // asserted to match.

#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF w0
    str w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                              // Push the new object onto the thread
                                                              // local allocation stack and
                                                              // increment the thread local
                                                              // allocation stack top.
    ldr x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str w3, [x1], #COMPRESSED_REFERENCE_SIZE                  // (Increment x1 as a side effect.)
    str x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                              // Decrement the size of the free list.

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (e.g. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //    (The stack's initial state is all "null" pointers.)
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // Such an object is invalid only temporarily; it eventually becomes fully valid.
    // The internal runtime code simply checks whether the entry is null or still partial,
    // and if so ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub x1, x1, #1
                                                              // TODO: consider combining this store
                                                              // and the list head store above using
                                                              // strd.
    str w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov x0, x3                                                // Set the return value and return.
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                                // Save callee saves in case of GC.
    mov x1, xSELF                                             // Pass Thread::Current.
    bl \cxx_name
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
    ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
    ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
    ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
    add x6, x4, x7                                            // Add object size to tlab pos.
    cmp x6, x5                                                // Check if it fits; overflow works,
                                                              // since the tlab pos and end are 32
                                                              // bit values.

    // When isInitialized == 0, the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhi \slowPathLabel
    str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
    ldr x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
    add x5, x5, #1
    str x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF w0
    str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]                 // Store the class pointer.
                                                              // Fence. This is "ish" not "ishst" so
                                                              // that the code after this allocation
                                                              // site will see the right values in
                                                              // the fields of the class.
    mov x0, x4
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    ret
.endm

// The common code for art_quick_alloc_object_*region_tlab
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path region tlab allocation.
    // x0: type, xSELF(x19): Thread::Current
    // x1-x7: free.
    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME                                // Save callee saves in case of GC.
    mov x1, xSELF                                             // Pass Thread::Current.
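    // In C-like terms, the slow path below amounts to (an illustrative sketch):
    //   mirror::Object* obj = \entrypoint(klass /* x0 */, Thread::Current() /* x1 */);
    // with a null result meaning an exception is pending
    // (see RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER).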
    bl \entrypoint                                            // (mirror::Class*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1

.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    and \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64    // Apply alignment mask:
                                                              // (addr + 7) & ~7. The mask must
                                                              // be 64 bits to keep high bits in
                                                              // case of overflow.
    // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
    // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
    // 32 bit int. Since the max shift for arrays is 3, it cannot become a negative 64 bit int.
    cmp \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD                  // Possibly a large object, go slow
    bhs \slowPathLabel                                        // path.

    ldr \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]            // Check tlab for space; note that
                                                              // we use (end - begin) to handle
                                                              // negative size arrays. It is
                                                              // assumed that a negative size will
                                                              // always be greater unsigned than
                                                              // region size.
    ldr \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
    sub \xTemp2, \xTemp2, \xTemp0
    cmp \xTemp1, \xTemp2

    // The array class is always initialized here. Unlike new-instance,
    // this does not act as a double test.
    bhi \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
                                                              // Move old thread_local_pos to x0
                                                              // for the return value.
    mov x0, \xTemp0
    add \xTemp0, \xTemp0, \xTemp1
    str \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]            // Store new thread_local_pos.
    ldr \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]        // Increment thread_local_objects.
    add \xTemp0, \xTemp0, #1
    str \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF \wClass
    str \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]            // Store the class pointer.
    str \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]            // Store the array length.
                                                              // Fence.
// new-array is special. The class is loaded and immediately goes to the Initialized state
// before it is published. Therefore the only fence needed is for the publication of the object.
// See ClassLinker::CreateArrayClass() for more details.

// For publication of the new array, we don't need a 'dmb ishst' here.
// The compiler generates 'dmb ishst' for all new-array insts.
    ret
.endm

.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
    // Fast path array allocation for region tlab allocation.
    // x0: mirror::Class* type
    // x1: int32_t component_count
    // x2-x7: free.
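    // The class is copied from x0 to x3 below so that x0 is free for the result:
    // ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE moves the old thread_local_pos
    // into x0 as the address of the new array.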
    mov x3, x0
    \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
.Lslow_path\name:
    // x0: mirror::Class* klass
    // x1: int32_t component_count
    // x2: Thread* self
    SETUP_SAVE_REFS_ONLY_FRAME                                // Save callee saves in case of GC.
    mov x2, xSELF                                             // Pass Thread::Current.
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

.macro COMPUTE_ARRAY_SIZE_UNKNOWN xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    // Array classes are never finalizable or uninitialized, no need to check.
    ldr \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]  // Load component type.
    UNPOISON_HEAP_REF \wTemp0
    ldr \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
    lsr \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT    // Component size shift is in high 16
                                                              // bits.
                                                              // xCount is holding a 32 bit value,
                                                              // it cannot overflow.
    lsl \xTemp1, \xCount, \xTemp0                             // Calculate data size.
    // Add array data offset and alignment.
    add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    add \xTemp0, \xTemp0, #1                                  // Add 4 to the size only if the
                                                              // component size shift is 3
                                                              // (for 64 bit alignment).
    and \xTemp0, \xTemp0, #4
    add \xTemp1, \xTemp1, \xTemp0
.endm

.macro COMPUTE_ARRAY_SIZE_8 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    // Add array data offset and alignment.
    add \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_16 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl \xTemp1, \xCount, #1
    // Add array data offset and alignment.
    add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_32 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl \xTemp1, \xCount, #2
    // Add array data offset and alignment.
    add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl \xTemp1, \xCount, #3
    // Add array data offset and alignment.
    add \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
// the entrypoint once all backends have been updated to use the size variants.
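
// Taken together with ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE above, each
// COMPUTE_ARRAY_SIZE_* variant computes, in C-like terms (an illustrative sketch):
//   size = ((count << shift) + data_offset + OBJECT_ALIGNMENT_MASK)
//              & ~OBJECT_ALIGNMENT_MASK;
// with shift = 0/1/2/3 and data_offset = MIRROR_INT_ARRAY_DATA_OFFSET
// (MIRROR_WIDE_ARRAY_DATA_OFFSET for 64-bit components). The _UNKNOWN variant reads
// the shift from the component type at runtime and adds 4 exactly when shift == 3,
// which is equivalent to using the wide data offset for 64-bit components.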
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

    /*
     * Called by managed code when the thread has been asked to suspend.
     */
    .extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // Save callee saves for stack crawl.
    mov x0, xSELF
    bl artTestSuspendFromCode                                 // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_test_suspend

ENTRY art_quick_implicit_suspend
    mov x0, xSELF
    SETUP_SAVE_REFS_ONLY_FRAME                                // Save callee saves for stack crawl.
    bl artTestSuspendFromCode                                 // (Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_implicit_suspend

    /*
     * Called by managed code that is attempting to call a method on a proxy class. On entry
     * x0 holds the proxy method and x1 holds the receiver; the frame size of the invoked proxy
     * method agrees with a ref and args callee save frame.
     */
    .extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    mov x2, xSELF                                             // Pass Thread::Current.
    mov x3, sp                                                // Pass SP.
    bl artQuickProxyInvokeHandler                             // (Method* proxy method, receiver, Thread*, SP)
    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
    cbnz x2, .Lexception_in_proxy                             // Success if no exception is pending.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME                          // Restore frame.
    REFRESH_MARKING_REGISTER
    fmov d0, x0                                               // Store result in d0 in case it was float or double.
    ret                                                       // Return on success.
.Lexception_in_proxy:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * x0 is the conflict ArtMethod.
     * xIP1 is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to xIP0, xIP1, x13-x15, and x0.
     */
    .extern artLookupResolvedMethod
ENTRY art_quick_imt_conflict_trampoline
    ldr xIP0, [sp, #0]                                        // Load referrer.
    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
#error "Expecting declaring class and access flags to be consecutive for LDP."
#endif
    ldp wIP0, w15, [xIP0, #ART_METHOD_DECLARING_CLASS_OFFSET]
    // If the method is obsolete, just go through the dex cache miss slow path.
    tbnz x15, #ACC_OBSOLETE_METHOD_SHIFT, .Limt_conflict_trampoline_dex_cache_miss
    ldr wIP0, [xIP0, #MIRROR_CLASS_DEX_CACHE_OFFSET]          // Load the DexCache (without read barrier).
    UNPOISON_HEAP_REF wIP0
    ubfx x15, xIP1, #0, #METHOD_DEX_CACHE_HASH_BITS           // Calculate DexCache method slot index.
    ldr xIP0, [xIP0, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
    add xIP0, xIP0, x15, lsl #(POINTER_SIZE_SHIFT + 1)        // Load DexCache method slot address.

    // Relaxed atomic load x14:x15 from the dex cache slot.
.Limt_conflict_trampoline_retry_load:
    ldxp x14, x15, [xIP0]
    stxp w13, x14, x15, [xIP0]
    cbnz w13, .Limt_conflict_trampoline_retry_load

    cmp x15, xIP1                                             // Compare method index to see if we had a DexCache method hit.
    bne .Limt_conflict_trampoline_dex_cache_miss
.Limt_conflict_trampoline_have_interface_method:
    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]                 // Load ImtConflictTable.
    ldr x0, [xIP1]                                            // Load first entry in ImtConflictTable.
.Limt_table_iterate:
    cmp x0, x14
    // Branch if found. Benchmarks have shown doing a branch here is better.
    beq .Limt_table_found
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cbz x0, .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
    b .Limt_table_iterate
.Limt_table_found:
    // We successfully hit an entry in the table. Load the target method and jump to it.
    ldr x0, [xIP1, #__SIZEOF_POINTER__]
    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    br xIP0
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
    mov x0, x14                                               // Load interface method.
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here;
    // artLookupResolvedMethod() is not allowed to walk the stack.

    // Save GPR args and return address, allocate space for FPR args, align stack.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, x5, 32
    SAVE_TWO_REGS x6, x7, 48
    SAVE_REG xLR, (8 * 8 + 8 * 8 + 8)

    // Save FPR args.
    stp d0, d1, [sp, #64]
    stp d2, d3, [sp, #80]
    stp d4, d5, [sp, #96]
    stp d6, d7, [sp, #112]

    mov x0, xIP1                                              // Pass method index.
    ldr x1, [sp, #(8 * 8 + 8 * 8 + 8 + 8)]                    // Pass referrer.
    bl artLookupResolvedMethod                                // (uint32_t method_index, ArtMethod* referrer)
    mov x14, x0                                               // Move the interface method to x14 where the loop above expects it.

    // Restore FPR args.
    ldp d0, d1, [sp, #64]
    ldp d2, d3, [sp, #80]
    ldp d4, d5, [sp, #96]
    ldp d6, d7, [sp, #112]

    // Restore GPR args and return address.
    RESTORE_REG xLR, (8 * 8 + 8 * 8 + 8)
    RESTORE_TWO_REGS x2, x3, 16
    RESTORE_TWO_REGS x4, x5, 32
    RESTORE_TWO_REGS x6, x7, 48
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)

    // If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
    cbz x14, .Lconflict_trampoline
    b .Limt_conflict_trampoline_have_interface_method
END art_quick_imt_conflict_trampoline

ENTRY art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    mov x2, xSELF
    mov x3, sp
    bl artQuickResolutionTrampoline                           // (called, receiver, Thread*, SP)
    cbz x0, 1f
    mov xIP0, x0                                              // Remember returned code pointer in xIP0.
    ldr x0, [sp, #0]                                          // artQuickResolutionTrampoline puts called method in *SP.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    br xIP0
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline

/*
 * Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 * | Return X30/LR     |
 * | X29/FP            |    callee save
 * | X28               |    callee save
 * | X27               |    callee save
 * | X26               |    callee save
 * | X25               |    callee save
 * | X24               |    callee save
 * | X23               |    callee save
 * | X22               |    callee save
 * | X21               |    callee save
 * | X20               |    callee save
 * | X19               |    callee save
 * | X7                |    arg7
 * | X6                |    arg6
 * | X5                |    arg5
 * | X4                |    arg4
 * | X3                |    arg3
 * | X2                |    arg2
 * | X1                |    arg1
 * | D7                |    float arg 8
 * | D6                |    float arg 7
 * | D5                |    float arg 6
 * | D4                |    float arg 5
 * | D3                |    float arg 4
 * | D2                |    float arg 3
 * | D1                |    float arg 2
 * | D0                |    float arg 1
 * | Method*           | <- X0
 * #-------------------#
 * | local ref cookie  | // 4B
 * | handle scope size | // 4B
 * #-------------------#
 * | JNI Call Stack    |
 * #-------------------#    <--- SP on native call
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 * | Native code ptr   |
 * #-------------------#
 * | Free scratch      |
 * #-------------------#
 * | Ptr to (1)        |    <--- SP
 * #-------------------#
 */
    /*
     * Called to do a generic JNI down-call
     */
ENTRY art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0

    // Save SP, so we can have static CFI info.
    mov x28, sp
    .cfi_def_cfa_register x28

    // This looks the same, but is different: this will be updated to point to the bottom
    // of the frame when the handle scope is inserted.
    mov xFP, sp

    mov xIP0, #5120
    sub sp, sp, xIP0

    // Prepare for artQuickGenericJniTrampoline call:
    // (Thread*, SP)
    //    x0    x1   <= C calling convention
    //  xSELF  xFP   <= where they are

    mov x0, xSELF                                             // Thread*
    mov x1, xFP
    bl artQuickGenericJniTrampoline                           // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // x0: pointer to native code, 0 on error.
    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    cbz x0, .Lexception_in_native

    // Release part of the alloca.
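    // (As noted above, x1 is the bottom of the used area of the alloca; everything
    // between the current sp and x1 is scratch that is no longer needed.)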
    mov sp, x1

    // Save the code pointer.
    mov xIP0, x0

    // Load parameters from frame into registers.
    // TODO: Check with artQuickGenericJniTrampoline.
    // Also, check against AAPCS64 - the stack arguments are interleaved.
    ldp x0, x1, [sp]
    ldp x2, x3, [sp, #16]
    ldp x4, x5, [sp, #32]
    ldp x6, x7, [sp, #48]

    ldp d0, d1, [sp, #64]
    ldp d2, d3, [sp, #80]
    ldp d4, d5, [sp, #96]
    ldp d6, d7, [sp, #112]

    add sp, sp, #128

    blr xIP0                                                  // Native call.

    // Result sign extension is handled in C code.
    // Prepare for artQuickGenericJniEndTrampoline call:
    // (Thread*, result, result_f)
    //    x0      x1      x2        <= C calling convention
    mov x1, x0                                                // Result (from saved).
    mov x0, xSELF                                             // Thread register.
    fmov x2, d0                                               // d0 will contain floating point
                                                              // result, but it needs to go into x2.

    bl artQuickGenericJniEndTrampoline

    // Pending exceptions possible.
    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
    cbnz x2, .Lexception_in_native

    // Tear down the alloca.
    mov sp, x28
    .cfi_def_cfa_register sp

    // Tear down the callee-save frame.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER

    // Store into fpr, for when it's a fpr return...
    fmov d0, x0
    ret

.Lexception_in_native:
    // Move to x1 then sp to please assembler.
    ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
    add sp, x1, #-1                                           // Remove the GenericJNI tag.
    .cfi_def_cfa_register sp
    // This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END art_quick_generic_jni_trampoline

/*
 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
 * of a quick call:
 * x0 = method being called/to bridge to.
 * x1..x7, d0..d7 = arguments to that method.
 */
ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME                            // Set up frame and save arguments.

    // x0 will contain mirror::ArtMethod* method.
    mov x1, xSELF                                             // Pass Thread::Current().
    mov x2, sp

    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
    //                                      mirror::ArtMethod** sp)
    bl artQuickToInterpreterBridge

    RESTORE_SAVE_REFS_AND_ARGS_FRAME                          // TODO: no need to restore arguments in this case.
    REFRESH_MARKING_REGISTER

    fmov d0, x0

    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge

/*
 * Called to attempt to execute an obsolete method.
 */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod


//
// Instrumentation-related stubs
//
    .extern artInstrumentationMethodEntryFromCode
ENTRY art_quick_instrumentation_entry
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    mov x20, x0                                               // Preserve method reference in a callee-save.

    mov x2, xSELF
    mov x3, sp                                                // Pass SP.
    bl artInstrumentationMethodEntryFromCode                  // (Method*, Object*, Thread*, SP)

    mov xIP0, x0                                              // x0 = result of call.
    mov x0, x20                                               // Reload method reference.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME                          // Note: will restore xSELF.
    REFRESH_MARKING_REGISTER
    cbz xIP0, 1f                                              // Deliver the pending exception if method is null.
    adr xLR, art_quick_instrumentation_exit
    br xIP0                                                   // Tail-call method with lr set to art_quick_instrumentation_exit.
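
    // Reached only through the cbz above, i.e. when artInstrumentationMethodEntryFromCode
    // returned null and an exception is pending.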
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_entry

    .extern artInstrumentationMethodExitFromCode
ENTRY art_quick_instrumentation_exit
    mov xLR, #0                                               // Clobber LR for later checks.
    SETUP_SAVE_EVERYTHING_FRAME

    add x3, sp, #16                                           // Pass floating-point result pointer, in kSaveEverything frame.
    add x2, sp, #272                                          // Pass integer result pointer, in kSaveEverything frame.
    mov x1, sp                                                // Pass SP.
    mov x0, xSELF                                             // Pass Thread.
    bl artInstrumentationMethodExitFromCode                   // (Thread*, SP, gpr_res*, fpr_res*)

    cbz x0, .Ldo_deliver_instrumentation_exception            // Handle error.
    cbnz x1, .Ldeoptimize
    // Normal return.
    str x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]             // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    br lr
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.Ldeoptimize:
    str x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]             // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    b art_quick_deoptimize
END art_quick_instrumentation_exit

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimize
ENTRY art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME
    mov x0, xSELF                                             // Pass thread.
    bl artDeoptimize                                          // (Thread*)
    brk 0
END art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
    mov x1, xSELF                                             // Pass thread.
    bl artDeoptimizeFromCompiledCode                          // (DeoptimizationKind, Thread*)
    brk 0
END art_quick_deoptimize_from_compiled_code


    /*
     * String's indexOf.
     *
     * TODO: Not very optimized.
     * On entry:
     *    x0: string object (known non-null)
     *    w1: char to match (known <= 0xFFFF)
     *    w2: Starting offset in string data
     */
ENTRY art_quick_indexof
#if (STRING_COMPRESSION_FEATURE)
    ldr w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
#else
    ldr w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
#endif
    add x0, x0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
    /* w4 holds count (with flag) and w3 holds actual length */
    lsr w3, w4, #1
#endif
    /* Clamp start to [0..count] */
    cmp w2, #0
    csel w2, wzr, w2, lt
    cmp w2, w3
    csel w2, w3, w2, gt

    /* Save a copy to compute result */
    mov x5, x0

#if (STRING_COMPRESSION_FEATURE)
    tbz w4, #0, .Lstring_indexof_compressed
#endif
    /* Build pointer to start of data to compare and pre-bias */
    add x0, x0, x2, lsl #1
    sub x0, x0, #2
    /* Compute iteration count */
    sub w2, w3, w2

    /*
     * At this point we have:
     *   x0: start of the data to test
     *   w1: char to compare
     *   w2: iteration count
     *   x5: original start of string data
     */

    subs w2, w2, #4
    b.lt .Lindexof_remainder

.Lindexof_loop4:
    ldrh w6, [x0, #2]!
    ldrh w7, [x0, #2]!
    ldrh wIP0, [x0, #2]!
    ldrh wIP1, [x0, #2]!
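    // Each ldrh above pre-incremented x0 by 2, so x0 now points at the fourth char
    // just loaded; the .Lmatch_0/1/2/3 blocks below subtract 6/4/2/0 bytes to
    // recover the address of the matching char before computing its index.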
    cmp w6, w1
    b.eq .Lmatch_0
    cmp w7, w1
    b.eq .Lmatch_1
    cmp wIP0, w1
    b.eq .Lmatch_2
    cmp wIP1, w1
    b.eq .Lmatch_3
    subs w2, w2, #4
    b.ge .Lindexof_loop4

.Lindexof_remainder:
    adds w2, w2, #4
    b.eq .Lindexof_nomatch

.Lindexof_loop1:
    ldrh w6, [x0, #2]!
    cmp w6, w1
    b.eq .Lmatch_3
    subs w2, w2, #1
    b.ne .Lindexof_loop1

.Lindexof_nomatch:
    mov x0, #-1
    ret

.Lmatch_0:
    sub x0, x0, #6
    sub x0, x0, x5
    asr x0, x0, #1
    ret
.Lmatch_1:
    sub x0, x0, #4
    sub x0, x0, x5
    asr x0, x0, #1
    ret
.Lmatch_2:
    sub x0, x0, #2
    sub x0, x0, x5
    asr x0, x0, #1
    ret
.Lmatch_3:
    sub x0, x0, x5
    asr x0, x0, #1
    ret
#if (STRING_COMPRESSION_FEATURE)
    /*
     * Comparing compressed string character-per-character with
     * input character
     */
.Lstring_indexof_compressed:
    add x0, x0, x2
    sub x0, x0, #1
    sub w2, w3, w2
.Lstring_indexof_compressed_loop:
    subs w2, w2, #1
    b.lt .Lindexof_nomatch
    ldrb w6, [x0, #1]!
    cmp w6, w1
    b.eq .Lstring_indexof_compressed_matched
    b .Lstring_indexof_compressed_loop
.Lstring_indexof_compressed_matched:
    sub x0, x0, x5
    ret
#endif
END art_quick_indexof

    /*
     * Create a function `name` calling the ReadBarrier::Mark routine,
     * getting its argument and returning its result through W register
     * `wreg` (corresponding to X register `xreg`), saving and restoring
     * all caller-save registers.
     *
     * If `wreg` is different from `w0`, the generated function follows a
     * non-standard runtime calling convention:
     * - register `wreg` is used to pass the (sole) argument of this
     *   function (instead of W0);
     * - register `wreg` is used to return the result of this function
     *   (instead of W0);
     * - W0 is treated like a normal (non-argument) caller-save register;
     * - everything else is the same as in the standard runtime calling
     *   convention (e.g. standard callee-save registers are preserved).
     */
.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
    // Reference is null, no work to do at all.
    cbz \wreg, .Lret_rb_\name
    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
    ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
.Lret_rb_\name:
    ret
.Lnot_marked_rb_\name:
    // Check if the top two bits are one; if this is the case it is a forwarding address.
    tst wIP0, wIP0, lsl #1
    bmi .Lret_forwarding_address\name
.Lslow_rb_\name:
    /*
     * Allocate 44 stack slots * 8 = 352 bytes:
     * - 19 slots for core registers X0-X15, X17, X19, LR
     * - 1 slot padding
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // We must not clobber IP1 since code emitted for HLoadClass and HLoadString
    // relies on IP1 being preserved.
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, x5, 32
    SAVE_TWO_REGS x6, x7, 48
    SAVE_TWO_REGS x8, x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    SAVE_TWO_REGS x17, x19, 128                               // Skip x16, i.e. IP0, and x18, the platform register.
    SAVE_REG xLR, 144                                         // Save also return address.
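    // Frame sketch for the 352 bytes reserved above (for orientation; the padding
    // slot presumably keeps the FP saves below at a 16-byte-aligned offset):
    //   [sp, #0]   .. [sp, #151]: 19 core slots (x0-x15, x17, x19, lr)
    //   [sp, #152] .. [sp, #159]: padding
    //   [sp, #160] .. [sp, #351]: 24 FP slots (d0-d7, d16-d31)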
    // Save all potentially live caller-save floating-point registers.
    stp d0, d1, [sp, #160]
    stp d2, d3, [sp, #176]
    stp d4, d5, [sp, #192]
    stp d6, d7, [sp, #208]
    stp d16, d17, [sp, #224]
    stp d18, d19, [sp, #240]
    stp d20, d21, [sp, #256]
    stp d22, d23, [sp, #272]
    stp d24, d25, [sp, #288]
    stp d26, d27, [sp, #304]
    stp d28, d29, [sp, #320]
    stp d30, d31, [sp, #336]

    .ifnc \wreg, w0
      mov w0, \wreg                                           // Pass arg1 - obj from `wreg`.
    .endif
    bl artReadBarrierMark                                     // artReadBarrierMark(obj)
    .ifnc \wreg, w0
      mov \wreg, w0                                           // Return result into `wreg`.
    .endif

    // Restore core regs, except `xreg`, as `wreg` is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REGS_NE x0, x1, 0, \xreg
    POP_REGS_NE x2, x3, 16, \xreg
    POP_REGS_NE x4, x5, 32, \xreg
    POP_REGS_NE x6, x7, 48, \xreg
    POP_REGS_NE x8, x9, 64, \xreg
    POP_REGS_NE x10, x11, 80, \xreg
    POP_REGS_NE x12, x13, 96, \xreg
    POP_REGS_NE x14, x15, 112, \xreg
    POP_REGS_NE x17, x19, 128, \xreg
    POP_REG_NE xLR, 144, \xreg                                // Restore also return address.
    // Restore floating-point registers.
    ldp d0, d1, [sp, #160]
    ldp d2, d3, [sp, #176]
    ldp d4, d5, [sp, #192]
    ldp d6, d7, [sp, #208]
    ldp d16, d17, [sp, #224]
    ldp d18, d19, [sp, #240]
    ldp d20, d21, [sp, #256]
    ldp d22, d23, [sp, #272]
    ldp d24, d25, [sp, #288]
    ldp d26, d27, [sp, #304]
    ldp d28, d29, [sp, #320]
    ldp d30, d31, [sp, #336]
    // Remove frame and return.
    DECREASE_FRAME 352
    ret
.Lret_forwarding_address\name:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
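    // (The lock word stores the forwarding address shifted right by the same
    // amount, objects being at least 8-byte aligned, so the left shift below
    // also reconstructs the full address.)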
    lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    ret
END \name
.endm

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16    // IP0 is blocked.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18    // x18 is blocked.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29


.macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
    .if \xreg
      \macro_to_use \x
    .else
      \macro_to_use \w
    .endif
.endm

.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x0, w0, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x1, w1, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x2, w2, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x3, w3, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x4, w4, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x5, w5, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x6, w6, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x7, w7, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x8, w8, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x9, w9, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x10, w10, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x11, w11, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x12, w12, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x13, w13, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x14, w14, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x15, w15, \xreg
    \macro_for_reserved_register                              // IP0 is reserved.
    \macro_for_reserved_register                              // IP1 is reserved.
    \macro_for_reserved_register                              // x18 is reserved.
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x19, w19, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x20, w20, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x21, w21, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x22, w22, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x23, w23, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x24, w24, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x25, w25, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x26, w26, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x27, w27, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x28, w28, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x29, w29, \xreg
    \macro_for_reserved_register                              // lr is reserved.
    \macro_for_reserved_register                              // sp is reserved.
.endm

.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 1
.endm

.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 0
.endm

.macro BRK0_BRK0
    brk 0
    brk 0
.endm

#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
.macro INTROSPECTION_ARRAY_LOAD index_reg
    ldr wIP0, [xIP0, \index_reg, lsl #2]
    b art_quick_read_barrier_mark_introspection
.endm

.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
    mov \reg, wIP0
    br lr                                                     // Do not use RET as we do not enter the entrypoint with "BL".
.endm

.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
    /*
     * Allocate 42 stack slots * 8 = 336 bytes:
     * - 18 slots for core registers X0-X15, X19, LR
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 336
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, x5, 32
    SAVE_TWO_REGS x6, x7, 48
    SAVE_TWO_REGS x8, x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    // Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
    SAVE_TWO_REGS x19, xLR, 128                               // Save return address.
    // Save all potentially live caller-save floating-point registers.
    stp d0, d1, [sp, #144]
    stp d2, d3, [sp, #160]
    stp d4, d5, [sp, #176]
    stp d6, d7, [sp, #192]
    stp d16, d17, [sp, #208]
    stp d18, d19, [sp, #224]
    stp d20, d21, [sp, #240]
    stp d22, d23, [sp, #256]
    stp d24, d25, [sp, #272]
    stp d26, d27, [sp, #288]
    stp d28, d29, [sp, #304]
    stp d30, d31, [sp, #320]

    mov x0, xIP0
    bl artReadBarrierMark                                     // artReadBarrierMark(obj)
    mov xIP0, x0

    // Restore core regs, except x0 and x1 as the return register switch case
    // address calculation is smoother with an extra register.
    RESTORE_TWO_REGS x2, x3, 16
    RESTORE_TWO_REGS x4, x5, 32
    RESTORE_TWO_REGS x6, x7, 48
    RESTORE_TWO_REGS x8, x9, 64
    RESTORE_TWO_REGS x10, x11, 80
    RESTORE_TWO_REGS x12, x13, 96
    RESTORE_TWO_REGS x14, x15, 112
    // Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
    RESTORE_TWO_REGS x19, xLR, 128                            // Restore return address.
    // Restore caller-save floating-point registers.
    ldp d0, d1, [sp, #144]
    ldp d2, d3, [sp, #160]
    ldp d4, d5, [sp, #176]
    ldp d6, d7, [sp, #192]
    ldp d16, d17, [sp, #208]
    ldp d18, d19, [sp, #224]
    ldp d20, d21, [sp, #240]
    ldp d22, d23, [sp, #256]
    ldp d24, d25, [sp, #272]
    ldp d26, d27, [sp, #288]
    ldp d28, d29, [sp, #304]
    ldp d30, d31, [sp, #320]

    ldr x0, [lr, #\ldr_offset]                                // Load the instruction.
    adr xIP1, .Lmark_introspection_return_switch
    bfi xIP1, x0, #3, #5                                      // Calculate switch case address.
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 336
    br xIP1
.endm

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register as the LDR instruction would load.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk (i.e.
     * the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
     * the reference into IP0 using introspection and calls the main entrypoint,
     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
     * the passed reference is poisoned.
     *
     * For array accesses with non-constant index, the thunk inserts bits
     * 16-21 of the LDR instruction into the entrypoint address, effectively
     * calculating a switch case label based on the index register (bits 16-20)
     * and adding an extra offset (bit 21 is set) to differentiate from the
     * main entrypoint, then moves the base register to IP0 and jumps to the
     * switch case. Therefore we need to align the main entrypoint to 512 bytes,
     * accounting for a 256-byte offset followed by 32 array entrypoints
     * starting at art_quick_read_barrier_mark_introspection_arrays, each
     * containing an LDR (register) and a branch to the main entrypoint.
     *
     * For GC root accesses we cannot use the main entrypoint because of the
     * different offset where the LDR instruction in generated code is located.
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To re-use the same entrypoint pointer in generated code, we make sure
     * that the gc root entrypoint (a copy of the entrypoint with a different
     * offset for introspection loads) is located at a known offset (768 bytes,
     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
     * the root register to IP0 and jumps to the customized entrypoint,
     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
     * performs all the fast-path checks, so we need just the slow path.
     * The UnsafeCASObject intrinsic also uses the GC root entrypoint, with
     * MOV instead of LDR; the destination register is in the same bits.
     *
     * The code structure is:
     *   art_quick_read_barrier_mark_introspection:
     *     Up to 256 bytes for the main entrypoint code.
     *     Padding to 256 bytes if needed.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 256 bytes for array load switch cases (32x2 instructions).
     *   .Lmark_introspection_return_switch:
     *     Exactly 256 bytes for return switch cases (32x2 instructions).
     *   art_quick_read_barrier_mark_introspection_gc_roots:
     *     GC root entrypoint code.
     */
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP0 contains the reference, IP1 can be freely used.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF wIP0
    // If reference is null, just return it in the right register.
    cbz wIP0, .Lmark_introspection_return
    // Use wIP1 as temp and check the mark bit of the reference.
    ldr wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
.Lmark_introspection_return:
    // Without an extra register for the return switch case address calculation,
    // we exploit the high word of xIP0 to temporarily store the ref_reg*8,
    // so the return switch below must move wIP0 instead of xIP0 to the register.
    ldr wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET]  // Load the instruction.
    bfi xIP0, xIP1, #(32 + 3), #5                             // Extract ref_reg*8 to high word in xIP0.
    adr xIP1, .Lmark_introspection_return_switch
    bfxil xIP1, xIP0, #32, #8                                 // Calculate return switch case address.
    br xIP1
.Lmark_introspection_unmarked:
    // Check if the top two bits are one; if this is the case it is a forwarding address.
    tst wIP1, wIP1, lsl #1
    bmi .Lmark_introspection_forwarding_address
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET

.Lmark_introspection_forwarding_address:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b .Lmark_introspection_return

    // We're very close to the allotted 256B for the entrypoint code before the
    // array switch cases. Should we go a little bit over the limit, we can
    // move some code after the array switch cases and return switch cases.
    .balign 256
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
.Lmark_introspection_return_switch:
    FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
    .hidden art_quick_read_barrier_mark_introspection_gc_roots
    .global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
END art_quick_read_barrier_mark_introspection

.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME                            // Save callee saves in case allocation triggers GC.
    mov x0, x1                                                // x0 := receiver
    mov x1, xSELF                                             // x1 := Thread::Current()
    mov x2, sp                                                // x2 := SP
    bl artInvokePolymorphic                                   // artInvokePolymorphic(receiver, thread, save_area)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov d0, x0                                               // Result is in x0. Copy to floating return register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_invoke_polymorphic

.extern artInvokeCustom
ENTRY art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME                            // Save callee saves in case allocation triggers GC.
                                                              // x0 := call_site_idx
    mov x1, xSELF                                             // x1 := Thread::Current()
    mov x2, sp                                                // x2 := SP
    bl artInvokeCustom                                        // artInvokeCustom(call_site_idx, thread, save_area)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov d0, x0                                               // Copy result to double result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_invoke_custom

// Wrap ExecuteSwitchImpl in an assembly method which specifies the DEX PC for unwinding.
// Argument 0: x0: The context pointer for ExecuteSwitchImpl.
// Argument 1: x1: Pointer to the templated ExecuteSwitchImpl to call.
// Argument 2: x2: The value of DEX PC (memory address of the method's bytecode).
ENTRY ExecuteSwitchImplAsm
    SAVE_TWO_REGS_INCREASE_FRAME x19, xLR, 16
    mov x19, x2                                               // x19 = DEX PC
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* x0 */, 19 /* x19 */, 0)
    blr x1                                                    // Call the wrapped method.
    RESTORE_TWO_REGS_DECREASE_FRAME x19, xLR, 16
    ret
END ExecuteSwitchImplAsm
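
// Illustrative C++-side view of ExecuteSwitchImplAsm (a sketch of the contract
// described in the comments above, not the exact declaration in the interpreter
// sources):
//   extern "C" void ExecuteSwitchImplAsm(
//       void* ctx,                // x0: context for the templated ExecuteSwitchImpl
//       void* switch_impl,        // x1: the templated ExecuteSwitchImpl to call
//       const uint16_t* dex_pc);  // x2: DEX PC, parked in x19
// The CFI_DEFINE_DEX_PC_WITH_OFFSET note lets an unwinder crossing this frame
// recover the interpreted method's current DEX PC from x19.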