1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_arm64.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21
22
// Grow the current stack frame by \size bytes and keep the unwinder's
// CFA offset in sync so backtraces stay correct mid-function.
.macro INCREASE_FRAME size
    sub sp, sp, #(\size)
    .cfi_adjust_cfa_offset (\size)
.endm
27
// Shrink the current stack frame by \size bytes, updating the
// unwinder's CFA offset to match.
.macro DECREASE_FRAME size
    add sp, sp, #(\size)
    .cfi_adjust_cfa_offset -(\size)
.endm
32
// Store \reg at [sp + offset] and emit CFI recording where the register
// was saved relative to the CFA.
.macro SAVE_REG reg, offset
    str \reg, [sp, #(\offset)]
    .cfi_rel_offset \reg, (\offset)
.endm
37
// Reload \reg from [sp + offset] and tell the unwinder the register no
// longer lives on the stack.
.macro RESTORE_REG reg, offset
    ldr \reg, [sp, #(\offset)]
    .cfi_restore \reg
.endm
42
// Pre-index store: grows the frame by \frame_adjustment bytes and saves
// \reg at the new stack top in a single instruction, with matching CFI.
.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
    str \reg, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg, 0
.endm
48
// Post-index load: restores \reg from the stack top and pops
// \frame_adjustment bytes in a single instruction, with matching CFI.
.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
    ldr \reg, [sp], #(\frame_adjustment)
    .cfi_restore \reg
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm
54
// Store the pair \reg1/\reg2 at [sp + offset] and [sp + offset + 8],
// emitting CFI for both slots.
.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm
60
// Reload the pair \reg1/\reg2 from [sp + offset] / [sp + offset + 8]
// and mark both as restored for the unwinder.
.macro RESTORE_TWO_REGS reg1, reg2, offset
    ldp \reg1, \reg2, [sp, #(\offset)]
    .cfi_restore \reg1
    .cfi_restore \reg2
.endm
66
// Pre-index pair store: grows the frame by \frame_adjustment bytes and
// saves \reg1/\reg2 at the new stack top, with matching CFI.
.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg1, 0
    .cfi_rel_offset \reg2, 8
.endm
73
// Post-index pair load: restores \reg1/\reg2 from the stack top and pops
// \frame_adjustment bytes, with matching CFI.
.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
    .cfi_restore \reg1
    .cfi_restore \reg2
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm
80
81    /*
82     * Macro that sets up the callee save frame to conform with
83     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
84     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime* xIP0 = art::Runtime::instance_;
    // Our registers aren't intermixed - just spill in order.
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    INCREASE_FRAME 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Frame layout (176 bytes):
    //   [sp, #0]    ArtMethod* for kSaveAllCalleeSaves
    //   [sp, #8]    stack alignment filler
    //   [sp, #16]   d8-d15  (FP callee-saves; no CFI emitted for these)
    //   [sp, #80]   x19-x28, x29, LR (GP callee-saves, CFI via SAVE_TWO_REGS)
    // FP callee-saves.
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves
    SAVE_TWO_REGS x19, x20, 80
    SAVE_TWO_REGS x21, x22, 96
    SAVE_TWO_REGS x23, x24, 112
    SAVE_TWO_REGS x25, x26, 128
    SAVE_TWO_REGS x27, x28, 144
    SAVE_TWO_REGS x29, xLR, 160

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
122
123    /*
124     * Macro that sets up the callee save frame to conform with
125     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
126     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime* xIP0 = art::Runtime::instance_;
    // Our registers aren't intermixed - just spill in order.
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    INCREASE_FRAME 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // Frame layout (96 bytes): [sp] ArtMethod*, [sp, #8] x20,
    // [sp, #16..#95] x21-x28, x29, LR.
    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    SAVE_TWO_REGS x21, x22, 16
    SAVE_TWO_REGS x23, x24, 32
    SAVE_TWO_REGS x25, x26, 48
    SAVE_TWO_REGS x27, x28, 64
    SAVE_TWO_REGS x29, xLR, 80

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    // x20 shares the stp with the ArtMethod* slot, so its CFI is emitted
    // manually below instead of via SAVE_TWO_REGS.
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
162
// Undoes SETUP_SAVE_REFS_ONLY_FRAME: restores x20-x29 and LR from their
// frame slots, then pops the 96-byte frame. The ArtMethod* slot at [sp]
// is simply discarded.
// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    // x20 was stored paired with the ArtMethod* slot, at offset 8.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_REG x20, 8
    RESTORE_TWO_REGS x21, x22, 16
    RESTORE_TWO_REGS x23, x24, 32
    RESTORE_TWO_REGS x25, x26, 48
    RESTORE_TWO_REGS x27, x28, 64
    RESTORE_TWO_REGS x29, xLR, 80

    DECREASE_FRAME 96
.endm
178
// Pops the 96-byte kSaveRefsOnly frame WITHOUT reloading any of the
// saved registers (and without emitting .cfi_restore for them).
.macro POP_SAVE_REFS_ONLY_FRAME
    DECREASE_FRAME 96
.endm
182
183
// Spills the kSaveRefsAndArgs register set into a fresh 224-byte frame.
// Does NOT store an ArtMethod* at [sp] or update top_quick_frame; the
// wrapper macros below do that.
// Frame layout: [sp] ArtMethod*, [sp, #8] filler, [sp, #16] d0-d7 (FP args),
// [sp, #80] x1-x6 (core args), [sp, #128] x7 + x20-x28 (callee-saves),
// [sp, #208] x29 and LR.
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    INCREASE_FRAME 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args (no CFI emitted for d-registers).
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    SAVE_TWO_REGS x1, x2, 80
    SAVE_TWO_REGS x3, x4, 96
    SAVE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    SAVE_TWO_REGS x7, x20, 128
    SAVE_TWO_REGS x21, x22, 144
    SAVE_TWO_REGS x23, x24, 160
    SAVE_TWO_REGS x25, x26, 176
    SAVE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    SAVE_TWO_REGS x29, xLR, 208

.endm
218
219    /*
220     * Macro that sets up the callee save frame to conform with
221     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
222     *
223     * TODO This is probably too conservative - saving FP & LR.
224     */
// Builds the kSaveRefsAndArgs frame with the runtime's kSaveRefsAndArgs
// ArtMethod* at the bottom, and publishes sp as the thread's
// top_quick_frame. Clobbers xIP0.
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime* xIP0 = art::Runtime::instance_;
    // Our registers aren't intermixed - just spill in order.
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
241
// Same as SETUP_SAVE_REFS_AND_ARGS_FRAME, but stores the caller-supplied
// ArtMethod* in x0 (rather than a runtime callee-save method) at the
// bottom of the frame. Clobbers xIP0.
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
249
// Undoes SETUP_SAVE_REFS_AND_ARGS_FRAME(_INTERNAL): reloads FP and core
// argument registers, x7, the callee-saves, x29 and LR, then pops the
// 224-byte frame. The ArtMethod* slot at [sp] is discarded.
// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    RESTORE_TWO_REGS x1, x2, 80
    RESTORE_TWO_REGS x3, x4, 96
    RESTORE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_TWO_REGS x7, x20, 128
    RESTORE_TWO_REGS x21, x22, 144
    RESTORE_TWO_REGS x23, x24, 160
    RESTORE_TWO_REGS x25, x26, 176
    RESTORE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    RESTORE_TWO_REGS x29, xLR, 208

    DECREASE_FRAME 224
.endm
278
279    /*
280     * Macro that sets up the callee save frame to conform with
281     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
282     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
283     * and saving registers x29 and LR is handled elsewhere.
284     */
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Frame layout (512 bytes): [sp] ArtMethod*, [sp, #8] filler,
    // [sp, #16] d0-d31, [sp, #272] x0-x17 and x19-x28, [sp, #496] x29/LR
    // (saved by the caller of this macro).
    // Save FP registers. No CFI is emitted for the d-registers.
    stp d0, d1,   [sp, #16]
    stp d2, d3,   [sp, #32]
    stp d4, d5,   [sp, #48]
    stp d6, d7,   [sp, #64]
    stp d8, d9,   [sp, #80]
    stp d10, d11, [sp, #96]
    stp d12, d13, [sp, #112]
    stp d14, d15, [sp, #128]
    stp d16, d17, [sp, #144]
    stp d18, d19, [sp, #160]
    stp d20, d21, [sp, #176]
    stp d22, d23, [sp, #192]
    stp d24, d25, [sp, #208]
    stp d26, d27, [sp, #224]
    stp d28, d29, [sp, #240]
    stp d30, d31, [sp, #256]

    // Save core registers. x18 is skipped: it is the platform register.
    SAVE_TWO_REGS  x0,  x1, 272
    SAVE_TWO_REGS  x2,  x3, 288
    SAVE_TWO_REGS  x4,  x5, 304
    SAVE_TWO_REGS  x6,  x7, 320
    SAVE_TWO_REGS  x8,  x9, 336
    SAVE_TWO_REGS x10, x11, 352
    SAVE_TWO_REGS x12, x13, 368
    SAVE_TWO_REGS x14, x15, 384
    SAVE_TWO_REGS x16, x17, 400 // Do not save the platform register.
    SAVE_TWO_REGS x19, x20, 416
    SAVE_TWO_REGS x21, x22, 432
    SAVE_TWO_REGS x23, x24, 448
    SAVE_TWO_REGS x25, x26, 464
    SAVE_TWO_REGS x27, x28, 480

    // art::Runtime* xIP0 = art::Runtime::instance_;
    adrp xIP0, _ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    // (\runtime_method_offset defaults to the kSaveEverything slot but may
    // be overridden by the caller.)
    ldr xIP0, [xIP0, \runtime_method_offset]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
338
339    /*
340     * Macro that sets up the callee save frame to conform with
341     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
342     */
// Builds a full 512-byte kSaveEverything frame: allocates the frame,
// saves x29/LR at its top, then delegates the rest of the spilling to
// the _DECREMENTED_SP_SKIP_X29_LR variant. Clobbers xIP0.
.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    INCREASE_FRAME 512
    SAVE_TWO_REGS x29, xLR, 496
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
.endm
348
// Undoes SETUP_SAVE_EVERYTHING_FRAME but deliberately leaves x0 alone
// (so a return value placed in x0 survives). x1 is reloaded alone from
// offset 280 (= 272 + 8, its slot within the x0/x1 pair).
.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    // Restore FP registers.
    ldp d0, d1,   [sp, #16]
    ldp d2, d3,   [sp, #32]
    ldp d4, d5,   [sp, #48]
    ldp d6, d7,   [sp, #64]
    ldp d8, d9,   [sp, #80]
    ldp d10, d11, [sp, #96]
    ldp d12, d13, [sp, #112]
    ldp d14, d15, [sp, #128]
    ldp d16, d17, [sp, #144]
    ldp d18, d19, [sp, #160]
    ldp d20, d21, [sp, #176]
    ldp d22, d23, [sp, #192]
    ldp d24, d25, [sp, #208]
    ldp d26, d27, [sp, #224]
    ldp d28, d29, [sp, #240]
    ldp d30, d31, [sp, #256]

    // Restore core registers, except x0.
    RESTORE_REG            x1, 280
    RESTORE_TWO_REGS  x2,  x3, 288
    RESTORE_TWO_REGS  x4,  x5, 304
    RESTORE_TWO_REGS  x6,  x7, 320
    RESTORE_TWO_REGS  x8,  x9, 336
    RESTORE_TWO_REGS x10, x11, 352
    RESTORE_TWO_REGS x12, x13, 368
    RESTORE_TWO_REGS x14, x15, 384
    RESTORE_TWO_REGS x16, x17, 400 // Do not restore the platform register.
    RESTORE_TWO_REGS x19, x20, 416
    RESTORE_TWO_REGS x21, x22, 432
    RESTORE_TWO_REGS x23, x24, 448
    RESTORE_TWO_REGS x25, x26, 464
    RESTORE_TWO_REGS x27, x28, 480
    RESTORE_TWO_REGS x29, xLR, 496

    DECREASE_FRAME 512
.endm
387
// Fully undoes SETUP_SAVE_EVERYTHING_FRAME, including x0 (reloaded from
// its slot at offset 272 before the shared restore path runs).
.macro RESTORE_SAVE_EVERYTHING_FRAME
    RESTORE_REG  x0, 272
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
.endm
392
// Macro to refresh the Marking Register (W20).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
//
// Reloads wMR from Thread::Current()->is_gc_marking; compiles to nothing
// unless the Baker read barrier configuration is enabled.
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr wMR, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm
403
// Return to the caller if x0 == 0; otherwise fall through to the code
// following the macro. Uses a numeric label because this is a macro.
.macro RETURN_IF_RESULT_IS_ZERO
    cbnz x0, 1f                // result non-zero branch over
    ret                        // return
1:
.endm
409
// Return to the caller if x0 != 0; otherwise fall through to the code
// following the macro.
.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz x0, 1f                 // result zero branch over
    ret                        // return
1:
.endm
415
416    /*
417     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
418     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
419     */
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     * Does not return; the trailing brk traps if the call ever falls through.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov x0, xSELF                          // pass Thread::Current

    // Point of no return.
    bl artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0  // Unreached
.endm
427
428    /*
429     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
430     * exception is Thread::Current()->exception_.
431     */
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_. Sets up the kSaveAllCalleeSaves
     * frame first so the runtime can build a long-jump context. Does not return.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm
436
// Return to the caller if Thread::Current()->exception_ is null, using
// \reg as the scratch register for the exception load; otherwise deliver
// the pending exception (does not return).
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm
444
// Default variant: uses xIP0 as the scratch register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm
448
// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm
453
// Return to the caller if w0 == 0 (success); otherwise deliver the
// pending exception (does not return).
.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f                // result non-zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm
460
// Defines an entrypoint \c_name that builds a kSaveAllCalleeSaves frame
// and calls the no-argument throw helper \cxx_name(Thread*). The helper
// is expected not to return; brk traps if it does.
.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm
470
// Like NO_ARG_RUNTIME_EXCEPTION but builds a kSaveEverything frame, so
// every register is available to the runtime when building the context.
.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm
480
// Defines an entrypoint \c_name for throw helpers taking one argument:
// the argument arrives in x0 and is left there; Thread* goes in x1.
// The helper is expected not to return.
.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mov x1, xSELF                     // pass Thread::Current.
    bl  \cxx_name                     // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm
490
// Defines an entrypoint \c_name for throw helpers taking two arguments
// (left in x0/x1); Thread* goes in x2. Uses a kSaveEverything frame.
// The helper is expected not to return.
.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm
500
    /*
     * Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode,
     * which will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
511
    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    // Save all registers as basis for long jump context.
    // The word already pushed by the fault handler doubles as the frame's
    // saved-LR slot, so the frame is grown by one pointer less than usual
    // and only x29 still needs an explicit save.
    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
    mov x1, xSELF                     // pass Thread::Current.
    bl  artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    brk 0                             // Unreached.
END art_quick_throw_null_pointer_exception_from_signal
530
    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     * Uses the kSaveAllCalleeSaves (not kSaveEverything) frame, which keeps
     * the frame small when the stack is nearly exhausted.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
552
553    /*
554     * All generated callsites for interface invokes and invocation slow paths will load arguments
555     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
556     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
557     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/x1.
558     *
559     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
560     * of the target Method* in x0 and method->code_ in x1.
561     *
562     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
563     * thread and we branch to another stub to deliver it.
564     *
565     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
566     * pointing back to the original caller.
567     *
568     * Adapted from ARM32 code.
569     *
570     * Clobbers xIP0.
571     */
// Body shared by the invoke trampolines: calls \cxx_name to resolve the
// target method (x0 = Method*, x1 = its code_ on success), then restores
// the argument registers and tail-calls the target. On failure (x0 == 0)
// delivers the pending exception.
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    mov    x2, xSELF                      // pass Thread::Current
    mov    x3, sp
    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    mov    xIP0, x1                       // save Method*->code_ before the frame restore clobbers x1
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
    br     xIP0                           // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
// Wraps INVOKE_TRAMPOLINE_BODY in an ENTRY/END pair named \c_name.
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm
594
// Access-check variants of the invoke slow paths (see the block comment
// above INVOKE_TRAMPOLINE_BODY for the calling convention).
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
601
602
// Builds the stack frame for the art_quick_invoke_stub family and copies
// the managed argument array onto it.
//   In:  x1 = source argument array, w2 = argument size in bytes,
//        x3 = Thread* (moved into xSELF); x4 (JValue*) and x5 (shorty)
//        are spilled for the epilogue.
//   Out: xFP = frame pointer (becomes the CFA register), x9 = address of
//        the first out-arg slot, [sp] = null ArtMethod*.
.macro INVOKE_STUB_CREATE_FRAME
SAVE_SIZE=6*8   // x4, x5, x19, x20, FP, LR saved.
    SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS xFP, xLR, 32

    mov xFP, sp                            // Use xFP for frame pointer, as it's callee-saved.
    .cfi_def_cfa_register xFP

    add x10, x2, #(__SIZEOF_POINTER__ + 0xf) // Reserve space for ArtMethod*, arguments and
    and x10, x10, # ~0xf                   // round up for 16-byte stack alignment.
    sub sp, sp, x10                        // Adjust SP for ArtMethod*, args and alignment padding.

    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination address is bottom of stack + null.

    // Copy parameters into the stack, one 4-byte vreg slot per iteration,
    // walking w2 down to zero. Use numeric label as this is a macro and
    // Clang's assembler does not have unique-id variables.
1:
    cbz w2, 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]
    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm
640
// Calls the ArtMethod* in x0 via its quick-code entry point, tears down
// the INVOKE_STUB_CREATE_FRAME frame, then stores the return value
// (x0/d0/s0) into the JValue* reloaded into x4, selecting the store by
// the shorty's return-type character (reloaded via x5).
.macro INVOKE_STUB_CALL_AND_RETURN

    REFRESH_MARKING_REGISTER

    // load method-> METHOD_QUICK_CODE_OFFSET
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Pop the ArtMethod* (null), arguments and alignment padding from the stack.
    mov sp, xFP
    .cfi_def_cfa_register sp

    // Restore saved registers including value address (x4) and shorty
    // address (x5).
    RESTORE_TWO_REGS x19, x20, 16
    RESTORE_TWO_REGS xFP, xLR, 32
    RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]                          // w10 = shorty[0], the return-type character.

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 1f

    // Is it a double?
    cmp w10, #'D'
    beq 2f

    // Is it a float?
    cmp w10, #'F'
    beq 3f

    // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

1:  // Finish up.
    ret

2:  // Store double.
    str d0, [x4]
    ret

3:  // Store float.
    str s0, [x4]
    ret

.endm
692
693
694/*
695 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
696 *                                       uint32_t  *args,     x1
697 *                                       uint32_t argsize,    w2
698 *                                       Thread *self,        x3
699 *                                       JValue *result,      x4
700 *                                       char   *shorty);     x5
701 *  +----------------------+
702 *  |                      |
703 *  |  C/C++ frame         |
704 *  |       LR''           |
705 *  |       FP''           | <- SP'
706 *  +----------------------+
707 *  +----------------------+
708 *  |        x28           | <- TODO: Remove callee-saves.
709 *  |         :            |
710 *  |        x19           |
711 *  |        SP'           |
712 *  |        X5            |
713 *  |        X4            |        Saved registers
714 *  |        LR'           |
715 *  |        FP'           | <- FP
716 *  +----------------------+
717 *  | uint32_t out[n-1]    |
718 *  |    :      :          |        Outs
719 *  | uint32_t out[0]      |
720 *  | ArtMethod*           | <- SP  value=null
721 *  +----------------------+
722 *
723 * Outgoing registers:
724 *  x0    - Method*
725 *  x1-x7 - integer parameters.
726 *  d0-d7 - Floating point parameters.
727 *  xSELF = self
728 *  SP = & of ArtMethod*
729 *  x1 = "this" pointer.
730 *
731 */
ENTRY art_quick_invoke_stub
    // See the C prototype and frame diagram above.
    // Register roles in this function:
    //   x9  = cursor into the copied args on the stack (set by the frame macro)
    //   x10 = cursor into the shorty string
    //   w17 = current shorty character / scratch jump target
    //   x8  = byte offset into the integer LOADREG tables (12 per filled reg)
    //   x15 = byte offset into the FP LOADREG tables (12 per filled reg)
    //   x11-x14 = base addresses of the four LOADREG tables
    // Spill registers as per AACPS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    // Only 6 integer arg registers remain (w2-w7): w1 holds "this".
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4:                 // Arg registers exhausted: skip a 4-byte slot;
    add x9, x9, #4          // the value stays in the stack copy for the callee.
    b .LfillRegisters

.Ladvance8:                 // Arg registers exhausted: skip an 8-byte slot.
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
// Each expansion is three 4-byte instructions, hence the counter
// advances by 12 so it indexes the next entry in the table.
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub
857
858/*  extern"C"
859 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
860 *                                       uint32_t  *args,     x1
861 *                                       uint32_t argsize,    w2
862 *                                       Thread *self,        x3
863 *                                       JValue *result,      x4
864 *                                       char   *shorty);     x5
865 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    // Unlike art_quick_invoke_stub there is no implicit 'this' argument,
    // so GPR arguments start at x/w1 and all seven of x1-x7 are available.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    // Each LOADREG stanza below expands to 12 bytes (3 instructions), so
    // these offsets advance in steps of 12 and double as "registers used"
    // counters for the full-register checks below.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
    // x9 is the argument-array cursor set up by INVOKE_STUB_CREATE_FRAME;
    // each LOADREG stanza (and the .Ladvance paths) moves it past one vReg
    // (4 bytes) or one wide vReg pair (8 bytes).
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

// Argument registers are exhausted for this kind; step the argument cursor
// past the value (it is passed on the stack by INVOKE_STUB_CREATE_FRAME's
// copy) and continue parsing the shorty.
.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub
982
983
984
985/*  extern"C" void art_quick_osr_stub(void** stack,                x0
986 *                                    size_t stack_size_in_bytes,  x1
987 *                                    const uint8_t* native_pc,    x2
988 *                                    JValue *result,              x3
989 *                                    char   *shorty,              x4
990 *                                    Thread *self)                x5
991 */
ENTRY art_quick_osr_stub
    // Frame layout (SAVE_SIZE = 176 bytes):
    //   [sp, #0]/[sp, #8]    x3 (JValue* result) / x4 (char* shorty) — restored
    //                        before the return-type switch below.
    //   [sp, #16..#104]      callee-saved x19-x28, xFP, xLR.
    //   [sp, #112..#168]     callee-saved d8-d15.
    SAVE_SIZE=22*8
    SAVE_TWO_REGS_INCREASE_FRAME x3, x4, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS x21, x22, 32
    SAVE_TWO_REGS x23, x24, 48
    SAVE_TWO_REGS x25, x26, 64
    SAVE_TWO_REGS x27, x28, 80
    SAVE_TWO_REGS xFP, xLR, 96
    stp d8, d9,   [sp, #112]
    stp d10, d11, [sp, #128]
    stp d12, d13, [sp, #144]
    stp d14, d15, [sp, #160]

    mov xSELF, x5                         // Move thread pointer into SELF register.
    REFRESH_MARKING_REGISTER

    INCREASE_FRAME 16
    str xzr, [sp]                         // Store null for ArtMethod* slot
    // Branch to stub. A local `bl` so that LR points at the instruction after
    // it; .Losr_entry plants that LR in the callee frame, making the
    // OSR-compiled code return right here when it finishes.
    bl .Losr_entry
    .cfi_remember_state
    DECREASE_FRAME 16

    // Restore saved registers including value address and shorty address.
    ldp d8, d9,   [sp, #112]
    ldp d10, d11, [sp, #128]
    ldp d12, d13, [sp, #144]
    ldp d14, d15, [sp, #160]
    RESTORE_TWO_REGS x19, x20, 16
    RESTORE_TWO_REGS x21, x22, 32
    RESTORE_TWO_REGS x23, x24, 48
    RESTORE_TWO_REGS x25, x26, 64
    RESTORE_TWO_REGS x27, x28, 80
    RESTORE_TWO_REGS xFP, xLR, 96
    RESTORE_TWO_REGS_DECREASE_FRAME x3, x4, SAVE_SIZE

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]                        // w10 = first shorty char = return type.

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit
    // Is it a double?
    cmp w10, #'D'
    beq .Losr_return_double
    // Is it a float?
    cmp w10, #'F'
    beq .Losr_return_float
    // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]
.Losr_exit:
    ret
.Losr_return_double:
    str d0, [x3]
    ret
.Losr_return_float:
    str s0, [x3]
    ret

.Losr_entry:
    .cfi_restore_state                     // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset (SAVE_SIZE+16)     // workaround for clang bug: 31975598

    mov x9, sp                             // Save stack pointer.
    .cfi_def_cfa_register x9

    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    // x1 is the frame size; the LR slot sits at the top of the callee frame.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
    // Copies backwards, from the last 4-byte slot down to slot 0.
.Losr_loop_entry:
    cbz w1, .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2
END art_quick_osr_stub
1087
1088    /*
1089     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_.
1090     * Both must reside on the stack, between current SP and target SP.
1091     * IP0 and IP1 shall be clobbered rather than retrieved from gprs_.
1092     */
1093
ENTRY art_quick_do_long_jump
    // Restores a full register context and jumps to the recorded PC
    // (exception delivery / deoptimization). x0 = uintptr_t* gprs_,
    // x1 = uint64_t* fprs_; both arrays live between the current SP and
    // the target SP, so nothing may touch them after SP is switched.

    // Load FPRs
    ldp d0, d1, [x1, #0]
    ldp d2, d3, [x1, #16]
    ldp d4, d5, [x1, #32]
    ldp d6, d7, [x1, #48]
    ldp d8, d9, [x1, #64]
    ldp d10, d11, [x1, #80]
    ldp d12, d13, [x1, #96]
    ldp d14, d15, [x1, #112]
    ldp d16, d17, [x1, #128]
    ldp d18, d19, [x1, #144]
    ldp d20, d21, [x1, #160]
    ldp d22, d23, [x1, #176]
    ldp d24, d25, [x1, #192]
    ldp d26, d27, [x1, #208]
    ldp d28, d29, [x1, #224]
    ldp d30, d31, [x1, #240]

    // Load GPRs. Delay loading x0, x1 because x0 is used as gprs_.
    // gprs_ is laid out as 8-byte slots indexed by register number
    // (slot 31 = SP, slot 32 = unused XZR, slot 33 = PC).
    ldp x2, x3, [x0, #16]
    ldp x4, x5, [x0, #32]
    ldp x6, x7, [x0, #48]
    ldp x8, x9, [x0, #64]
    ldp x10, x11, [x0, #80]
    ldp x12, x13, [x0, #96]
    ldp x14, x15, [x0, #112]
    // Do not load IP0 (x16) and IP1 (x17), these shall be clobbered below.
    // Don't load the platform register (x18) either.
    ldr      x19, [x0, #152]      // xSELF.
    ldp x20, x21, [x0, #160]      // For Baker RB, wMR (w20) is reloaded below.
    ldp x22, x23, [x0, #176]
    ldp x24, x25, [x0, #192]
    ldp x26, x27, [x0, #208]
    ldp x28, x29, [x0, #224]
    ldp x30, xIP0, [x0, #240]     // LR and SP, load SP to IP0.

    // Load PC to IP1, it's at the end (after the space for the unused XZR).
    ldr xIP1, [x0, #33*8]

    // Load x0, x1.
    ldp x0, x1, [x0, #0]

    // Set SP. Do not access fprs_ and gprs_ from now, they are below SP.
    mov sp, xIP0

    REFRESH_MARKING_REGISTER

    br  xIP1
END art_quick_do_long_jump
1144
1145    /*
1146     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
1147     * possibly null object to lock.
1148     *
1149     * Derived from arm32 code.
1150     */
1151    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    // Fast-path monitor-enter: try to thin-lock the object in x0 with an
    // exclusive load/store pair. Falls through to the slow path
    // (art_quick_lock_object_no_inline) on null, inflated/contended locks,
    // or thin-lock recursion-count overflow.
    ldr    w1, [xSELF, #THREAD_ID_OFFSET]
    cbz    w0, art_quick_lock_object_no_inline
                                      // Exclusive load/store has no immediate anymore.
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
.Lretry_lock:
    ldaxr  w2, [x4]                   // Acquire needed only in most common case.
    eor    w3, w2, w1                 // Prepare the value to store if unlocked
                                      //   (thread id, count of 0 and preserved read barrier bits),
                                      // or prepare to compare thread id for recursive lock check
                                      //   (lock_word.ThreadId() ^ self->ThreadId()).
    tst    w2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
    b.ne   .Lnot_unlocked             // Check if unlocked.
    // unlocked case - store w3: original lock word plus thread id, preserved read barrier bits.
    stxr   w2, w3, [x4]
    cbnz   w2, .Lretry_lock           // If the store failed, retry.
    ret
.Lnot_unlocked:  // w2: original lock word, w1: thread id, w3: w2 ^ w1
                                      // Check lock word state and thread id together,
                                      // i.e. thin-locked by this thread iff both are zero in w3.
    tst    w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
    b.ne   art_quick_lock_object_no_inline
    add    w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // Increment the recursive lock count.
    tst    w3, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED  // Test the new thin lock count.
    b.eq   art_quick_lock_object_no_inline  // Zero as the new count indicates overflow, go slow path.
    stxr   w2, w3, [x4]
    cbnz   w2, .Lretry_lock           // If the store failed, retry.
    ret
END art_quick_lock_object
1180
ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object.
    // Calls into the runtime; the runtime reports failure (e.g. a pending
    // NullPointerException) via a non-zero w0, which the tail macro turns
    // into exception delivery.
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline
1190
1191    /*
1192     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
1193     * x0 holds the possibly null object to lock.
1194     *
1195     * Derived from arm32 code.
1196     */
1197    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    // Fast-path monitor-exit for thin locks held by the current thread.
    // Falls back to art_quick_unlock_object_no_inline on null, inflated
    // locks, or locks owned by another thread.
    ldr    w1, [xSELF, #THREAD_ID_OFFSET]
    cbz    x0, art_quick_unlock_object_no_inline
                                      // Exclusive load/store has no immediate anymore.
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    w2, [x4]                   // Plain load suffices here; the owning thread is the
                                      // only writer of a thin lock it holds — TODO confirm.
#else
    ldxr   w2, [x4]                   // Need to use atomic instructions for read barrier.
#endif
    eor    w3, w2, w1                 // Prepare the value to store if simply locked
                                      //   (mostly 0s, and preserved read barrier bits),
                                      // or prepare to compare thread id for recursive lock check
                                      //   (lock_word.ThreadId() ^ self->ThreadId()).
    tst    w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
    b.ne   .Lnot_simply_locked        // Locked recursively or by other thread?
    // Transition to unlocked.
#ifndef USE_READ_BARRIER
    stlr   w3, [x4]                   // Release ordering pairs with the locker's acquire.
#else
    stlxr  w2, w3, [x4]               // Need to use atomic instructions for read barrier.
    cbnz   w2, .Lretry_unlock         // If the store failed, retry.
#endif
    ret
.Lnot_simply_locked:
                                      // Check lock word state and thread id together,
                                      // i.e. thin-locked by this thread iff both are zero in w3.
    tst    w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
    b.ne   art_quick_unlock_object_no_inline
    sub    w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str    w3, [x4]
#else
    stxr   w2, w3, [x4]               // Need to use atomic instructions for read barrier.
    cbnz   w2, .Lretry_unlock         // If the store failed, retry.
#endif
    ret
END art_quick_unlock_object
1236
ENTRY art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object.
    // Mirrors art_quick_lock_object_no_inline: runtime call, then return
    // on success (w0 == 0) or deliver the pending exception.
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline
1246
1247    /*
1248     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
1249     * artThrowClassCastExceptionForObject.
1250     */
1251    .extern artInstanceOfFromCode
1252    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Checked cast: x0 = object, x1 = target class. Returns normally when
    // the cast succeeds; otherwise throws ClassCastException and never
    // returns. artInstanceOfFromCode is called with x0/x1 already in place.

    // Type check using the bit string passes null as the target class. In that case just throw.
    cbz x1, .Lthrow_class_cast_exception_for_bitstring_check

    // Store arguments and link register
    // Stack needs to be 16B aligned on calls.
    // Frame: x0 at #0, x1 at #8, LR at #24 (slot #16 is padding).
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
    SAVE_REG xLR, 24

    // Call runtime code
    bl artInstanceOfFromCode

    // Restore LR.
    RESTORE_REG xLR, 24

    // Check for exception
    cbz x0, .Lthrow_class_cast_exception

    // Restore and return
    .cfi_remember_state
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
    ret
    .cfi_restore_state                // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598

.Lthrow_class_cast_exception:
    // Restore the original object/class arguments for the throw entrypoint.
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
    brk 0                             // We should not return here...
END art_quick_check_instance_of
1288
1289// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
.macro POP_REG_NE xReg, offset, xExclude
    // \xExclude holds a live result that must not be overwritten; the
    // .ifnc comparison is textual, so callers must spell registers
    // consistently (e.g. always "x0").
    .ifnc \xReg, \xExclude
        ldr \xReg, [sp, #\offset]     // restore xReg
        .cfi_restore \xReg
    .endif
.endm
1296
1297// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
1298// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
    // Pair-wise variant of POP_REG_NE: uses a single ldp when neither
    // register is excluded, otherwise a single ldr for the survivor.
    // At most one of \xReg1/\xReg2 can match \xExclude.
    .ifc \xReg1, \xExclude
        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
    .else
        .ifc \xReg2, \xExclude
            ldr \xReg1, [sp, #\offset]          // restore xReg1
        .else
            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
        .endif
    .endif
    // Both CFI restores are emitted unconditionally; for an excluded
    // register this marks it as holding its rule-default value.
    .cfi_restore \xReg1
    .cfi_restore \xReg2
.endm
1312
1313    /*
1314     * Macro to insert read barrier, only used in art_quick_aput_obj.
1315     * xDest, wDest and xObj are registers, offset is a defined literal such as
1316     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
1317     * name mismatch between instructions. This macro uses the lower 32b of register when possible.
1318     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
1319     */
.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
#ifdef USE_READ_BARRIER
# ifdef USE_BAKER_READ_BARRIER
    // Baker fast path: if the lock word's read-barrier state bit is clear,
    // the reference can be loaded directly.
    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
    // False dependency to avoid needing load/load fence.
    // (lsr #32 of the 32-bit-loaded \xTemp is always 0, so \xObj is unchanged.)
    add \xObj, \xObj, \xTemp, lsr #32
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
    b .Lrb_exit\number
# endif  // USE_BAKER_READ_BARRIER
.Lrb_slowpath\number:
    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
    // Frame: x0/x1 at #0/#8, x2/x3 at #16/#24, x4/LR at #32/#40.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, xLR, 32

    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
    .ifnc \xObj, x1
        mov x1, \xObj               // pass xObj
    .endif
    mov w2, #\offset                // pass offset
    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \wDest, w0
        mov \wDest, w0              // save return value in wDest
    .endif

    // Conditionally restore saved registers — skip the one that now holds
    // the loaded reference (\xDest).
    POP_REG_NE x0, 0, \xDest
    POP_REG_NE x1, 8, \xDest
    POP_REG_NE x2, 16, \xDest
    POP_REG_NE x3, 24, \xDest
    POP_REG_NE x4, 32, \xDest
    RESTORE_REG xLR, 40
    DECREASE_FRAME 48
.Lrb_exit\number:
#else
    // No read barrier configured: plain heap reference load.
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
#endif  // USE_READ_BARRIER
.endm
1362
1363#ifdef USE_READ_BARRIER
1364    .extern artReadBarrierSlow
1365#endif
ENTRY art_quick_aput_obj
    // Reference array store with type check: x0 = array, x1 = index,
    // x2 = value. Bounds are assumed already checked by the caller.
    // Storing null skips both the type check and the card mark.
    cbz x2, .Ldo_aput_null
    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
                                                                    // This also zero-extends to x3
    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
    // This also zero-extends to x3
    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
                                                                    // This also zero-extends to x4
    cmp w3, w4  // value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
    // Mark the card for the GC write barrier: card = table[addr >> shift],
    // with the card-table base's low byte doubling as the dirty value.
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
    strb w3, [x3, x0]
    ret
.Ldo_aput_null:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
    ret
.Lcheck_assignability:
    // Store arguments and link register
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
    SAVE_TWO_REGS x2, xLR, 16

    // Call runtime code
    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    bl artIsAssignableFromCode

    // Check for exception
    cbz x0, .Lthrow_array_store_exception

    // Restore
    .cfi_remember_state
    RESTORE_TWO_REGS x2, xLR, 16
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    // Assignable: redo the store and card mark (same as .Ldo_aput).
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                          // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
    strb w3, [x3, x0]
    ret
    .cfi_restore_state            // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
.Lthrow_array_store_exception:
    RESTORE_TWO_REGS x2, xLR, 16
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov x1, x2                      // Pass value.
    mov x2, xSELF                   // Pass Thread::Current.
    bl artThrowArrayStoreException  // (Object*, Object*, Thread*).
    brk 0                           // Unreached.
END art_quick_aput_obj
1428
1429// Macro to facilitate adding new allocation entrypoints.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    // Emits a stub that forwards its single argument (x0) to \entrypoint
    // with Thread::Current appended, then expands \return to handle the
    // result/exception.
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1441
1442// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    // Same as ONE_ARG_DOWNCALL but forwards two arguments (x0, x1).
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x2, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1454
1455// Macro to facilitate adding new allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    // Same as ONE_ARG_DOWNCALL but forwards three arguments (x0-x2).
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x3, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1467
1468// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    // Same as ONE_ARG_DOWNCALL but forwards four arguments (x0-x3).
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x4, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1480
1481// Macros taking opportunity of code similarities for downcalls.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    // One-argument downcall; used below for static field getters
    // (arg0 = field index).
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1493
.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    // Two-argument downcall; used below for instance field getters and
    // static field setters.
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x2, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1505
.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    // Three-argument downcall; used below for instance field setters.
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x3, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
1517
1518    /*
1519     * Macro for resolution and initialization of indexed DEX file
1520     * constants such as classes and strings.
1521     */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Resolution/initialization stub: saves the full register set so the
    // runtime can walk/deopt the frame, calls \entrypoint(index, self),
    // and either returns the resolved object in x0 or delivers the
    // pending exception on a null result.
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset       // save everything for stack crawl
    mov   x1, xSELF                   // pass Thread::Current
    bl    \entrypoint                 // (int32_t index, Thread* self)
    cbz   w0, 1f                      // If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    REFRESH_MARKING_REGISTER
    ret                        // return
    .cfi_restore_state
    .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm
1539
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    // Variant using the for-clinit runtime method so that class
    // initialization entrypoints get the correct stack-crawl anchor.
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm
1543
.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    // Return-handling tail for downcalls that report failure with a zero
    // result: non-zero w0 returns to the caller, zero delivers the
    // pending exception.
    cbz w0, 1f                 // result zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm
1550
1551    /*
1552     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
1553     * failure.
1554     */
1555TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1556
1557    /*
1558     * Entry from managed code when uninitialized static storage, this stub will run the class
1559     * initializer and deliver the exception on error. On success the static storage base is
1560     * returned.
1561     */
1562ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
1563ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
1564ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
1565ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
1566ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
1567ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
1568
// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

// Static field getters: (field_idx) -> value; getters distinguish failure
// via Thread::Current()->exception (the _X1 return macro), since any value
// in x0 can be a legitimate result.
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

// Instance field getters: (field_idx, object) -> value.
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

// Static field setters: (field_idx, new_value); zero w0 means success.
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

// Instance field setters: (field_idx, object, new_value); zero w0 means success.
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1599
// Generate the allocation entrypoints for each allocator.
// (Macros come from arch/quick_alloc_entrypoints.S, included at the top of
// this file.)
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm64 specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// Same set for the plain TLAB allocator.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
1628
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
//
// Fast-path object allocation from the RosAlloc thread-local runs.
//   In:   x0    = resolved mirror::Class* of the object to allocate.
//         xSELF = Thread::Current().
//   Out:  x0    = newly allocated object, or the slow-path C call's result
//                 (pending exception delivered if that result is null).
//   Clobbers: x1, x3, x4 (x1-x7 are declared free on entry).
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // x0: type, xSELF(x19): Thread::Current
    // x1-x7: free.
    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                              // allocation stack has room.
                                                              // ldp won't work due to large offset.
    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp    x3, x4
    bhs    .Lslow_path\c_name
    ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                              // local allocation. Also does the
                                                              // finalizable and initialization
                                                              // checks.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhs    .Lslow_path\c_name
                                                              // Compute the rosalloc bracket index
                                                              // from the size. Since the size is
                                                              // already aligned we can combine the
                                                              // two shifts together.
    add    x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                                              // Subtract pointer size since there
                                                              // are no runs for 0 byte allocations
                                                              // and the size is already aligned.
    ldr    x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                                              // Load the free list head (x3). This
                                                              // will be the return val.
    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz    x3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
                                                              // and update the list head with the
                                                              // next pointer.
    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                              // Store the class pointer in the
                                                              // header. This also overwrites the
                                                              // next pointer. The offsets are
                                                              // asserted to match.

#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF w0
    str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                              // Push the new object onto the thread
                                                              // local allocation stack and
                                                              // increment the thread local
                                                              // allocation stack top.
    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                              // Decrement the size of the free list

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //       (The stack initial state is "null" pointers).
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // An object that is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not-cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub    x1, x1, #1
                                                              // TODO: consider combining this store
                                                              // and the list head store above using
                                                              // strd.
    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov    x0, x3                                             // Set the return value and return.
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb    ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
    mov    x1, xSELF                                // pass Thread::Current
    bl     \cxx_name                                // (mirror::Class*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1745
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
//
// Fast-path TLAB object allocation (bump-pointer).
//   In:   x0    = resolved mirror::Class*, xSELF = Thread::Current().
//   Out:  x0    = newly allocated object and returns via "ret", or branches to
//                 \slowPathLabel with x0 (the class) still intact.
//   Clobbers: x4, x5, x6, x7.
.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
    ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
    add    x6, x4, x7                                         // Add object size to tlab pos.
    cmp    x6, x5                                             // Check if it fits, overflow works
                                                              // since the tlab pos and end are 32
                                                              // bit values.

    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhi    \slowPathLabel
    str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
    add    x5, x5, #1
    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF w0
    str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
                                                              // A fence may be needed below; it is
                                                              // "ish" not "ishst" so that the code
                                                              // after this allocation site will see
                                                              // the right values in the fields of
                                                              // the class.
    mov    x0, x4                                             // Return the old thread_local_pos.
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb    ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    ret
.endm
1789
// The common code for art_quick_alloc_object_*region_tlab and *_tlab.
// Wraps ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED with the slow-path call into the
// runtime; \entrypoint is the C entrypoint (mirror::Class*, Thread*).
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path region tlab allocation.
    // x0: type, xSELF(x19): Thread::Current
    // x1-x7: free.
    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
    mov    x1, xSELF                           // Pass Thread::Current.
    bl     \entrypoint                         // (mirror::Class*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1811
// Fast-path TLAB array allocation, given a precomputed total byte size.
//   In:   \xTemp1 = unaligned total allocation size in bytes (data size plus
//                   array data offset plus alignment mask), as computed by one
//                   of the COMPUTE_ARRAY_SIZE_* macros.
//         \wClass = array class, \xCount/\wCount = zero-extended 32-bit
//                   element count, xSELF = Thread::Current().
//   Out:  x0 = newly allocated array and returns via "ret", or branches to
//              \slowPathLabel before x0/x1 are clobbered.
//   Clobbers: x0 and the three temp register pairs.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
                                                              // (addr + 7) & ~7. The mask must
                                                              // be 64 bits to keep high bits in
                                                              // case of overflow.
    // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
    // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
    // 32 bit int. Since the max shift for arrays is 3, it can not become a negative 64 bit int.
    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
    bhs    \slowPathLabel                                     // path.

    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
                                                              // we use (end - begin) to handle
                                                              // negative size arrays. It is
                                                              // assumed that a negative size will
                                                              // always be greater unsigned than
                                                              // region size.
    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
    sub    \xTemp2, \xTemp2, \xTemp0
    cmp    \xTemp1, \xTemp2

    // The array class is always initialized here. Unlike new-instance,
    // this does not act as a double test.
    bhi    \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
                                                              // Move old thread_local_pos to x0
                                                              // for the return value.
    mov    x0, \xTemp0
    add    \xTemp0, \xTemp0, \xTemp1
    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
    add    \xTemp0, \xTemp0, #1
    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF \wClass
    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
                                                              // Fence placement:
// new-array is special. The class is loaded and immediately goes to the Initialized state
// before it is published. Therefore the only fence needed is for the publication of the object.
// See ClassLinker::CreateArrayClass() for more details.

// For publication of the new array, we don't need a 'dmb ishst' here.
// The compiler generates 'dmb ishst' for all new-array insts.
    ret
.endm
1857
// Generates an array-allocation entrypoint: \size_setup computes the unaligned
// total byte size into x5, then the TLAB fast path tries to allocate; on
// failure the C \entrypoint (mirror::Class*, int32_t, Thread*) is called.
.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
    // Fast path array allocation for region tlab allocation.
    // x0: mirror::Class* type
    // x1: int32_t component_count
    // x2-x7: free.
    mov    x3, x0                     // Copy the class so x0 survives for the slow path.
    \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
.Lslow_path\name:
    // x0: mirror::Class* klass
    // x1: int32_t component_count
    // x2: Thread* self
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x2, xSELF                  // pass Thread::Current
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm
1879
// Computes the unaligned allocation size into \xTemp1 when the component size
// is not known statically: reads the component size shift from the component
// class's primitive-type field, then size = (count << shift) + data offset +
// alignment mask (+4 for 64-bit components, see below).
.macro COMPUTE_ARRAY_SIZE_UNKNOWN xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    // Array classes are never finalizable or uninitialized, no need to check.
    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
    UNPOISON_HEAP_REF \wTemp0
    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
                                                              // bits.
                                                              // xCount is holding a 32 bit value,
                                                              // it can not overflow.
    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    add    \xTemp0, \xTemp0, #1                               // Add 4 to the size only if the
                                                              // component size shift is 3 (i.e.
                                                              // 64-bit components needing 64-bit
                                                              // aligned data):
                                                              // ((shift + 1) & 4) is 4 for
                                                              // shift == 3 and 0 for shift 0..2.
    and    \xTemp0, \xTemp0, #4
    add    \xTemp1, \xTemp1, \xTemp0
.endm
1902
// Unaligned allocation size for 1-byte components: count + data offset + alignment mask.
.macro COMPUTE_ARRAY_SIZE_8 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    // Add array data offset and alignment.
    add    \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
1907
// Unaligned allocation size for 2-byte components: (count << 1) + data offset + alignment mask.
.macro COMPUTE_ARRAY_SIZE_16 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl    \xTemp1, \xCount, #1
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
1913
// Unaligned allocation size for 4-byte components: (count << 2) + data offset + alignment mask.
.macro COMPUTE_ARRAY_SIZE_32 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl    \xTemp1, \xCount, #2
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
1919
// Unaligned allocation size for 8-byte components: (count << 3) + wide data offset + alignment mask.
.macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl    \xTemp1, \xCount, #3
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
1925
// Instantiate the array-allocation entrypoints for both the region TLAB and
// plain TLAB allocators, one per statically known component size.
// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
// the entrypoint once all backends have been updated to use the size variants.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1938
1939    /*
1940     * Called by managed code when the thread has been asked to suspend.
1941     */
1942    .extern artTestSuspendFromCode
1943ENTRY art_quick_test_suspend
1944    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save callee saves for stack crawl
1945    mov    x0, xSELF
1946    bl     artTestSuspendFromCode             // (Thread*)
1947    RESTORE_SAVE_EVERYTHING_FRAME
1948    REFRESH_MARKING_REGISTER
1949    ret
1950END art_quick_test_suspend
1951
// Implicit suspend check entry: same runtime call as art_quick_test_suspend,
// but only a refs-only frame is saved here.
ENTRY art_quick_implicit_suspend
    mov    x0, xSELF                          // pass Thread::Current
    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
    bl     artTestSuspendFromCode             // (Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_implicit_suspend
1960
1961     /*
1962     * Called by managed code that is attempting to call a method on a proxy class. On entry
1963     * x0 holds the proxy method and x1 holds the receiver; The frame size of the invoked proxy
1964     * method agrees with a ref and args callee save frame.
1965     */
1966     .extern artQuickProxyInvokeHandler
1967ENTRY art_quick_proxy_invoke_handler
1968    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
1969    mov     x2, xSELF                   // pass Thread::Current
1970    mov     x3, sp                      // pass SP
1971    bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
1972    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
1973    cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
1974    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
1975    REFRESH_MARKING_REGISTER
1976    fmov    d0, x0                      // Store result in d0 in case it was float or double
1977    ret                                 // return on success
1978.Lexception_in_proxy:
1979    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1980    DELIVER_PENDING_EXCEPTION
1981END art_quick_proxy_invoke_handler
1982
1983    /*
1984     * Called to resolve an imt conflict.
1985     * x0 is the conflict ArtMethod.
1986     * xIP1 is a hidden argument that holds the target interface method's dex method index.
1987     *
1988     * Note that this stub writes to xIP0, xIP1, x13-x15, and x0.
1989     */
1990    .extern artLookupResolvedMethod
1991ENTRY art_quick_imt_conflict_trampoline
1992    ldr xIP0, [sp, #0]  // Load referrer
1993    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
1994    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
1995#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
1996#error "Expecting declaring class and access flags to be consecutive for LDP."
1997#endif
1998    ldp wIP0, w15, [xIP0, #ART_METHOD_DECLARING_CLASS_OFFSET]
1999    // If the method is obsolete, just go through the dex cache miss slow path.
2000    tbnz x15, #ACC_OBSOLETE_METHOD_SHIFT, .Limt_conflict_trampoline_dex_cache_miss
2001    ldr wIP0, [xIP0, #MIRROR_CLASS_DEX_CACHE_OFFSET]  // Load the DexCache (without read barrier).
2002    UNPOISON_HEAP_REF wIP0
2003    ubfx x15, xIP1, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
2004    ldr xIP0, [xIP0, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
2005    add xIP0, xIP0, x15, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
2006
2007    // Relaxed atomic load x14:x15 from the dex cache slot.
2008.Limt_conflict_trampoline_retry_load:
2009    ldxp x14, x15, [xIP0]
2010    stxp w13, x14, x15, [xIP0]
2011    cbnz w13, .Limt_conflict_trampoline_retry_load
2012
2013    cmp x15, xIP1       // Compare method index to see if we had a DexCache method hit.
2014    bne .Limt_conflict_trampoline_dex_cache_miss
2015.Limt_conflict_trampoline_have_interface_method:
2016    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
2017    ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
2018.Limt_table_iterate:
2019    cmp x0, x14
2020    // Branch if found. Benchmarks have shown doing a branch here is better.
2021    beq .Limt_table_found
2022    // If the entry is null, the interface method is not in the ImtConflictTable.
2023    cbz x0, .Lconflict_trampoline
2024    // Iterate over the entries of the ImtConflictTable.
2025    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
2026    b .Limt_table_iterate
2027.Limt_table_found:
2028    // We successfully hit an entry in the table. Load the target method
2029    // and jump to it.
2030    ldr x0, [xIP1, #__SIZEOF_POINTER__]
2031    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
2032    br xIP0
2033.Lconflict_trampoline:
2034    // Call the runtime stub to populate the ImtConflictTable and jump to the
2035    // resolved method.
2036    mov x0, x14  // Load interface method
2037    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
2038.Limt_conflict_trampoline_dex_cache_miss:
2039    // We're not creating a proper runtime method frame here,
2040    // artLookupResolvedMethod() is not allowed to walk the stack.
2041
2042    // Save GPR args and return address, allocate space for FPR args, align stack.
2043    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)
2044    SAVE_TWO_REGS x2, x3, 16
2045    SAVE_TWO_REGS x4, x5, 32
2046    SAVE_TWO_REGS x6, x7, 48
2047    SAVE_REG      xLR, (8 * 8 + 8 * 8 + 8)
2048
2049    // Save FPR args.
2050    stp d0, d1, [sp, #64]
2051    stp d2, d3, [sp, #80]
2052    stp d4, d5, [sp, #96]
2053    stp d6, d7, [sp, #112]
2054
2055    mov x0, xIP1                            // Pass method index.
2056    ldr x1, [sp, #(8 * 8 + 8 * 8 + 8 + 8)]  // Pass referrer.
2057    bl artLookupResolvedMethod              // (uint32_t method_index, ArtMethod* referrer)
2058    mov x14, x0   // Move the interface method to x14 where the loop above expects it.
2059
2060    // Restore FPR args.
2061    ldp d0, d1, [sp, #64]
2062    ldp d2, d3, [sp, #80]
2063    ldp d4, d5, [sp, #96]
2064    ldp d6, d7, [sp, #112]
2065
2066    // Restore GPR args and return address.
2067    RESTORE_REG      xLR, (8 * 8 + 8 * 8 + 8)
2068    RESTORE_TWO_REGS x2, x3, 16
2069    RESTORE_TWO_REGS x4, x5, 32
2070    RESTORE_TWO_REGS x6, x7, 48
2071    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)
2072
2073    // If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
2074    cbz x14, .Lconflict_trampoline
2075    b .Limt_conflict_trampoline_have_interface_method
2076END art_quick_imt_conflict_trampoline
2077
// Resolution trampoline: resolves the called method via the runtime, then
// tail-calls the returned code pointer; delivers a pending exception on failure.
ENTRY art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    mov x2, xSELF                    // pass Thread::Current
    mov x3, sp                       // pass SP
    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
    cbz x0, 1f                       // Null code pointer => resolution failed.
    mov xIP0, x0            // Remember returned code pointer in xIP0.
    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    br xIP0
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline
2093
2094/*
2095 * Generic JNI frame layout:
2096 *
2097 * #-------------------#
2098 * |                   |
2099 * | caller method...  |
2100 * #-------------------#    <--- SP on entry
2101 * | Return X30/LR     |
2102 * | X29/FP            |    callee save
2103 * | X28               |    callee save
2104 * | X27               |    callee save
2105 * | X26               |    callee save
2106 * | X25               |    callee save
2107 * | X24               |    callee save
2108 * | X23               |    callee save
2109 * | X22               |    callee save
2110 * | X21               |    callee save
2111 * | X20               |    callee save
2112 * | X19               |    callee save
2113 * | X7                |    arg7
2114 * | X6                |    arg6
2115 * | X5                |    arg5
2116 * | X4                |    arg4
2117 * | X3                |    arg3
2118 * | X2                |    arg2
2119 * | X1                |    arg1
2120 * | D7                |    float arg 8
2121 * | D6                |    float arg 7
2122 * | D5                |    float arg 6
2123 * | D4                |    float arg 5
2124 * | D3                |    float arg 4
2125 * | D2                |    float arg 3
2126 * | D1                |    float arg 2
2127 * | D0                |    float arg 1
2128 * | Method*           | <- X0
2129 * #-------------------#
2130 * | local ref cookie  | // 4B
2131 * | handle scope size | // 4B
2132 * #-------------------#
2133 * | JNI Call Stack    |
2134 * #-------------------#    <--- SP on native call
2135 * |                   |
2136 * | Stack for Regs    |    The trampoline assembly will pop these values
2137 * |                   |    into registers for native call
2138 * #-------------------#
2139 * | Native code ptr   |
2140 * #-------------------#
2141 * | Free scratch      |
2142 * #-------------------#
2143 * | Ptr to (1)        |    <--- SP
2144 * #-------------------#
2145 */
2146    /*
2147     * Called to do a generic JNI down-call
2148     */
2149ENTRY art_quick_generic_jni_trampoline
2150    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2151
2152    // Save SP , so we can have static CFI info.
2153    mov x28, sp
2154    .cfi_def_cfa_register x28
2155
2156    // This looks the same, but is different: this will be updated to point to the bottom
2157    // of the frame when the handle scope is inserted.
2158    mov xFP, sp
2159
2160    mov xIP0, #5120
2161    sub sp, sp, xIP0
2162
2163    // prepare for artQuickGenericJniTrampoline call
2164    // (Thread*,  SP)
2165    //    x0      x1   <= C calling convention
2166    //   xSELF    xFP  <= where they are
2167
2168    mov x0, xSELF   // Thread*
2169    mov x1, xFP
2170    bl artQuickGenericJniTrampoline  // (Thread*, sp)
2171
2172    // The C call will have registered the complete save-frame on success.
2173    // The result of the call is:
2174    // x0: pointer to native code, 0 on error.
2175    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
2176
2177    // Check for error = 0.
2178    cbz x0, .Lexception_in_native
2179
2180    // Release part of the alloca.
2181    mov sp, x1
2182
2183    // Save the code pointer
2184    mov xIP0, x0
2185
2186    // Load parameters from frame into registers.
2187    // TODO Check with artQuickGenericJniTrampoline.
2188    //      Also, check again APPCS64 - the stack arguments are interleaved.
2189    ldp x0, x1, [sp]
2190    ldp x2, x3, [sp, #16]
2191    ldp x4, x5, [sp, #32]
2192    ldp x6, x7, [sp, #48]
2193
2194    ldp d0, d1, [sp, #64]
2195    ldp d2, d3, [sp, #80]
2196    ldp d4, d5, [sp, #96]
2197    ldp d6, d7, [sp, #112]
2198
2199    add sp, sp, #128
2200
2201    blr xIP0        // native call.
2202
2203    // result sign extension is handled in C code
2204    // prepare for artQuickGenericJniEndTrampoline call
2205    // (Thread*, result, result_f)
2206    //    x0       x1       x2        <= C calling convention
2207    mov x1, x0      // Result (from saved).
2208    mov x0, xSELF   // Thread register.
2209    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
2210
2211    bl artQuickGenericJniEndTrampoline
2212
2213    // Pending exceptions possible.
2214    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2215    cbnz x2, .Lexception_in_native
2216
2217    // Tear down the alloca.
2218    mov sp, x28
2219    .cfi_def_cfa_register sp
2220
2221    // Tear down the callee-save frame.
2222    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2223    REFRESH_MARKING_REGISTER
2224
2225    // store into fpr, for when it's a fpr return...
2226    fmov d0, x0
2227    ret
2228
2229.Lexception_in_native:
2230    // Move to x1 then sp to please assembler.
2231    ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
2232    add sp, x1, #-1  // Remove the GenericJNI tag.
2233    .cfi_def_cfa_register sp
2234    # This will create a new save-all frame, required by the runtime.
2235    DELIVER_PENDING_EXCEPTION
2236END art_quick_generic_jni_trampoline
2237
2238/*
2239 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
2240 * of a quick call:
2241 * x0 = method being called/to bridge to.
2242 * x1..x7, d0..d7 = arguments to that method.
2243 */
ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.

    //  x0 will contain mirror::ArtMethod* method.
    mov x1, xSELF                          // Pass Thread::Current() (held in xSELF).
    mov x2, sp                             // Pass the save-frame address (ArtMethod** sp).

    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
    //                                      mirror::ArtMethod** sp)
    bl   artQuickToInterpreterBridge

    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
    REFRESH_MARKING_REGISTER

    fmov d0, x0                            // Copy result to d0 in case the return type is FP.

    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge
2262
2263/*
2264 * Called to attempt to execute an obsolete method.
2265 */
2266ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
2267
2268
2269//
2270// Instrumentation-related stubs
2271//
    .extern artInstrumentationMethodEntryFromCode
ENTRY art_quick_instrumentation_entry
    // On entry: x0 = Method*, x1 = receiver (Object*), per the call signature below.
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    mov   x20, x0             // Preserve method reference in a callee-save.

    mov   x2, xSELF           // Pass Thread::Current().
    mov   x3, sp  // Pass SP
    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP)

    mov   xIP0, x0            // x0 = result of call: code pointer to invoke, null on error.
    mov   x0, x20             // Reload method reference.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
    REFRESH_MARKING_REGISTER
    cbz   xIP0, 1f            // Deliver the pending exception if method is null.
    adr   xLR, art_quick_instrumentation_exit
    br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.

1:
    DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_entry
2294
    .extern artInstrumentationMethodExitFromCode
ENTRY art_quick_instrumentation_exit
    // Entered via the xLR installed by art_quick_instrumentation_entry when the
    // instrumented method returns; its GPR/FPR results are passed by pointer below.
    mov   xLR, #0             // Clobber LR for later checks.
    SETUP_SAVE_EVERYTHING_FRAME

    add   x3, sp, #16         // Pass floating-point result pointer, in kSaveEverything frame.
    add   x2, sp, #272        // Pass integer result pointer, in kSaveEverything frame.
    mov   x1, sp              // Pass SP.
    mov   x0, xSELF           // Pass Thread.
    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res*, fpr_res*)

    // Per the checks below: x0 = return pc to resume at (null => pending exception),
    // x1 non-zero => deoptimization requested.
    cbz   x0, .Ldo_deliver_instrumentation_exception
                              // Handle error
    cbnz  x1, .Ldeoptimize
    // Normal return.
    str   x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]
                              // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    br    lr
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.Ldeoptimize:
    str   x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]
                              // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    b     art_quick_deoptimize
END art_quick_instrumentation_exit
2324
2325    /*
2326     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
2327     * will long jump to the upcall with a special exception of -1.
2328     */
    .extern artDeoptimize
ENTRY art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME
    mov    x0, xSELF          // Pass thread.
    bl     artDeoptimize      // (Thread*)
    brk 0                     // Unreachable: artDeoptimize long-jumps and does not return.
END art_quick_deoptimize
2336
2337    /*
2338     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
2339     * will long jump to the upcall with a special exception of -1.
2340     */
    .extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
                                          // x0 already holds the DeoptimizationKind argument.
    mov    x1, xSELF                      // Pass thread.
    bl     artDeoptimizeFromCompiledCode  // (DeoptimizationKind, Thread*)
    brk 0                                 // Unreachable: the call above long-jumps.
END art_quick_deoptimize_from_compiled_code
2348
2349
2350    /*
2351     * String's indexOf.
2352     *
2353     * TODO: Not very optimized.
2354     * On entry:
2355     *    x0:   string object (known non-null)
2356     *    w1:   char to match (known <= 0xFFFF)
2357     *    w2:   Starting offset in string data
2358     */
ENTRY art_quick_indexof
#if (STRING_COMPRESSION_FEATURE)
    ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]   // w4 = count field (length plus compression flag in bit 0).
#else
    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]   // w3 = character count.
#endif
    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET     // x0 = start of character data.
#if (STRING_COMPRESSION_FEATURE)
    /* w4 holds count (with flag) and w3 holds actual length */
    lsr   w3, w4, #1
#endif
    /* Clamp start to [0..count] */
    cmp   w2, #0
    csel  w2, wzr, w2, lt                 // w2 = max(w2, 0)
    cmp   w2, w3
    csel  w2, w3, w2, gt                  // w2 = min(w2, count)

    /* Save a copy to compute result */
    mov   x5, x0

#if (STRING_COMPRESSION_FEATURE)
    tbz   w4, #0, .Lstring_indexof_compressed     // Flag bit clear => 8-bit (compressed) chars.
#endif
    /* Build pointer to start of data to compare and pre-bias */
    add   x0, x0, x2, lsl #1
    sub   x0, x0, #2                      // Pre-bias: the loops use pre-increment addressing.
    /* Compute iteration count */
    sub   w2, w3, w2

    /*
     * At this point we have:
     *  x0: start of the data to test
     *  w1: char to compare
     *  w2: iteration count
     *  x5: original start of string data
     */

    subs  w2, w2, #4
    b.lt  .Lindexof_remainder             // Fewer than 4 chars left: handle one at a time.

.Lindexof_loop4:
    // Unrolled by 4: load four consecutive 16-bit chars with pre-increment.
    ldrh  w6, [x0, #2]!
    ldrh  w7, [x0, #2]!
    ldrh  wIP0, [x0, #2]!
    ldrh  wIP1, [x0, #2]!
    cmp   w6, w1
    b.eq  .Lmatch_0
    cmp   w7, w1
    b.eq  .Lmatch_1
    cmp   wIP0, w1
    b.eq  .Lmatch_2
    cmp   wIP1, w1
    b.eq  .Lmatch_3
    subs  w2, w2, #4
    b.ge  .Lindexof_loop4

.Lindexof_remainder:
    adds  w2, w2, #4                      // Restore remaining count (0..3).
    b.eq  .Lindexof_nomatch

.Lindexof_loop1:
    ldrh  w6, [x0, #2]!
    cmp   w6, w1
    b.eq  .Lmatch_3                       // x0 points at the matched char, same as .Lmatch_3.
    subs  w2, w2, #1
    b.ne  .Lindexof_loop1

.Lindexof_nomatch:
    mov   x0, #-1                         // Not found.
    ret

    // Match cases: x0 points at the last char loaded; step back to the matched
    // char, then convert the byte offset from the string start into an index.
.Lmatch_0:
    sub   x0, x0, #6
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
.Lmatch_1:
    sub   x0, x0, #4
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
.Lmatch_2:
    sub   x0, x0, #2
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
.Lmatch_3:
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
#if (STRING_COMPRESSION_FEATURE)
   /*
    * Comparing compressed string character-per-character with
    * input character
    */
.Lstring_indexof_compressed:
    add   x0, x0, x2
    sub   x0, x0, #1                      // Pre-bias for the pre-increment byte loads.
    sub   w2, w3, w2                      // Iteration count.
.Lstring_indexof_compressed_loop:
    subs  w2, w2, #1
    b.lt  .Lindexof_nomatch
    ldrb  w6, [x0, #1]!
    cmp   w6, w1
    b.eq  .Lstring_indexof_compressed_matched
    b     .Lstring_indexof_compressed_loop
.Lstring_indexof_compressed_matched:
    sub   x0, x0, x5                      // Byte offset equals char index for compressed strings.
    ret
#endif
END art_quick_indexof
2470
2471    /*
2472     * Create a function `name` calling the ReadBarrier::Mark routine,
2473     * getting its argument and returning its result through W register
2474     * `wreg` (corresponding to X register `xreg`), saving and restoring
2475     * all caller-save registers.
2476     *
2477     * If `wreg` is different from `w0`, the generated function follows a
2478     * non-standard runtime calling convention:
2479     * - register `wreg` is used to pass the (sole) argument of this
2480     *   function (instead of W0);
2481     * - register `wreg` is used to return the result of this function
2482     *   (instead of W0);
2483     * - W0 is treated like a normal (non-argument) caller-save register;
2484     * - everything else is the same as in the standard runtime calling
2485     *   convention (e.g. standard callee-save registers are preserved).
2486     */
.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
    // Reference is null, no work to do at all.
    cbz \wreg, .Lret_rb_\name
    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
.Lret_rb_\name:
    ret                               // Fast path: null or already-marked reference.
.Lnot_marked_rb_\name:
    // Check if the top two bits are one, if this is the case it is a forwarding address.
    tst   wIP0, wIP0, lsl #1          // Result is negative iff bits 31 and 30 are both set.
    bmi   .Lret_forwarding_address\name
.Lslow_rb_\name:
    /*
     * Allocate 44 stack slots * 8 = 352 bytes:
     * - 19 slots for core registers X0-15, X17, X19, LR
     * - 1 slot padding
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // We must not clobber IP1 since code emitted for HLoadClass and HLoadString
    // relies on IP1 being preserved.
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
    SAVE_TWO_REGS  x2,  x3, 16
    SAVE_TWO_REGS  x4,  x5, 32
    SAVE_TWO_REGS  x6,  x7, 48
    SAVE_TWO_REGS  x8,  x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    SAVE_TWO_REGS x17, x19, 128  // Skip x16, i.e. IP0, and x18, the platform register.
    SAVE_REG      xLR,      144  // Save also return address.
    // Save all potentially live caller-save floating-point registers.
    stp   d0, d1,   [sp, #160]
    stp   d2, d3,   [sp, #176]
    stp   d4, d5,   [sp, #192]
    stp   d6, d7,   [sp, #208]
    stp   d16, d17, [sp, #224]
    stp   d18, d19, [sp, #240]
    stp   d20, d21, [sp, #256]
    stp   d22, d23, [sp, #272]
    stp   d24, d25, [sp, #288]
    stp   d26, d27, [sp, #304]
    stp   d28, d29, [sp, #320]
    stp   d30, d31, [sp, #336]

    .ifnc \wreg, w0
      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
    .endif
    bl    artReadBarrierMark            // artReadBarrierMark(obj)
    .ifnc \wreg, w0
      mov   \wreg, w0                   // Return result into `wreg`
    .endif

    // Restore core regs, except `xreg`, as `wreg` is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REGS_NE x0, x1,   0,   \xreg
    POP_REGS_NE x2, x3,   16,  \xreg
    POP_REGS_NE x4, x5,   32,  \xreg
    POP_REGS_NE x6, x7,   48,  \xreg
    POP_REGS_NE x8, x9,   64,  \xreg
    POP_REGS_NE x10, x11, 80,  \xreg
    POP_REGS_NE x12, x13, 96,  \xreg
    POP_REGS_NE x14, x15, 112, \xreg
    POP_REGS_NE x17, x19, 128, \xreg
    POP_REG_NE  xLR,      144, \xreg  // Restore also return address.
    // Restore floating-point registers.
    ldp   d0, d1,   [sp, #160]
    ldp   d2, d3,   [sp, #176]
    ldp   d4, d5,   [sp, #192]
    ldp   d6, d7,   [sp, #208]
    ldp   d16, d17, [sp, #224]
    ldp   d18, d19, [sp, #240]
    ldp   d20, d21, [sp, #256]
    ldp   d22, d23, [sp, #272]
    ldp   d24, d25, [sp, #288]
    ldp   d26, d27, [sp, #304]
    ldp   d28, d29, [sp, #320]
    ldp   d30, d31, [sp, #336]
    // Remove frame and return.
    DECREASE_FRAME 352
    ret
.Lret_forwarding_address\name:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl   \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    ret
END \name
.endm
2577
// Instantiate a read barrier mark entrypoint per register that can hold a
// reference. x16 (IP0) and x18 (the platform register) are skipped below.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 ip0 is blocked
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18 x18 is blocked
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
2608
2609
// Invoke `macro_to_use` with the X register name if `xreg` is non-zero,
// otherwise with the W register name.
.macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
    .if \xreg
      \macro_to_use \x
    .else
      \macro_to_use \w
    .endif
.endm
2617
// Expand `macro_for_register` once per usable register (x0-x15, x19-x29) and
// `macro_for_reserved_register` for each reserved one (IP0, IP1, x18, lr, sp),
// keeping one entry per register number — 32 entries in total, so expansions
// stay position-aligned with the register encoding.
.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x0, w0, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x1, w1, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x2, w2, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x3, w3, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x4, w4, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x5, w5, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x6, w6, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x7, w7, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x8, w8, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x9, w9, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x10, w10, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x11, w11, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x12, w12, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x13, w13, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x14, w14, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x15, w15, \xreg
    \macro_for_reserved_register  // IP0 is reserved
    \macro_for_reserved_register  // IP1 is reserved
    \macro_for_reserved_register  // x18 is reserved
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x19, w19, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x20, w20, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x21, w21, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x22, w22, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x23, w23, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x24, w24, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x25, w25, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x26, w26, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x27, w27, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x28, w28, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x29, w29, \xreg
    \macro_for_reserved_register  // lr is reserved
    \macro_for_reserved_register  // sp is reserved
.endm
2652
// Expand `macro_for_register` for every register, using X register names.
.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 1
.endm
2656
// Expand `macro_for_register` for every register, using W register names.
.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 0
.endm
2660
// Two BRK instructions (8 bytes) filling one reserved switch-table slot,
// so reserved registers trap if ever dispatched to.
.macro BRK0_BRK0
    brk 0
    brk 0
.endm
2665
2666#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
2667#error "Array and field introspection code sharing requires same LDR offset."
2668#endif
// Array switch case: load the 32-bit reference at [xIP0 + index_reg * 4]
// into wIP0, then jump to the main mark entrypoint.
.macro INTROSPECTION_ARRAY_LOAD index_reg
    ldr   wIP0, [xIP0, \index_reg, lsl #2]
    b     art_quick_read_barrier_mark_introspection
.endm
2673
// Return switch case: move the marked reference from wIP0 into the
// destination register and jump back to the caller through LR.
.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
    mov   \reg, wIP0
    br    lr  // Do not use RET as we do not enter the entrypoint with "BL".
.endm
2678
// Slow path shared by the introspection entrypoints: call artReadBarrierMark
// on the reference in IP0, then dispatch through the return switch to place
// the result in the register the interrupted LDR would have loaded.
// `ldr_offset` is the offset from LR of that LDR instruction.
.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
    /*
     * Allocate 42 stack slots * 8 = 336 bytes:
     * - 18 slots for core registers X0-15, X19, LR
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 336
    SAVE_TWO_REGS  x2,  x3, 16
    SAVE_TWO_REGS  x4,  x5, 32
    SAVE_TWO_REGS  x6,  x7, 48
    SAVE_TWO_REGS  x8,  x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    // Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
    SAVE_TWO_REGS x19, xLR, 128       // Save return address.
    // Save all potentially live caller-save floating-point registers.
    stp   d0, d1,   [sp, #144]
    stp   d2, d3,   [sp, #160]
    stp   d4, d5,   [sp, #176]
    stp   d6, d7,   [sp, #192]
    stp   d16, d17, [sp, #208]
    stp   d18, d19, [sp, #224]
    stp   d20, d21, [sp, #240]
    stp   d22, d23, [sp, #256]
    stp   d24, d25, [sp, #272]
    stp   d26, d27, [sp, #288]
    stp   d28, d29, [sp, #304]
    stp   d30, d31, [sp, #320]

    mov   x0, xIP0
    bl    artReadBarrierMark          // artReadBarrierMark(obj)
    mov   xIP0, x0

    // Restore core regs, except x0 and x1 as the return register switch case
    // address calculation is smoother with an extra register.
    RESTORE_TWO_REGS  x2,  x3, 16
    RESTORE_TWO_REGS  x4,  x5, 32
    RESTORE_TWO_REGS  x6,  x7, 48
    RESTORE_TWO_REGS  x8,  x9, 64
    RESTORE_TWO_REGS x10, x11, 80
    RESTORE_TWO_REGS x12, x13, 96
    RESTORE_TWO_REGS x14, x15, 112
    // Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
    RESTORE_TWO_REGS x19, xLR, 128    // Restore return address.
    // Restore caller-save floating-point registers.
    ldp   d0, d1,   [sp, #144]
    ldp   d2, d3,   [sp, #160]
    ldp   d4, d5,   [sp, #176]
    ldp   d6, d7,   [sp, #192]
    ldp   d16, d17, [sp, #208]
    ldp   d18, d19, [sp, #224]
    ldp   d20, d21, [sp, #240]
    ldp   d22, d23, [sp, #256]
    ldp   d24, d25, [sp, #272]
    ldp   d26, d27, [sp, #288]
    ldp   d28, d29, [sp, #304]
    ldp   d30, d31, [sp, #320]

    ldr   x0, [lr, #\ldr_offset]      // Load the instruction.
    adr   xIP1, .Lmark_introspection_return_switch
    bfi   xIP1, x0, #3, #5            // Insert Rt (bits 0-4 of the LDR) * 8: each
                                      // switch case is two 4-byte instructions.
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 336
    br    xIP1
.endm
2745
2746    /*
2747     * Use introspection to load a reference from the same address as the LDR
2748     * instruction in generated code would load (unless loaded by the thunk,
2749     * see below), call ReadBarrier::Mark() with that reference if needed
2750     * and return it in the same register as the LDR instruction would load.
2751     *
2752     * The entrypoint is called through a thunk that differs across load kinds.
2753     * For field and array loads the LDR instruction in generated code follows
2754     * the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
2755     * knows the holder and performs the gray bit check, returning to the LDR
2756     * instruction if the object is not gray, so this entrypoint no longer
2757     * needs to know anything about the holder. For GC root loads, the LDR
2758     * instruction in generated code precedes the branch to the thunk (i.e.
2759     * the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
2760     *
2761     * For field accesses and array loads with a constant index the thunk loads
2762     * the reference into IP0 using introspection and calls the main entrypoint,
2763     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
2764     * the passed reference is poisoned.
2765     *
2766     * For array accesses with non-constant index, the thunk inserts the bits
2767     * 16-21 of the LDR instruction to the entrypoint address, effectively
2768     * calculating a switch case label based on the index register (bits 16-20)
2769     * and adding an extra offset (bit 21 is set) to differentiate from the
2770     * main entrypoint, then moves the base register to IP0 and jumps to the
2771     * switch case. Therefore we need to align the main entrypoint to 512 bytes,
2772     * accounting for a 256-byte offset followed by 32 array entrypoints
2773     * starting at art_quick_read_barrier_mark_introspection_arrays, each
2774     * containing an LDR (register) and a branch to the main entrypoint.
2775     *
2776     * For GC root accesses we cannot use the main entrypoint because of the
2777     * different offset where the LDR instruction in generated code is located.
2778     * (And even with heap poisoning enabled, GC roots are not poisoned.)
2779     * To re-use the same entrypoint pointer in generated code, we make sure
2780     * that the gc root entrypoint (a copy of the entrypoint with a different
2781     * offset for introspection loads) is located at a known offset (768 bytes,
2782     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
2783     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
2784     * the root register to IP0 and jumps to the customized entrypoint,
2785     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
2786     * performs all the fast-path checks, so we need just the slow path.
2787     * The UnsafeCASObject intrinsic is also using the GC root entrypoint with
2788     * MOV instead of LDR, the destination register is in the same bits.
2789     *
2790     * The code structure is
2791     *   art_quick_read_barrier_mark_introspection:
2792     *     Up to 256 bytes for the main entrypoint code.
2793     *     Padding to 256 bytes if needed.
2794     *   art_quick_read_barrier_mark_introspection_arrays:
2795     *     Exactly 256 bytes for array load switch cases (32x2 instructions).
2796     *   .Lmark_introspection_return_switch:
2797     *     Exactly 256 bytes for return switch cases (32x2 instructions).
2798     *   art_quick_read_barrier_mark_introspection_gc_roots:
2799     *     GC root entrypoint code.
2800     */
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP0 contains the reference, IP1 can be freely used.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF wIP0
    // If reference is null, just return it in the right register.
    cbz   wIP0, .Lmark_introspection_return
    // Use wIP1 as temp and check the mark bit of the reference.
    ldr   wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz   wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
.Lmark_introspection_return:
    // Without an extra register for the return switch case address calculation,
    // we exploit the high word of the xIP0 to temporarily store the ref_reg*8,
    // so the return switch below must move wIP0 instead of xIP0 to the register.
    ldr   wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET]  // Load the instruction.
    bfi   xIP0, xIP1, #(32 + 3), #5   // Extract ref_reg*8 to high word in xIP0.
    adr   xIP1, .Lmark_introspection_return_switch
    bfxil xIP1, xIP0, #32, #8         // Calculate return switch case address.
    br    xIP1
.Lmark_introspection_unmarked:
    // Check if the top two bits are one, if this is the case it is a forwarding address.
    tst   wIP1, wIP1, lsl #1          // Result is negative iff bits 31 and 30 are both set.
    bmi   .Lmark_introspection_forwarding_address
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET

.Lmark_introspection_forwarding_address:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl   wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b .Lmark_introspection_return

    // We're very close to the allotted 256B for the entrypoint code before the
    // array switch cases. Should we go a little bit over the limit, we can
    // move some code after the array switch cases and return switch cases.
    .balign 256
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
.Lmark_introspection_return_switch:
    FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
    .hidden art_quick_read_barrier_mark_introspection_gc_roots
    .global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
END art_quick_read_barrier_mark_introspection
2847
.extern artInvokePolymorphic
// Bridge for invoke-polymorphic call sites; the receiver arrives in x1.
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME      // Save callee saves in case allocation triggers GC.
    mov     x0, x1                      // x0 := receiver
    mov     x1, xSELF                   // x1 := Thread::Current()
    mov     x2, sp                      // x2 := SP
    bl      artInvokePolymorphic        // artInvokePolymorphic(receiver, thread, save_area)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                      // Result is in x0. Copy to floating return register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END  art_quick_invoke_polymorphic
2860
.extern artInvokeCustom
// Bridge for invoke-custom call sites; the call site index arrives in x0.
ENTRY art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME    // Save callee saves in case allocation triggers GC.
                                      // x0 := call_site_idx
    mov     x1, xSELF                 // x1 := Thread::Current()
    mov     x2, sp                    // x2 := SP
    bl      artInvokeCustom           // artInvokeCustom(call_site_idx, thread, save_area)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                    // Copy result to double result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END  art_quick_invoke_custom
2873
// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
//  Argument 0: x0: The context pointer for ExecuteSwitchImpl.
//  Argument 1: x1: Pointer to the templated ExecuteSwitchImpl to call.
//  Argument 2: x2: The value of DEX PC (memory address of the methods bytecode).
ENTRY ExecuteSwitchImplAsm
    SAVE_TWO_REGS_INCREASE_FRAME x19, xLR, 16     // Preserve callee-saved x19 and the return address.
    mov x19, x2                                   // x19 = DEX PC
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* x0 */, 19 /* x19 */, 0)
    blr x1                                        // Call the wrapped method.
    RESTORE_TWO_REGS_DECREASE_FRAME x19, xLR, 16
    ret
END ExecuteSwitchImplAsm
2886