1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21#include "arch/quick_field_entrypoints.S"
22
MACRO0(ASSERT_USE_READ_BARRIER)
    // Fail fast with breakpoint traps if this build does not define
    // USE_READ_BARRIER: code paths that expand this macro are only
    // valid in read-barrier configurations.
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO
29
// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
31
32
33    /*
34     * Macro that sets up the callee save frame to conform with
35     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
36     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    // Not implemented for Apple targets; trap if ever reached.
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
#endif  // __APPLE__
END_MACRO
53
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    // Variant of SETUP_SAVE_REFS_AND_ARGS_FRAME where the ArtMethod* to
    // store at the bottom of the stack is already in RDI, so it is not
    // loaded from the Runtime instance.
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO
61
62    /*
63     * Macro that sets up the callee save frame to conform with
64     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
65     * when R14 and R15 are already saved.
66     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    // Not implemented for Apple targets; trap if ever reached.
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    // Layout after this: 8 bytes of padding at 0(%rsp), XMM0..XMM15 at 8..128(%rsp).
    INCREASE_FRAME 8 + 16 * 8
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
121
122    /*
123     * Macro that sets up the callee save frame to conform with
124     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
125     * when R15 is already saved.
126     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    // R15 is already on the stack; push R14 and delegate the rest.
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO
131
132    /*
133     * Macro that sets up the callee save frame to conform with
134     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
135     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    // Push R15 first, then delegate; nothing is assumed saved on entry.
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO
140
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
    // NOTE(review): "FRPS" in the macro name looks like a typo for "FPRS";
    // kept as-is because the call sites below use this spelling.
    // Restore FPRs. Method and padding is still on the stack.
    // Offsets are the save-time offsets + 8 because the ArtMethod* slot
    // pushed after the FPR stores is still below them here.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO
160
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    // RAX is popped (or deliberately skipped) by the callers before this runs.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
178
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    // Tear down a save-everything frame, restoring all registers including RAX.
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    DECREASE_FRAME 16 + 16 * 8

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
188
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    // Tear down a save-everything frame but keep the current RAX (e.g. a
    // result produced while the frame was active) instead of the saved one.
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    DECREASE_FRAME 16 + 16 * 8 + 8

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
197
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint that calls cxx_name(Thread*); the callee throws, so this
    // never returns (UNREACHABLE below).
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
207
MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Like NO_ARG_RUNTIME_EXCEPTION but with a save-everything frame; the
    // callee throws, so this never returns.
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
217
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint that calls cxx_name(arg1, Thread*); arg1 arrives in RDI
    // and is left untouched. The callee throws, so this never returns.
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
227
MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint that calls cxx_name(arg1, arg2, Thread*); arg1/arg2 arrive
    // in RDI/RSI and are left untouched. The callee throws; never returns.
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
237
238    /*
239     * Called by managed code to create and deliver a NullPointerException.
240     */
241NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
242
243    /*
244     * Call installed by a signal handler to create and deliver a NullPointerException.
245     */
246DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
247    // Fault address and return address were saved by the fault handler.
248    // Save all registers as basis for long jump context; R15 will replace fault address later.
249    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
250    // Retrieve fault address and save R15.
251    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
252    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
253    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
254    // Outgoing argument set up; RDI already contains the fault address.
255    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
256    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
257    UNREACHABLE
258END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
259
260    /*
261     * Called by managed code to create and deliver an ArithmeticException.
262     */
263NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
264
265    /*
266     * Called by managed code to create and deliver a StackOverflowError.
267     */
268NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
269
270    /*
271     * Called by managed code, saves callee saves and then calls artThrowException
272     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
273     */
274ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
275
276    /*
277     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
278     * index, arg2 holds limit.
279     */
280TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
281
282    /*
283     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
284     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
285     */
286TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
287
288    /*
289     * All generated callsites for interface invokes and invocation slow paths will load arguments
290     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
291     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
292     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
293     *
294     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
295     * of the target Method* in rax and method->code_ in rdx.
296     *
297     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
298     * thread and we branch to another stub to deliver it.
299     *
300     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
301     * location on the stack.
302     *
303     * Adapted from x86 code.
304     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // Per the header comment above, the
                                                           // helper returns Method* in RAX and
                                                           // its code pointer in RDX.
    movq %rax, %rdi                                        // RDI := Method* (arg0 of the target)
    movq %rdx, %rax                                        // RAX := code pointer
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi                                       // null Method* => resolution failed
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Thin wrapper: defines entrypoint c_name around INVOKE_TRAMPOLINE_BODY.
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO
332
// One invoke trampoline per dispatch kind; see INVOKE_TRAMPOLINE above.
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
339
340
341    /*
342     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
343     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
344     * the end of the shorty.
345     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++ (a long occupies two 4-byte slots)
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
371
372    /*
373     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
374     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
375     * the end of the shorty.
376     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)  // 32-bit write also zero-extends into gpr_reg64
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
404
405    /*
406     * Quick invocation stub.
407     * On entry:
408     *   [sp] = return address
409     *   rdi = method pointer
410     *   rsi = argument array that must at least contain the this pointer.
411     *   rdx = size of argument array in bytes
412     *   rcx = (managed) thread pointer
413     *   r8 = JValue* result
414     *   r9 = char* shorty
415     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    // Not implemented for Apple targets; trap if ever reached.
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // r10d := size of arg array (edx is reused below).
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
498
499    /*
500     * Quick invocation stub.
501     * On entry:
502     *   [sp] = return address
503     *   rdi = method pointer
504     *   rsi = argument array or null if no arguments.
505     *   rdx = size of argument array in bytes
506     *   rcx = (managed) thread pointer
507     *   r8 = JValue* result
508     *   r9 = char* shorty
509     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    // Not implemented for Apple targets; trap if ever reached.
    int3
    int3
#else
    // Set up argument XMM registers. Unlike art_quick_invoke_stub, there is
    // no implicit this pointer to skip in the arg array.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // r10d := size of arg array (edx is reused below).
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
591
592    /*
593     * Long jump stub.
594     * On entry:
595     *   rdi = gprs
596     *   rsi = fprs
597     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    // Not implemented for Apple targets; trap if ever reached.
    int3
    int3
#else
    // Restore FPRs from the fprs array (rsi).
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs by popping them from the gprs array (rdi).
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
643
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint: calls cxx_name(arg0, Thread*) under a refs-only frame,
    // then runs return_macro to return or deliver a pending exception.
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
654
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint: calls cxx_name(arg0, arg1, Thread*) under a refs-only
    // frame, then runs return_macro to return or deliver a pending exception.
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
665
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint: calls cxx_name(arg0, arg1, arg2, Thread*) under a refs-only
    // frame, then runs return_macro to return or deliver a pending exception.
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
676
MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    // Entrypoint: calls cxx_name(arg0, arg1, arg2, arg3, Thread*) under a
    // refs-only frame, then runs return_macro to return or deliver a
    // pending exception.
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
687
688    /*
689     * Macro for resolution and initialization of indexed DEX file
690     * constants such as classes and strings.
691     */
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    // Resolution/initialization entrypoint: the constant's index arrives in
    // EAX. Calls cxx_name(index, Thread*) under a save-everything frame;
    // a null result means a pending exception must be delivered.
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                               // pass the index of the constant as arg0
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver pending exception.
    jz 1f
    DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_RAX /*is_ref=*/1
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO
706
MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    // Same as ONE_ARG_SAVE_EVERYTHING_DOWNCALL, but uses the clinit-specific
    // save-everything method offset.
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO
710
MACRO0(RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER)
    // Non-null RAX: return (or deoptimize if required); null RAX: deliver
    // the pending exception.
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    DEOPT_OR_RETURN /*is_ref=*/1   // Check if deopt is required
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
718
719
MACRO1(RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION, is_ref = 0)
    // If the thread has a pending exception, deliver it; otherwise return
    // (or deoptimize if required).
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    DEOPT_OR_RETURN \is_ref        // Check if deopt is required
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
728
MACRO0(RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION)
    // Reference-returning variant of the macro above (is_ref=1).
    RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /*is_ref=*/1
END_MACRO
732
MACRO1(DEOPT_OR_RETURN, is_ref = 0)
  // Fast path: return immediately when no deopt check is required.
  // Slow path: call artDeoptimizeIfNeeded(Thread*, result, is_ref) around a
  // save-everything frame, then return.
  cmpl LITERAL(0), %gs:THREAD_DEOPT_CHECK_REQUIRED_OFFSET
  jne 2f
  ret
2:
  SETUP_SAVE_EVERYTHING_FRAME
  movq LITERAL(\is_ref), %rdx          // pass if result is a reference
  movq %rax, %rsi                      // pass the result
  movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current
  call SYMBOL(artDeoptimizeIfNeeded)
  RESTORE_SAVE_EVERYTHING_FRAME
  ret
END_MACRO
746
MACRO1(DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_RAX, is_ref = 0)
  // Entered with a save-everything frame active and the result in RAX.
  // Fast path: tear down the frame (keeping RAX) and return. Slow path:
  // update the saved RAX slot and call artDeoptimizeIfNeeded(Thread*,
  // result, is_ref) before restoring the frame and returning.
  cmpl LITERAL(0), %gs:THREAD_DEOPT_CHECK_REQUIRED_OFFSET
  CFI_REMEMBER_STATE
  jne 2f
  RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX
  ret
2:
  CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
  movq %rax, SAVE_EVERYTHING_FRAME_RAX_OFFSET(%rsp) // update result in the frame
  movq LITERAL(\is_ref), %rdx                       // pass if result is a reference
  movq %rax, %rsi                                   // pass the result
  movq %gs:THREAD_SELF_OFFSET, %rdi                 // pass Thread::Current
  call SYMBOL(artDeoptimizeIfNeeded)
  CFI_REMEMBER_STATE
  RESTORE_SAVE_EVERYTHING_FRAME
  ret
  CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
END_MACRO
765
766
767
MACRO0(RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER)
    // Zero EAX (success): return or deoptimize; non-zero EAX: deliver the
    // pending exception.
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    DEOPT_OR_RETURN                // Check if we need a deopt
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
775
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// (Hand-written fast-path implementations for the commented-out entries are
// defined further down in this file.)
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
806
807
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
// Fast path: pop one slot off the thread-local RosAlloc free list for the
// bracket matching the object size. Falls back to the C++ entrypoint
// (`cxx_name`) when the thread-local allocation stack is full, the size
// exceeds the max thread-local bracket, or the bracket's free list is empty.
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
                                                           // (movl zero-extends into rax,
                                                           // so the later shrq on rax is safe).
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9)
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from edi as there is no
                                                           // 0 byte run and the size is already
                                                           // aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name                              // Empty free list: slow path.
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER       // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
876
// Instantiate the RosAlloc fast-path stub for both the resolved and the
// initialized object-allocation entrypoints.
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
879
// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
// Pure alias: forwards directly to the initialized fast path below.
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO
889
// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RSI: ArtMethod*, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
// NOTE(review): RSI is listed both as ArtMethod* input and as scratch above;
// the body never touches RSI, so the "scratch" listing appears to be about
// what callers may assume — confirm against the original entrypoint contract.
// Bump-allocates from the thread-local buffer: jumps to `slowPathLabel` if
// the object does not fit, otherwise advances thread_local_pos and stores the
// class pointer into the new object's header.
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos, note that these
                                                               // are both 32 bit ints, overflow
                                                               // will cause the add to be past the
                                                               // end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
912
// The fast path code for art_quick_alloc_array_region_tlab.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
// R9 arrives as size + header + alignment mask (set up by a COMPUTE_ARRAY_SIZE_*
// macro); the andq below rounds it down to the required object alignment.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    cmpq LITERAL(MIN_LARGE_OBJECT_THRESHOLD), %r9              // Possibly a large object.
    jae  RAW_VAR(slowPathLabel)                                // Go to slow path if large object
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
936
// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
// Calls the given C++ entrypoint as cxx_name(klass, Thread*) with the class
// still in RDI, then returns the new object or delivers the pending exception.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER       // return or deliver exception
END_MACRO
947
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    // Fast path returns directly; only the slow path falls through here.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab
957
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    // Fast path returns directly; only the slow path falls through here.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab
967
// Compute R9 = component_count << component_size_shift + array header
// + alignment mask, reading the component size shift from the class.
// RDI: mirror::Class* klass, RSI: int32_t component_count.
// Clobbers RCX (holds the size shift); the caller masks R9 down to alignment.
MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx  // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                              // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    // (shift+1) & 4 is 4 exactly when the shift is 3, i.e. 8-byte components.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO
985
// Compute R9 = component_count (1-byte components) + header + alignment mask.
MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
993
// Compute R9 = component_count * 2 (2-byte components) + header + alignment mask.
MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1002
// Compute R9 = component_count * 4 (4-byte components) + header + alignment mask.
MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1011
// Compute R9 = component_count * 8 (8-byte components) + wide-array header
// + alignment mask. Uses MIRROR_WIDE_ARRAY_DATA_OFFSET for the larger header.
MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1020
// Emit an array-allocation entrypoint: `size_setup` computes the total size
// into R9, the TLAB fast path bump-allocates, and the slow path calls the C++
// entrypoint as cxx_name(klass, component_count, Thread*).
MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER           // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO
1036
1037
// Instantiate array allocation entrypoints for region TLAB and plain TLAB,
// one per statically-known component size plus the generic (unknown) variant.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1049
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
// Only valid with a read-barrier collector (traps otherwise via the assert).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab
1060
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
// Only valid with a read-barrier collector (traps otherwise via the assert).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
1072
// Resolution entrypoints generated from shared downcall macros; the
// _FOR_CLINIT variants additionally handle class-initialization entrypoints.
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// fill-array-data: returns on zero status, delivers the pending exception otherwise.
TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
                 artHandleFillArrayDataFromCode, \
                 RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
1083
1084    /*
1085     * Entry from managed code that tries to lock the object in a fast path and
1086     * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
1087     * RDI holds the possibly null object to lock.
1088     */
// Fast-path monitor-enter. RDI: possibly null object to lock.
// Null objects and contended cases are handled by the no_inline slow path.
DEFINE_FUNCTION art_quick_lock_object
    testq %rdi, %rdi                      // Null check object.
    jz   art_quick_lock_object_no_inline  // Slow path handles null.
    LOCK_OBJECT_FAST_PATH rdi, ecx, art_quick_lock_object_no_inline
END_FUNCTION art_quick_lock_object
1094
1095    /*
1096     * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
1097     * RDI holds the possibly null object to lock.
1098     */
// Monitor-enter via the runtime: artLockObjectFromCode(object, Thread*).
// Returns a 32-bit status in EAX: zero on success, non-zero when an exception
// (set on the current thread) must be delivered.
DEFINE_FUNCTION art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object.
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END_FUNCTION art_quick_lock_object_no_inline
1107
1108    /*
1109     * Entry from managed code that tries to unlock the object in a fast path and calls
1110     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
1111     * RDI holds the possibly null object to unlock.
1112     */
// Fast-path monitor-exit. RDI: possibly null object to unlock.
DEFINE_FUNCTION art_quick_unlock_object
    testq %rdi, %rdi                      // Null check object.
    // Fix: a null monitor-exit must take the *unlock* slow path so that
    // `artUnlockObjectFromCode()` reports the failure; the previous target
    // was `art_quick_lock_object_no_inline`, i.e. the monitor-enter runtime
    // path, contradicting this function's documented contract.
    jz   art_quick_unlock_object_no_inline
    UNLOCK_OBJECT_FAST_PATH rdi, ecx, /*saved_rax*/ none, art_quick_unlock_object_no_inline
END_FUNCTION art_quick_unlock_object
1118
1119    /*
1120     * Entry from managed code that calls `artUnlockObjectFromCode()`
1121     * and delivers exception on failure.
1122     * RDI holds the possibly null object to unlock.
1123     */
// Monitor-exit via the runtime: artUnlockObjectFromCode(object, Thread*).
// Returns a 32-bit status in EAX: zero on success, non-zero when an exception
// (set on the current thread) must be delivered.
DEFINE_FUNCTION art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object.
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END_FUNCTION art_quick_unlock_object_no_inline
1132
// Checkcast helper. RDI: object, RSI: target class (null for the bit-string
// type check, which means "throw"). Returns normally when assignable,
// otherwise throws ClassCastException via the runtime.
DEFINE_FUNCTION art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    testl %esi, %esi
    jz .Lthrow_class_cast_exception_for_bitstring_check

    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi                          // Save args for exc
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    CFI_REMEMBER_STATE
    jz .Lthrow_class_cast_exception   // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    // 24 = two 8-byte arg pushes + 8 bytes of alignment padding.
    addq LITERAL(24), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)
    ret

.Lthrow_class_cast_exception:
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 64  // Reset unwind info so following code unwinds.
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi
    // Falls through: RDI/RSI again hold the original (object, class) pair.

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of
1167
1168
1169// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    // Pop \reg from the stack, unless it is the excluded register: then the
    // slot is discarded by a plain stack-pointer adjustment (the excluded
    // register already holds the value the caller wants to keep).
    .ifnc RAW_VAR(reg), RAW_VAR(exclude_reg)
      POP RAW_VAR(reg)
    .else
      DECREASE_FRAME 8
    .endif
END_MACRO
1177
// Reference array store with type check and card marking.
// RDI: array, RSI: index, RDX: value to store (possibly null).
// Null stores skip both the type check and the card mark.
DEFINE_FUNCTION art_quick_aput_obj
    test %edx, %edx              // store of null
    jz .Laput_obj_null
    movl MIRROR_OBJECT_CLASS_OFFSET(%rdi), %ecx
    UNPOISON_HEAP_REF ecx
#ifdef USE_READ_BARRIER
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    CFI_REMEMBER_STATE
    jnz .Laput_obj_gc_marking
#endif  // USE_READ_BARRIER
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %ecx  // Both poisoned if heap poisoning is enabled.
    jne .Laput_obj_check_assignability
.Laput_obj_store:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    // Mark the card for the array: card_table[array >> shift] = low byte of
    // the card table base (rdx is reused as scratch here).
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    movb %dl, (%rdx, %rdi)
    ret

.Laput_obj_null:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret

.Laput_obj_check_assignability:
    UNPOISON_HEAP_REF ecx         // Unpoison array component type if poisoning is enabled.
    PUSH_ARG rdi                  // Save arguments.
    PUSH_ARG rsi
    PUSH_ARG rdx
    movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %esi  // Pass arg2 = value's class.
    UNPOISON_HEAP_REF esi
.Laput_obj_check_assignability_call:
    movl %ecx, %edi               // Pass arg1 = array's component type.
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
    RESTORE_FP_CALLEE_SAVE_FRAME  // Restore FP registers.
    POP_ARG rdx                   // Restore arguments.
    POP_ARG rsi
    POP_ARG rdi
    testq %rax, %rax              // Check for exception.
    jz   .Laput_obj_throw_array_store_exception
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    movb %dl, (%rdx, %rdi)
    ret

.Laput_obj_throw_array_store_exception:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    UNREACHABLE

#ifdef USE_READ_BARRIER
.Laput_obj_gc_marking:
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8
    // We need to align stack for `art_quick_read_barrier_mark_regNN`.
    INCREASE_FRAME 8                        // Stack alignment.
    call SYMBOL(art_quick_read_barrier_mark_reg01)  // Mark ECX
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
    UNPOISON_HEAP_REF ecx
    call SYMBOL(art_quick_read_barrier_mark_reg01)  // Mark ECX
    movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %eax
    UNPOISON_HEAP_REF eax
    call SYMBOL(art_quick_read_barrier_mark_reg00)  // Mark EAX
    DECREASE_FRAME 8                        // Remove stack alignment.
    cmpl %eax, %ecx
    je .Laput_obj_store
    // Prepare arguments in line with `.Laput_obj_check_assignability_call` and jump there.
    PUSH_ARG rdi                  // Save arguments.
    PUSH_ARG rsi
    PUSH_ARG rdx
    movl %eax, %esi               // Pass arg2 - type of the value to be stored.
    // The arg1 shall be moved at `.Laput_obj_check_assignability_call`.
    jmp .Laput_obj_check_assignability_call
#endif  // USE_READ_BARRIER
END_FUNCTION art_quick_aput_obj
1260
1261// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    // All three arguments (dest in RDI, src in RSI, size in RDX) are already
    // in the SysV argument registers and memcpy returns dest in RAX, so we
    // can tail-call straight into libc. Using `jmp` instead of `call`+`ret`
    // keeps RSP 16-byte aligned at memcpy's entry (RSP % 16 == 8 here, which
    // is exactly the ABI guarantee at function entry); a `call` from this
    // point would present memcpy with a misaligned stack and could fault in
    // an SSE-based implementation using aligned spills.
    jmp PLT_SYMBOL(memcpy)        // (void*, const void*, size_t)
END_FUNCTION art_quick_memcpy
1266
// Suspend check entrypoint: saves all registers so the GC can inspect/modify
// them, calls artTestSuspendFromCode(Thread*), then restores and returns.
DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend
1275
// 64-bit arithmetic entrypoints left unimplemented on x86-64 (presumably the
// compiler emits these operations inline here — see UNIMPLEMENTED's trap).
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

// Instantiate the field get/set entrypoints (see arch/quick_field_entrypoints.S).
GENERATE_FIELD_ENTRYPOINTS
1284
// Entry for proxy method invocation: RDI holds the proxy ArtMethod*.
// Builds the refs-and-args frame, then hands off to the C++ handler.
DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
1295
1296    /*
1297     * Called to resolve an imt conflict.
1298     * rdi is the conflict ArtMethod.
1299     * rax is a hidden argument that holds the target interface method.
1300     *
1301     * Note that this stub writes to rdi.
1302     */
// Linear scan over the ImtConflictTable's (interface method, implementation)
// pointer pairs; on a hit, tail-jumps into the implementation's quick code.
// A null table entry terminates the scan and falls back to the runtime.
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
.Limt_table_iterate:
    cmpq %rax, 0(%rdi)                         // Entry's interface method == target?
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    movq %rax, %rdi  // Load interface method
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
1330
// Resolution trampoline: resolves the called method via the runtime, then
// tail-jumps into the resolved code (returned in RAX), or delivers the
// pending exception if resolution failed (null code pointer).
DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
1345
1346/* Generic JNI frame layout:
1347 *
1348 * #-------------------#
1349 * |                   |
1350 * | caller method...  |
1351 * #-------------------#    <--- SP on entry
1352 *
1353 *          |
1354 *          V
1355 *
1356 * #-------------------#
1357 * | caller method...  |
1358 * #-------------------#
1359 * | Return            |
1360 * | R15               |    callee save
1361 * | R14               |    callee save
1362 * | R13               |    callee save
1363 * | R12               |    callee save
1364 * | R9                |    arg5
1365 * | R8                |    arg4
1366 * | RSI/R6            |    arg1
1367 * | RBP/R5            |    callee save
1368 * | RBX/R3            |    callee save
1369 * | RDX/R2            |    arg2
1370 * | RCX/R1            |    arg3
1371 * | XMM7              |    float arg 8
1372 * | XMM6              |    float arg 7
1373 * | XMM5              |    float arg 6
1374 * | XMM4              |    float arg 5
1375 * | XMM3              |    float arg 4
1376 * | XMM2              |    float arg 3
1377 * | XMM1              |    float arg 2
1378 * | XMM0              |    float arg 1
1379 * | RDI/Method*       |  <- sp
1380 * #-------------------#
1381 * | Scratch Alloca    |    5K scratch space
1382 * #---------#---------#
1383 * |         | sp*     |
1384 * | Tramp.  #---------#
1385 * | args    | thread  |
1386 * | Tramp.  #---------#
1387 * |         | method  |
1388 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1389 *
1390 *           |
1391 *           v              artQuickGenericJniTrampoline
1392 *
1393 * #-------------------#
1394 * | caller method...  |
1395 * #-------------------#
1396 * | Return PC         |
1397 * | Callee-Saves      |
1398 * | padding           | // 8B
1399 * | Method*           |    <--- (1)
1400 * #-------------------#
1401 * | local ref cookie  | // 4B
1402 * | padding           | // 0B or 4B to align handle scope on 8B address
1403 * | handle scope      | // Size depends on number of references; multiple of 4B.
1404 * #-------------------#
1405 * | JNI Stack Args    | // Empty if all args fit into registers.
1406 * #-------------------#    <--- SP on native call (1)
1407 * | Free scratch      |
1408 * #-------------------#
1409 * | SP for JNI call   | // Pointer to (1).
1410 * #-------------------#
1411 * | Hidden arg        | // For @CriticalNative
1412 * #-------------------#
1413 * |                   |
1414 * | Stack for Regs    |    The trampoline assembly will pop these values
1415 * |                   |    into registers for native call
1416 * #-------------------#
1417 */
1418    /*
1419     * Called to do a generic JNI down-call
1420     */
1421DEFINE_FUNCTION art_quick_generic_jni_trampoline
1422    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
1423
1424    movq %rsp, %rbp                 // save SP at (old) callee-save frame
1425    CFI_DEF_CFA_REGISTER(rbp)
1426
1427    subq LITERAL(GENERIC_JNI_TRAMPOLINE_RESERVED_AREA), %rsp
1428    // prepare for artQuickGenericJniTrampoline call
1429    // (Thread*, managed_sp, reserved_area)
1430    //    rdi       rsi           rdx   <= C calling convention
1431    //  gs:...      rbp           rsp   <= where they are
1432    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread::Current().
1433    movq %rbp, %rsi                    // Pass managed frame SP.
1434    movq %rsp, %rdx                    // Pass reserved area.
1435    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
1436
1437    // The C call will have registered the complete save-frame on success.
1438    // The result of the call is:
1439    //     %rax: pointer to native code, 0 on error.
1440    //     The bottom of the reserved area contains values for arg registers,
1441    //     hidden arg register and SP for out args for the call.
1442
1443    // Check for error (class init check or locking for synchronized native method can throw).
1444    test %rax, %rax
1445    jz .Lexception_in_native
1446
1447    // pop from the register-passing alloca region
1448    // what's the right layout?
1449    popq %rdi
1450    popq %rsi
1451    popq %rdx
1452    popq %rcx
1453    popq %r8
1454    popq %r9
1455    // TODO: skip floating point if unused, some flag.
1456    movq 0(%rsp), %xmm0
1457    movq 8(%rsp), %xmm1
1458    movq 16(%rsp), %xmm2
1459    movq 24(%rsp), %xmm3
1460    movq 32(%rsp), %xmm4
1461    movq 40(%rsp), %xmm5
1462    movq 48(%rsp), %xmm6
1463    movq 56(%rsp), %xmm7
1464
1465    // Save call target in scratch register.
1466    movq %rax, %r11
1467
1468    // Load hidden arg (rax) for @CriticalNative.
1469    movq 64(%rsp), %rax
1470    // Load SP for out args, releasing unneeded reserved area.
1471    movq 72(%rsp), %rsp
1472
1473    // native call
1474    call *%r11
1475
1476    // result sign extension is handled in C code
1477    // prepare for artQuickGenericJniEndTrampoline call
1478    // (Thread*,  result, result_f)
1479    //   rdi      rsi   rdx       <= C calling convention
1480    //  gs:...    rax   xmm0      <= where they are
1481    movq %gs:THREAD_SELF_OFFSET, %rdi
1482    movq %rax, %rsi
1483    movq %xmm0, %rdx
1484    call SYMBOL(artQuickGenericJniEndTrampoline)
1485
1486    // Pending exceptions possible.
1487    // TODO: use cmpq, needs direct encoding because of gas bug
1488    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
1489    test %rcx, %rcx
1490    jnz .Lexception_in_native
1491
1492    // Tear down the alloca.
1493    movq %rbp, %rsp
1494
1495    // store into fpr, for when it's a fpr return...
1496    movq %rax, %xmm0
1497
1498    LOAD_RUNTIME_INSTANCE rcx
1499    cmpb MACRO_LITERAL(0), RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE(%rcx)
1500    CFI_REMEMBER_STATE
1501    jne .Lcall_method_exit_hook
1502.Lcall_method_exit_hook_done:
1503
1504    // Tear down the callee-save frame.
1505    CFI_DEF_CFA_REGISTER(rsp)
1506    // Load callee-save FPRs. Skip FP args.
1507    movq 80(%rsp), %xmm12
1508    movq 88(%rsp), %xmm13
1509    movq 96(%rsp), %xmm14
1510    movq 104(%rsp), %xmm15
1511    // Pop method, padding, FP args and two GRP args (rcx, rdx).
1512    DECREASE_FRAME 16 + 12*8 + 2*8
1513    // Load callee-save GPRs and skip args, mixed together to agree with core spills bitmap.
1514    POP rbx  // Callee save.
1515    POP rbp  // Callee save.
1516    DECREASE_FRAME 3*8  // Skip three args (RSI, R8, R9).
1517    POP r12  // Callee save.
1518    POP r13  // Callee save.
1519    POP r14  // Callee save.
1520    POP r15  // Callee save.
1521    ret
1522
1523.Lcall_method_exit_hook:
1524    CFI_RESTORE_STATE_AND_DEF_CFA rbp, 208
1525    movq LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS), %r8
1526    call art_quick_method_exit_hook
1527    jmp .Lcall_method_exit_hook_done
1528
1529.Lexception_in_native:
1530    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
1531    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
1532    movq (%rsp), %rsp
1533    call art_deliver_pending_exception
1534END_FUNCTION art_quick_generic_jni_trampoline
1535
// Delivers the exception pending on the current thread.
DEFINE_FUNCTION art_deliver_pending_exception
    // This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_deliver_pending_exception
1540
1541    /*
1542     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1543     * of a quick call:
1544     * RDI = method being called / to bridge to.
1545     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1546     */
1547DEFINE_FUNCTION art_quick_to_interpreter_bridge
1548    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
1549    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
1550    movq %rsp, %rdx                    // RDX := sp
1551    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
1552    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
1553    movq %rax, %xmm0                   // Place return value also into floating point return value.
1554    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
1555END_FUNCTION art_quick_to_interpreter_bridge
1556
1557    /*
1558     * Called to catch an attempt to invoke an obsolete method.
1559     * RDI = method being called.
1560     */
1561ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1562
1563    /*
1564     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1565     * will long jump to the interpreter bridge.
1566     */
1567DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
1568    SETUP_SAVE_EVERYTHING_FRAME
1569                                                // Stack should be aligned now.
1570    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
1571    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
1572    UNREACHABLE
1573END_FUNCTION art_quick_deoptimize_from_compiled_code
1574
1575    /*
1576     * String's compareTo.
1577     *
1578     * On entry:
1579     *    rdi:   this string object (known non-null)
1580     *    rsi:   comp string object (known non-null)
1581     */
1582DEFINE_FUNCTION art_quick_string_compareto
1583    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
1584    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
1585    /* Build pointers to the start of string data */
1586    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
1587    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
1588#if (STRING_COMPRESSION_FEATURE)
1589    /* Differ cases */
1590    shrl    LITERAL(1), %r8d
1591    jnc     .Lstring_compareto_this_is_compressed
1592    shrl    LITERAL(1), %r9d
1593    jnc     .Lstring_compareto_that_is_compressed
1594    jmp     .Lstring_compareto_both_not_compressed
1595.Lstring_compareto_this_is_compressed:
1596    shrl    LITERAL(1), %r9d
1597    jnc     .Lstring_compareto_both_compressed
1598    /* Comparison this (8-bit) and that (16-bit) */
1599    mov     %r8d, %eax
1600    subl    %r9d, %eax
1601    mov     %r8d, %ecx
1602    cmovg   %r9d, %ecx
1603    /* Going into loop to compare each character */
1604    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
1605.Lstring_compareto_loop_comparison_this_compressed:
1606    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
1607    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
1608    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
1609    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
1610    subl    %r9d, %r8d
1611    loope   .Lstring_compareto_loop_comparison_this_compressed
1612    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
1613.Lstring_compareto_keep_length1:
1614    ret
1615.Lstring_compareto_that_is_compressed:
1616    movl    %r8d, %eax
1617    subl    %r9d, %eax
1618    mov     %r8d, %ecx
1619    cmovg   %r9d, %ecx
1620    /* Comparison this (8-bit) and that (16-bit) */
1621    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
1622.Lstring_compareto_loop_comparison_that_compressed:
1623    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
1624    movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
1625    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
1626    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
1627    subl    %r9d, %r8d
1628    loope   .Lstring_compareto_loop_comparison_that_compressed
1629    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
1630.Lstring_compareto_keep_length2:
1631    ret
1632.Lstring_compareto_both_compressed:
1633    /* Calculate min length and count diff */
1634    movl    %r8d, %ecx
1635    movl    %r8d, %eax
1636    subl    %r9d, %eax
1637    cmovg   %r9d, %ecx
1638    jecxz   .Lstring_compareto_keep_length3
1639    repe    cmpsb
1640    je      .Lstring_compareto_keep_length3
1641    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
1642    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
1643    jmp     .Lstring_compareto_count_difference
1644#endif // STRING_COMPRESSION_FEATURE
1645.Lstring_compareto_both_not_compressed:
1646    /* Calculate min length and count diff */
1647    movl    %r8d, %ecx
1648    movl    %r8d, %eax
1649    subl    %r9d, %eax
1650    cmovg   %r9d, %ecx
1651    /*
1652     * At this point we have:
1653     *   eax: value to return if first part of strings are equal
1654     *   ecx: minimum among the lengths of the two strings
1655     *   esi: pointer to comp string data
1656     *   edi: pointer to this string data
1657     */
1658    jecxz .Lstring_compareto_keep_length3
1659    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
1660    je    .Lstring_compareto_keep_length3
1661    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
1662    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
1663.Lstring_compareto_count_difference:
1664    subl  %ecx, %eax              // return the difference
1665.Lstring_compareto_keep_length3:
1666    ret
1667END_FUNCTION art_quick_string_compareto
1668
// art_quick_memcmp16 has no x86-64 implementation; the UNIMPLEMENTED macro
// (see asm_support) emits a placeholder stub.
UNIMPLEMENTED art_quick_memcmp16
1670
// instance-of entrypoint.  Arguments (mirror::Object*, mirror::Class*) arrive
// in RDI/RSI per the C calling convention and are forwarded unchanged to
// artInstanceOfFromCode, whose result is returned in RAX.  FP callee-save
// registers are preserved around the call.
DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    INCREASE_FRAME 8                           // Alignment padding for the call.
    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
    DECREASE_FRAME 8                           // Drop the alignment padding.
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of
1681
// StringBuilder.append fast path.  The first argument (uint32_t format, in EDI)
// is supplied by the caller and left in place for artStringBuilderAppend.
DEFINE_FUNCTION art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME                // save ref containing registers for GC
    // Outgoing argument set up
    leaq FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__(%rsp), %rsi  // pass args
    movq %gs:THREAD_SELF_OFFSET, %rdx         // pass Thread::Current()
    call artStringBuilderAppend               // (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME              // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER  // return or deopt or deliver exception
END_FUNCTION art_quick_string_builder_append
1691
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass the (sole) argument,
// - register `reg` (which may be different from RAX) is used to return the result,
// - all other registers are callee-save (the values they hold are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit, if it is 1 return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    // Save RAX: it is clobbered by the lock-word test below, and it may also
    // be `reg` (the argument/return register).
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
    // forwarding address one.
    // Taken ~25% of the time.
    CFI_REMEMBER_STATE
    jnae .Lret_forwarding_address\name

    // Reload the original RAX value (clobbered above); when `reg` is RAX this
    // is the object argument passed to the mark routine below.
    movq 0(%rsp), %rax
    // Save the remaining potentially live caller-save core registers.
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    INCREASE_FRAME 12 * 8
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)       // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    DECREASE_FRAME 12 * 8
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 16
    // The overflow cleared the top bits.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    // The forwarding address is the result; place it in `reg` and drop saved RAX.
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
1794
// Instantiate one read-barrier mark entrypoint per general-purpose register
// that can carry the reference (entrypoint suffix == register number).
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
1812
// Slow-path read barrier.  Arguments (ref, obj, offset) arrive in RDI/RSI/RDX
// per the C calling convention and are forwarded unchanged to
// artReadBarrierSlow; its result comes back in RAX.  FP callee-save registers
// are preserved around the call.
DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    INCREASE_FRAME 8                // Alignment padding for the call.
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    DECREASE_FRAME 8                // Drop the alignment padding.
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow
1823
// Slow-path read barrier for GC roots.  The root argument arrives in RDI per
// the C calling convention and is forwarded unchanged to
// artReadBarrierForRootSlow; its result comes back in RAX.  FP callee-save
// registers are preserved around the call.
DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    INCREASE_FRAME 8                       // Alignment padding for the call.
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    DECREASE_FRAME 8                       // Drop the alignment padding.
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow
1834
1835    /*
1836     * On stack replacement stub.
1837     * On entry:
1838     *   [sp] = return address
1839     *   rdi = stack to copy
1840     *   rsi = size of stack
1841     *   rdx = pc to call
1842     *   rcx = JValue* result
1843     *   r8 = shorty
1844     *   r9 = thread
1845     *
1846     * Note that the native C ABI already aligned the stack to 16-byte.
1847     */
1848DEFINE_FUNCTION art_quick_osr_stub
1849    // Save the non-volatiles.
1850    PUSH rbp                      // Save rbp.
1851    PUSH rcx                      // Save rcx/result*.
1852    PUSH r8                       // Save r8/shorty*.
1853
1854    // Save callee saves.
1855    PUSH rbx
1856    PUSH r12
1857    PUSH r13
1858    PUSH r14
1859    PUSH r15
1860
1861    pushq LITERAL(0)              // Push null for ArtMethod*.
1862    CFI_ADJUST_CFA_OFFSET(8)
1863    movl %esi, %ecx               // rcx := size of stack
1864    movq %rdi, %rsi               // rsi := stack to copy
1865    movq %rsp, %rbp               // Save stack pointer to RBP for CFI use in .Losr_entry.
1866    CFI_REMEMBER_STATE
1867    call .Losr_entry
1868
1869    // Restore stack and callee-saves.
1870    addq LITERAL(8), %rsp
1871    CFI_ADJUST_CFA_OFFSET(-8)
1872    POP r15
1873    POP r14
1874    POP r13
1875    POP r12
1876    POP rbx
1877    POP r8
1878    POP rcx
1879    POP rbp
1880    movq %rax, (%rcx)              // Store the result.
1881    ret
1882.Losr_entry:
1883    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 80
1884    // Since the call has pushed the return address we need to switch the CFA register to RBP.
1885    CFI_DEF_CFA_REGISTER(rbp)
1886
1887    subl LITERAL(8), %ecx         // Given stack size contains pushed frame pointer, substract it.
1888    subq %rcx, %rsp
1889    movq %rsp, %rdi               // rdi := beginning of stack
1890    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
1891    jmp *%rdx
1892END_FUNCTION art_quick_osr_stub
1893
// invoke-polymorphic bridge: hand the call off to artInvokePolymorphic.
DEFINE_FUNCTION art_quick_invoke_polymorphic
                                                   // On entry: RDI := unused, RSI := receiver
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %rsi, %rdi                                // RDI := receiver
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
    movq %rsp, %rdx                                // RDX := pass SP
    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
                                                   // Return value of any type is in RAX.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic
1906
// invoke-custom bridge: RDI already holds the call site index, which stays in
// place as the first argument to artInvokeCustom.
DEFINE_FUNCTION art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
                                                   // RDI := call_site_index
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread::Current()
    movq %rsp, %rdx                                // RDX := SP
    call SYMBOL(artInvokeCustom)                   // artInvokeCustom(call_site_idx, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom
1917
// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
//  Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
//  Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
//  Argument 2: RDX: The value of DEX PC (memory address of the methods bytecode).
DEFINE_FUNCTION ExecuteSwitchImplAsm
    PUSH rbx                 // Spill RBX (callee save), used to hold the DEX PC.
    movq %rdx, %rbx          // RBX = DEX PC (callee save register)
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)

    call *%rsi               // Call the wrapped function

    POP rbx                  // Restore RBX
    ret
END_FUNCTION ExecuteSwitchImplAsm
1932
// Record `edi` (a class) in the 5-slot inline cache pointed to by r11.
// Each slot is claimed with a lock cmpxchg; the last slot is written
// unconditionally, marking the cache megamorphic.
// On entry: edi is the class, r11 is the inline cache. r10 and rax are available.
DEFINE_FUNCTION art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
    // Don't update the cache if we are marking.
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    jnz .Ldone
.Lentry1:
    movl INLINE_CACHE_CLASSES_OFFSET(%r11), %eax
    cmpl %edi, %eax             // Already cached in slot 1? Done.
    je .Ldone
    cmpl LITERAL(0), %eax       // Slot taken by another class? Try slot 2.
    jne .Lentry2
    // Slot is empty: try to claim it (EAX holds the expected value, 0).
    lock cmpxchg %edi, INLINE_CACHE_CLASSES_OFFSET(%r11)
    jz .Ldone
    jmp .Lentry1                // CAS lost a race; re-examine the slot.
.Lentry2:
    movl (INLINE_CACHE_CLASSES_OFFSET+4)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry3
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+4)(%r11)
    jz .Ldone
    jmp .Lentry2                // CAS lost a race; re-examine the slot.
.Lentry3:
    movl (INLINE_CACHE_CLASSES_OFFSET+8)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry4
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+8)(%r11)
    jz .Ldone
    jmp .Lentry3                // CAS lost a race; re-examine the slot.
.Lentry4:
    movl (INLINE_CACHE_CLASSES_OFFSET+12)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry5
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+12)(%r11)
    jz .Ldone
    jmp .Lentry4                // CAS lost a race; re-examine the slot.
.Lentry5:
    // Unconditionally store, the cache is megamorphic.
    movl %edi, (INLINE_CACHE_CLASSES_OFFSET+16)(%r11)
.Ldone:
    ret
END_FUNCTION art_quick_update_inline_cache
1983
// Calls artCompileOptimized to request optimized compilation of the method,
// then returns to the caller.
// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    call SYMBOL(artCompileOptimized)            // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_compile_optimized
1993
// Instrumentation entry hook: notifies artMethodEntryHook, preserving all
// registers via the save-everything frame.
// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_method_entry_hook
    SETUP_SAVE_EVERYTHING_FRAME

    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    movq %rsp, %rdx                             // SP

    call SYMBOL(artMethodEntryHook)              // (ArtMethod*, Thread*, sp)

    RESTORE_SAVE_EVERYTHING_FRAME
    ret
END_FUNCTION art_quick_method_entry_hook
2007
// Instrumentation exit hook: notifies artMethodExitHook, passing pointers to
// the GPR/FPR result slots inside the save-everything frame so the hook can
// observe (and potentially alter) the return value.
// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_method_exit_hook
    SETUP_SAVE_EVERYTHING_FRAME

    // R8 passed from JITed code contains frame_size
    leaq 16(%rsp), %rcx                         // floating-point result pointer in kSaveEverything
                                                // frame
    leaq 144(%rsp), %rdx                        // integer result pointer in kSaveEverything frame
    leaq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod**
    movq %gs:THREAD_SELF_OFFSET, %rdi           // Thread::Current
    call SYMBOL(artMethodExitHook)              // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
                                                //  frame_size)

    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret
END_FUNCTION art_quick_method_exit_hook
2025