1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21
    /*
     * Build-configuration guard: code that requires the read barrier expands
     * this macro; if the build does not define USE_READ_BARRIER, any
     * accidental execution traps immediately on the int3 breakpoints.
     */
MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO
28
// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
30
31
32    /*
33     * Macro that sets up the callee save frame to conform with
34     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
35     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    // Not supported on Apple targets (no %gs-based Thread*); trap if executed.
    int3
    int3
#else
    // Spill the callee-save and argument registers first.
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
#endif  // __APPLE__
END_MACRO
52
    /*
     * Like SETUP_SAVE_REFS_AND_ARGS_FRAME, but uses the ArtMethod* already
     * held in RDI instead of loading the runtime's callee-save method.
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO
60
61    /*
62     * Macro that sets up the callee save frame to conform with
63     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
64     * when R14 and R15 are already saved.
65     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    // Not supported on Apple targets (no %gs-based Thread*); trap if executed.
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    // Layout of this area: [0..7] padding, [8..135] xmm0-xmm15 (8 bytes each).
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // 15 GPRs + 16 FPRs + (padding + ArtMethod*) + return address.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
121
122    /*
123     * Macro that sets up the callee save frame to conform with
124     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
125     * when R15 is already saved.
126     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    // Complete the spill area: push R14, then fall into the R14/R15-saved case.
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO
131
132    /*
133     * Macro that sets up the callee save frame to conform with
134     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
135     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    // Start the spill sequence from scratch: push R15, then delegate.
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO
140
    /*
     * Restore the 16 XMM registers of a kSaveEverything frame.  Offsets are
     * the save-time offsets plus 8 because the ArtMethod* pushed after the
     * FPR area is still on the stack below it.
     * NOTE: "FRPS" in the name is a historical typo for "FPRS"; kept so
     * existing call sites continue to assemble.
     */
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
    // Restore FPRs. Method and padding is still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO
160
    /*
     * Pop the 14 GPRs of a kSaveEverything frame in the reverse of the save
     * order, leaving RAX to the caller (it may hold a return value).
     */
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
178
    /*
     * Fully unwind a kSaveEverything frame, restoring all FPRs and GPRs
     * (including RAX).
     */
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
189
    /*
     * Unwind a kSaveEverything frame but preserve the current value of RAX
     * (the saved copy on the stack is skipped, not reloaded).
     */
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
199
    /*
     * Generates a no-argument throw entrypoint: c_name saves all callee-save
     * registers as a long-jump context, then calls cxx_name(Thread*), which
     * delivers the exception and does not return.
     */
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
209
    /*
     * Like NO_ARG_RUNTIME_EXCEPTION but saves a kSaveEverything frame so all
     * registers are available to the exception delivery code.
     */
MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
219
    /*
     * Generates a one-argument throw entrypoint: the argument arrives in RDI
     * (arg0) and Thread::Current() is passed as the second argument.
     */
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
229
    /*
     * Generates a two-argument throw entrypoint over a kSaveEverything frame:
     * arguments arrive in RDI/RSI and Thread::Current() is passed third.
     */
MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
239
    /*
     * Called by managed code to create and deliver a NullPointerException.
     * Does not return.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
244
245    /*
246     * Call installed by a signal handler to create and deliver a NullPointerException.
247     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    // The fault handler stored the fault address in the slot that normally
    // holds the spilled R15; swap in the real R15 and record it for CFI.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
261
    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
289
290    /*
291     * All generated callsites for interface invokes and invocation slow paths will load arguments
292     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
293     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
294     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
295     *
296     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
297     * of the target Method* in rax and method->code_ in rdx.
298     *
299     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
300     * thread and we branch to another stub to deliver it.
301     *
302     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
303     * location on the stack.
304     *
305     * Adapted from x86 code.
306     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // save the code pointer
    // Helper returned {Method*, code} in RAX/RDX; stash code in RDI (which
    // survives the frame restore check below) and Method* in RAX... actually:
    // RDI := Method* (for the null check), RAX := code entry point.
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi                                       // lookup failed?
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
    // Wraps INVOKE_TRAMPOLINE_BODY in a named function definition.
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO
334
// Instantiate the invoke trampolines for each invoke kind.
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
341
342
343    /*
344     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
345     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
346     * the end of the shorty.
347     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP: scan the shorty until the next FP arg ('D' or 'F'), skipping
   // over GPR-sized slots in arg_array; ASCII 68='D', 70='F', 74='J'.
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE: load 8 bytes into the XMM register.
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT: load 4 bytes into the XMM register.
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
373
374    /*
375     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
376     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
377     * the end of the shorty.
378     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP: scan the shorty until the next integer/reference arg, skipping
   // FP slots in arg_array; ASCII 68='D', 70='F', 74='J'.
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    // 32-bit value (int/reference): load and zero-extend into the GPR.
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG: load the full 8 bytes.
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
406
407    /*
408     * Quick invocation stub.
409     * On entry:
410     *   [sp] = return address
411     *   rdi = method pointer
412     *   rsi = argument array that must at least contain the this pointer.
413     *   rdx = size of argument array in bytes
414     *   rcx = (managed) thread pointer
415     *   r8 = JValue* result
416     *   r9 = char* shorty
417     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    // Not supported on Apple targets; trap if executed.
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    // Fill remaining GPR argument registers from the (non-FP) shorty entries.
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    // Store the return value into *result according to the shorty's return
    // type character (ASCII 68='D', 70='F'; everything else goes via rax).
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
500
501    /*
502     * Quick invocation stub.
503     * On entry:
504     *   [sp] = return address
505     *   rdi = method pointer
506     *   rsi = argument array or null if no arguments.
507     *   rdx = size of argument array in bytes
508     *   rcx = (managed) thread pointer
509     *   r8 = JValue* result
510     *   r9 = char* shorty
511     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    // Not supported on Apple targets; trap if executed.
    int3
    int3
#else
    // Set up argument XMM registers.  Unlike art_quick_invoke_stub there is
    // no implicit "this", so the arg_array is consumed from its start.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    // Fill GPR argument registers (starting from rsi since there is no this).
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    // Store the return value into *result according to the shorty's return
    // type character (ASCII 68='D', 70='F'; everything else goes via rax).
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
593
594    /*
595     * Long jump stub.
596     * On entry:
597     *   rdi = gprs
598     *   rsi = fprs
599     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    // Not supported on Apple targets; trap if executed.
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs by popping them off the gprs array.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
645
    /*
     * Generates a one-argument runtime downcall: arg0 arrives in RDI,
     * Thread::Current() is appended, and return_macro decides whether to
     * return normally or deliver a pending exception.
     */
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
656
    /*
     * Two-argument runtime downcall: arg0/arg1 arrive in RDI/RSI,
     * Thread::Current() is appended as the third argument.
     */
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
667
    /*
     * Three-argument runtime downcall: arg0-arg2 arrive in RDI/RSI/RDX,
     * Thread::Current() is appended as the fourth argument.
     */
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
678
    /*
     * Four-argument runtime downcall: arg1-arg4 arrive in RDI/RSI/RDX/RCX,
     * Thread::Current() is appended as the fifth argument (in R8).
     */
MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg1, arg2, arg3, arg4, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
689
    /*
     * One-argument reference downcall; identical call shape to
     * ONE_ARG_DOWNCALL, kept separate for readability at the call sites.
     */
MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
700
    /*
     * Two-argument reference downcall; args arrive in RDI/RSI and
     * Thread::Current() is appended third.
     */
MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
711
    /*
     * Three-argument reference downcall; args arrive in RDI/RSI/RDX and
     * Thread::Current() is appended fourth.
     */
MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
722
723    /*
724     * Macro for resolution and initialization of indexed DEX file
725     * constants such as classes and strings.
726     */
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    // The index of the constant arrives in EAX (set by compiled code).
    movl %eax, %edi                               // pass the index of the constant as arg0
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX        // restore frame up to return address
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO
744
    // Variant using the for-clinit save-everything method offset.
MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO
748
    // Return if RAX (the 64-bit call result) is non-zero; otherwise deliver
    // the pending exception on the current thread.
MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
756
    // Return if EAX (the 32-bit call result) is zero (success); otherwise
    // deliver the pending exception on the current thread.
MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
764
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// (The commented-out generators are provided instead by the hand-written
// fast-path implementations later in this file.)
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
795
796
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
//
// Fast path: pop an object off the thread-local rosalloc free list for
// the size bracket of the class, push it on the thread-local allocation
// stack, and install the class pointer. Falls back to the C++ runtime
// (`cxx_name`) when the allocation stack is full, the size is not a
// thread-local bracket size, or the free list is empty.
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9)
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from edi as there is no
                                                           // 0 byte run and the size is already
                                                           // aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name                              // Empty free list -> slow path.
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
865
// Instantiate the rosalloc fast path for the resolved and initialized entrypoints.
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
868
// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    // Simply forwards to the initialized fast path (identical code).
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO
878
// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// Bumps the thread-local allocation pointer by the class's fast-path
// object size and installs the class pointer; jumps to `slowPathLabel`
// when the object does not fit in the remaining TLAB space.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
// NOTE(review): an earlier comment also listed RSI as "ArtMethod*", but
// the macro body never reads RSI — treated purely as scratch here.
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos, note that these
                                                               // are both 32 bit ints, overflow
                                                               // will cause the add to be past the
                                                               // end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
902
// The fast path code for art_quick_alloc_array_region_tlab.
// Bump-allocates `total_size` bytes from the TLAB, then stores the class
// pointer and array length in the new array's header.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9                                             // r9 = new thread_local_pos.
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)                // Store the array length.
    ret                                                        // Fast path succeeded.
END_MACRO
925
// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
// Calls into `cxx_name(klass, Thread*)` with a save-refs-only frame and
// returns the allocated object in RAX or delivers the pending exception.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
END_MACRO
936
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab
946
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab
956
// Compute the total allocation size (header + data + alignment) in R9 for
// an array whose component size is not known statically: the component
// size shift is read from the component class's primitive-type field.
// In: RDI = array class, RSI = component count. Clobbers RCX; out: R9.
MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx        // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                              // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    // (shift + 1) & 4 is 4 only when the shift is 3, i.e. 8-byte
    // components; for shifts 0..2 the expression is 0.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO
974
// Compute total allocation size in R9 for 1-byte components (count * 1).
MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
982
// Compute total allocation size in R9 for 2-byte components (count << 1).
MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
991
// Compute total allocation size in R9 for 4-byte components (count << 2).
MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1000
// Compute total allocation size in R9 for 8-byte components (count << 3).
// Uses the wide-array data offset, which accounts for the extra header padding.
MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1009
// Expands to an array-allocation entrypoint `c_entrypoint`: computes the
// total size with `size_setup`, tries the TLAB fast path, and falls back
// to `cxx_name(klass, component_count, Thread*)` on failure.
MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)                                     // R9 := total allocation size.
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO
1025
1026
// Instantiate the array-allocation entrypoints for the region-TLAB and
// plain-TLAB allocators, one per component-size variant.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1038
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER                                // Region TLAB requires read barriers.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab
1049
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER                                // Region TLAB requires read barriers.
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
1061
// Resolution / initialization entrypoints built from the save-everything
// downcall macros above.
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
1070
// Lock the object in RDI for the current thread.
// Fast path: thin-lock acquire (or recursive count bump) via lock cmpxchg,
// preserving the read-barrier/gc-state bits of the lock word. Falls back
// to artLockObjectFromCode for null, fat/forwarded lock words, contention,
// or count overflow.
// Note: object is addressed via %edi — 32-bit addressing assumes the top
// 32 bits of the reference are zero (compressed heap references).
DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx  // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: expected old val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    movl %edx, %ecx                       // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object
1111
// Lock entrypoint without the inlined fast path: goes straight to the
// C++ runtime helper. RDI = object.
DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline
1119
// Unlock the object in RDI for the current thread.
// Fast path: thin-lock release — either clear the owner (count reaches 0)
// or decrement the recursion count — preserving the gc-state bits. When
// read barriers are enabled the store must be a lock cmpxchg so concurrent
// read-barrier-bit updates are not lost. Falls back to
// artUnlockObjectFromCode for null, fat locks, or wrong owner.
DEFINE_FUNCTION art_quick_unlock_object
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae  .Lrecursive_thin_unlock          // count > 0: recursive release.
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object
1162
// Unlock entrypoint without the inlined fast path: goes straight to the
// C++ runtime helper. RDI = object.
DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline
1170
// Check-cast entrypoint: RDI = object, RSI = target class.
// Calls artInstanceOfFromCode(obj, klass); returns normally when
// assignable, otherwise throws ClassCastException via
// artThrowClassCastExceptionForObject (never returns).
DEFINE_FUNCTION art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    testl %esi, %esi
    jz .Lthrow_class_cast_exception_for_bitstring_check

    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi                          // Save args for exc
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    jz .Lthrow_class_cast_exception   // jump forward if not assignable
    CFI_REMEMBER_STATE                // snapshot unwind state before popping the frame
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // pop arguments (2 pushes + 8 bytes padding)
    CFI_ADJUST_CFA_OFFSET(-24)
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 64)  // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of
1205
1206
// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
// Used so that a register holding a freshly-produced result is not
// clobbered by the restore sequence.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp       // skip the slot; value stays in reg
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO
1216
    /*
     * Macro to insert read barrier, used in art_quick_aput_obj.
     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
     * 64b PUSH/POP and 32b argument.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     *
     * With USE_READ_BARRIER: spills caller-saved registers, calls
     * artReadBarrierSlow(ref, obj_reg, offset), and leaves the loaded
     * reference in dest_reg. Without it: a plain load + unpoison.
     *
     * As with art_quick_aput_obj function, the 64b versions are in comments.
     */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                            // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi         // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
      // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO
1260
// Object-array store with type check and card marking.
// In: RDI = array, RSI = index, RDX = value (may be null).
// Stores the value when its type is trivially or dynamically assignable
// to the array's component type, marks the card for the array, otherwise
// throws ArrayStoreException via artThrowArrayStoreException.
// Note: 32-bit (%edi/%esi/%edx) addressing assumes the top 32 bits of the
// references are zero — see the explicit note on the card-mark store below.
DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx        // ecx := array->klass_
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx  // ecx := component type
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx       // Card-mark the array.
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    // Null store: no type check and no card mark needed.
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    SETUP_FP_CALLEE_SAVE_FRAME

#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
    movl %eax, %esi               // Pass arg2 = value's class.
    // movq %rax, %rsi
#else
                                     // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
#endif
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx       // Card-mark the array.
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
//  movb %dl, (%rdx, %rdi)
    ret
    // 24 = 3 pushes; 4 * 8 presumably the FP callee-save frame — TODO confirm.
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj
1346
1347// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    // Thin forwarder: the three arguments already sit in RDI/RSI/RDX, i.e. the
    // SysV C argument registers, so we can call straight into libc memcpy.
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy
1352
DEFINE_FUNCTION art_quick_test_suspend
    // Suspend-check slow path: spill the complete register state so that a GC
    // or stack walk triggered from the runtime can see every live value.
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend
1361
// 64-bit long arithmetic entrypoints. Deliberately unimplemented stubs —
// presumably the compiler emits these operations inline on x86-64, so the
// entrypoints should never be reached (TODO confirm against code generator).
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr
1368
// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

// Instance field setters. The artSet* entrypoints report status in EAX:
// zero means success, non-zero means an exception is pending, hence
// RETURN_IF_EAX_ZERO for all of them.
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO

// Instance field getters: the fetched value (or an error) comes back in the
// return register; deliver the pending exception if one was raised.
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1385
// Static field setters. Like the instance setters above, every artSet*
// entrypoint returns a status code in EAX (0 = success, non-zero = pending
// exception), so all five must use RETURN_IF_EAX_ZERO. The 64-bit setter
// previously used RETURN_OR_DELIVER_PENDING_EXCEPTION, which treats EAX as a
// result value rather than a status and would mishandle a successful store.
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO
1391
// Static field getters: value comes back in the return register; deliver the
// pending exception if the runtime call raised one.
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1399
DEFINE_FUNCTION art_quick_proxy_invoke_handler
    // Entry point for invocations of a proxy method. RDI holds the proxy
    // ArtMethod*; RSI (untouched here) carries the receiver, both of which are
    // forwarded as the first two C arguments.
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
1410
1411    /*
1412     * Called to resolve an imt conflict.
1413     * rdi is the conflict ArtMethod.
1414     * rax is a hidden argument that holds the target interface method.
1415     *
1416     * Note that this stub writes to rdi.
1417     */
1418DEFINE_FUNCTION art_quick_imt_conflict_trampoline
1419#if defined(__APPLE__)
1420    int3
1421    int3
1422#else
1423    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
1424.Limt_table_iterate:
1425    cmpq %rax, 0(%rdi)
1426    jne .Limt_table_next_entry
1427    // We successfully hit an entry in the table. Load the target method
1428    // and jump to it.
1429    movq __SIZEOF_POINTER__(%rdi), %rdi
1430    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
1431.Limt_table_next_entry:
1432    // If the entry is null, the interface method is not in the ImtConflictTable.
1433    cmpq LITERAL(0), 0(%rdi)
1434    jz .Lconflict_trampoline
1435    // Iterate over the entries of the ImtConflictTable.
1436    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
1437    jmp .Limt_table_iterate
1438.Lconflict_trampoline:
1439    // Call the runtime stub to populate the ImtConflictTable and jump to the
1440    // resolved method.
1441    movq %rax, %rdi  // Load interface method
1442    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1443#endif  // __APPLE__
1444END_FUNCTION art_quick_imt_conflict_trampoline
1445
DEFINE_FUNCTION art_quick_resolution_trampoline
    // Resolves the called method on first invocation, then tail-calls into the
    // resolved code (or delivers the pending exception on failure).
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
1460
1461/* Generic JNI frame layout:
1462 *
1463 * #-------------------#
1464 * |                   |
1465 * | caller method...  |
1466 * #-------------------#    <--- SP on entry
1467 *
1468 *          |
1469 *          V
1470 *
1471 * #-------------------#
1472 * | caller method...  |
1473 * #-------------------#
1474 * | Return            |
1475 * | R15               |    callee save
1476 * | R14               |    callee save
1477 * | R13               |    callee save
1478 * | R12               |    callee save
1479 * | R9                |    arg5
1480 * | R8                |    arg4
1481 * | RSI/R6            |    arg1
1482 * | RBP/R5            |    callee save
1483 * | RBX/R3            |    callee save
1484 * | RDX/R2            |    arg2
1485 * | RCX/R1            |    arg3
1486 * | XMM7              |    float arg 8
1487 * | XMM6              |    float arg 7
1488 * | XMM5              |    float arg 6
1489 * | XMM4              |    float arg 5
1490 * | XMM3              |    float arg 4
1491 * | XMM2              |    float arg 3
1492 * | XMM1              |    float arg 2
1493 * | XMM0              |    float arg 1
1494 * | RDI/Method*       |  <- sp
1495 * #-------------------#
1496 * | Scratch Alloca    |    5K scratch space
1497 * #---------#---------#
1498 * |         | sp*     |
1499 * | Tramp.  #---------#
1500 * | args    | thread  |
1501 * | Tramp.  #---------#
1502 * |         | method  |
1503 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1504 *
1505 *           |
1506 *           v              artQuickGenericJniTrampoline
1507 *
1508 * #-------------------#
1509 * | caller method...  |
1510 * #-------------------#
1511 * | Return PC         |
1512 * | Callee-Saves      |
1513 * | padding           | // 8B
1514 * | Method*           |    <--- (1)
1515 * #-------------------#
1516 * | local ref cookie  | // 4B
1517 * | padding           | // 0B or 4B to align handle scope on 8B address
1518 * | handle scope      | // Size depends on number of references; multiple of 4B.
1519 * #-------------------#
1520 * | JNI Stack Args    | // Empty if all args fit into registers.
1521 * #-------------------#    <--- SP on native call (1)
1522 * | Free scratch      |
1523 * #-------------------#
1524 * | SP for JNI call   | // Pointer to (1).
1525 * #-------------------#
1526 * | Hidden arg        | // For @CriticalNative
1527 * #-------------------#
1528 * |                   |
1529 * | Stack for Regs    |    The trampoline assembly will pop these values
1530 * |                   |    into registers for native call
1531 * #-------------------#
1532 */
1533    /*
1534     * Called to do a generic JNI down-call
1535     */
1536DEFINE_FUNCTION art_quick_generic_jni_trampoline
1537    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
1538
1539    movq %rsp, %rbp                 // save SP at (old) callee-save frame
1540    CFI_DEF_CFA_REGISTER(rbp)
1541
1542    //
1543    // reserve a lot of space
1544    //
1545    //      4    local state ref
1546    //      4    padding
1547    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
1548    //     16    handle scope member fields ?
1549    // +  112    14x 8-byte stack-2-register space
1550    // ------
1551    //   4332
1552    // 16-byte aligned: 4336
1553    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
1554    //       Also means: the padding is somewhere in the middle
1555    //
1556    //
1557    // New test: use 5K and release
1558    // 5k = 5120
1559    subq LITERAL(5120), %rsp
1560    // prepare for artQuickGenericJniTrampoline call
1561    // (Thread*, managed_sp, reserved_area)
1562    //    rdi       rsi           rdx   <= C calling convention
1563    //  gs:...      rbp           rsp   <= where they are
1564    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread::Current().
1565    movq %rbp, %rsi                    // Pass managed frame SP.
1566    movq %rsp, %rdx                    // Pass reserved area.
1567    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
1568
1569    // The C call will have registered the complete save-frame on success.
1570    // The result of the call is:
1571    //     %rax: pointer to native code, 0 on error.
1572    //     The bottom of the reserved area contains values for arg registers,
1573    //     hidden arg register and SP for out args for the call.
1574
1575    // Check for error (class init check or locking for synchronized native method can throw).
1576    test %rax, %rax
1577    jz .Lexception_in_native
1578
1579    // pop from the register-passing alloca region
1580    // what's the right layout?
1581    popq %rdi
1582    popq %rsi
1583    popq %rdx
1584    popq %rcx
1585    popq %r8
1586    popq %r9
1587    // TODO: skip floating point if unused, some flag.
1588    movq 0(%rsp), %xmm0
1589    movq 8(%rsp), %xmm1
1590    movq 16(%rsp), %xmm2
1591    movq 24(%rsp), %xmm3
1592    movq 32(%rsp), %xmm4
1593    movq 40(%rsp), %xmm5
1594    movq 48(%rsp), %xmm6
1595    movq 56(%rsp), %xmm7
1596
1597    // Save call target in scratch register.
1598    movq %rax, %r11
1599
1600    // Load hidden arg (rax) for @CriticalNative.
1601    movq 64(%rsp), %rax
1602    // Load SP for out args, releasing unneeded reserved area.
1603    movq 72(%rsp), %rsp
1604
1605    // native call
1606    call *%r11
1607
1608    // result sign extension is handled in C code
1609    // prepare for artQuickGenericJniEndTrampoline call
1610    // (Thread*,  result, result_f)
1611    //   rdi      rsi   rdx       <= C calling convention
1612    //  gs:...    rax   xmm0      <= where they are
1613    movq %gs:THREAD_SELF_OFFSET, %rdi
1614    movq %rax, %rsi
1615    movq %xmm0, %rdx
1616    call SYMBOL(artQuickGenericJniEndTrampoline)
1617
1618    // Pending exceptions possible.
1619    // TODO: use cmpq, needs direct encoding because of gas bug
1620    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
1621    test %rcx, %rcx
1622    jnz .Lexception_in_native
1623
1624    // Tear down the alloca.
1625    movq %rbp, %rsp
1626    CFI_REMEMBER_STATE
1627    CFI_DEF_CFA_REGISTER(rsp)
1628
1629    // Tear down the callee-save frame.
1630    // Load FPRs.
1631    // movq %xmm0, 16(%rsp)         // doesn't make sense!!!
1632    movq 24(%rsp), %xmm1            // neither does this!!!
1633    movq 32(%rsp), %xmm2
1634    movq 40(%rsp), %xmm3
1635    movq 48(%rsp), %xmm4
1636    movq 56(%rsp), %xmm5
1637    movq 64(%rsp), %xmm6
1638    movq 72(%rsp), %xmm7
1639    movq 80(%rsp), %xmm12
1640    movq 88(%rsp), %xmm13
1641    movq 96(%rsp), %xmm14
1642    movq 104(%rsp), %xmm15
1643    // was 80 bytes
1644    addq LITERAL(80 + 4*8), %rsp
1645    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
1646    // Save callee and GPR args, mixed together to agree with core spills bitmap.
1647    POP rcx  // Arg.
1648    POP rdx  // Arg.
1649    POP rbx  // Callee save.
1650    POP rbp  // Callee save.
1651    POP rsi  // Arg.
1652    POP r8   // Arg.
1653    POP r9   // Arg.
1654    POP r12  // Callee save.
1655    POP r13  // Callee save.
1656    POP r14  // Callee save.
1657    POP r15  // Callee save.
1658    // store into fpr, for when it's a fpr return...
1659    movq %rax, %xmm0
1660    ret
1661
1662    // Undo the unwinding information from above since it doesn't apply below.
1663    CFI_RESTORE_STATE_AND_DEF_CFA(rbp, 208)
1664.Lexception_in_native:
1665    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
1666    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
1667    movq (%rsp), %rsp
1668    call art_deliver_pending_exception
1669END_FUNCTION art_quick_generic_jni_trampoline
1670
DEFINE_FUNCTION art_deliver_pending_exception
    // Shared tail for paths that already detected a pending exception.
    // This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_deliver_pending_exception
1675
1676    /*
1677     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1678     * of a quick call:
1679     * RDI = method being called / to bridge to.
1680     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1681     */
1682DEFINE_FUNCTION art_quick_to_interpreter_bridge
1683    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
1684    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
1685    movq %rsp, %rdx                    // RDX := sp
1686    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
1687    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
1688    movq %rax, %xmm0                   // Place return value also into floating point return value.
1689    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
1690END_FUNCTION art_quick_to_interpreter_bridge
1691
1692    /*
1693     * Called to catch an attempt to invoke an obsolete method.
1694     * RDI = method being called.
1695     */
1696ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1697
1698    /*
1699     * Routine that intercepts method calls and returns.
1700     */
1701DEFINE_FUNCTION art_quick_instrumentation_entry
1702#if defined(__APPLE__)
1703    int3
1704    int3
1705#else
1706    SETUP_SAVE_REFS_AND_ARGS_FRAME
1707
1708    movq %rdi, %r12               // Preserve method pointer in a callee-save.
1709
1710    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
1711    movq %rsp, %rcx                     // Pass SP.
1712
1713    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
1714
1715                                  // %rax = result of call.
1716    testq %rax, %rax
1717    jz 1f
1718
1719    movq %r12, %rdi               // Reload method pointer.
1720    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
1721    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
1722
1723    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1724
1725    jmp *%rax                     // Tail call to intended method.
17261:
1727    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1728    DELIVER_PENDING_EXCEPTION
1729#endif  // __APPLE__
1730END_FUNCTION art_quick_instrumentation_entry
1731
DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
    // Reached via the return address patched in by art_quick_instrumentation_entry,
    // so there is no real return PC on the stack — push a placeholder.
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
    CFI_ADJUST_CFA_OFFSET(8)

    SETUP_SAVE_EVERYTHING_FRAME

    leaq 16(%rsp), %rcx       // Pass floating-point result pointer, in kSaveEverything frame.
    leaq 144(%rsp), %rdx      // Pass integer result pointer, in kSaveEverything frame.
    movq %rsp, %rsi           // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.

    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)

    // On return: RAX = return PC to resume at (null => exception pending),
    // RDX = alternative return PC for deoptimization (non-null => deoptimize).
    testq %rax, %rax          // Check if we have a return-pc to go to. If we don't then there was
                              // an exception
    jz .Ldo_deliver_instrumentation_exception
    testq %rdx, %rdx
    jnz .Ldeoptimize
    // Normal return.
    movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret
.Ldeoptimize:
    movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    jmp SYMBOL(art_quick_deoptimize)
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit
1762
1763    /*
1764     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1765     * will long jump to the upcall with a special exception of -1.
1766     */
1767DEFINE_FUNCTION art_quick_deoptimize
1768    SETUP_SAVE_EVERYTHING_FRAME        // Stack should be aligned now.
1769    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
1770    call SYMBOL(artDeoptimize)         // (Thread*)
1771    UNREACHABLE
1772END_FUNCTION art_quick_deoptimize
1773
1774    /*
1775     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1776     * will long jump to the interpreter bridge.
1777     */
1778DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
1779    SETUP_SAVE_EVERYTHING_FRAME
1780                                                // Stack should be aligned now.
1781    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
1782    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
1783    UNREACHABLE
1784END_FUNCTION art_quick_deoptimize_from_compiled_code
1785
1786    /*
1787     * String's compareTo.
1788     *
1789     * On entry:
1790     *    rdi:   this string object (known non-null)
1791     *    rsi:   comp string object (known non-null)
1792     */
1793DEFINE_FUNCTION art_quick_string_compareto
1794    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
1795    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
1796    /* Build pointers to the start of string data */
1797    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
1798    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
1799#if (STRING_COMPRESSION_FEATURE)
1800    /* Differ cases */
1801    shrl    LITERAL(1), %r8d
1802    jnc     .Lstring_compareto_this_is_compressed
1803    shrl    LITERAL(1), %r9d
1804    jnc     .Lstring_compareto_that_is_compressed
1805    jmp     .Lstring_compareto_both_not_compressed
1806.Lstring_compareto_this_is_compressed:
1807    shrl    LITERAL(1), %r9d
1808    jnc     .Lstring_compareto_both_compressed
1809    /* Comparison this (8-bit) and that (16-bit) */
1810    mov     %r8d, %eax
1811    subl    %r9d, %eax
1812    mov     %r8d, %ecx
1813    cmovg   %r9d, %ecx
1814    /* Going into loop to compare each character */
1815    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
1816.Lstring_compareto_loop_comparison_this_compressed:
1817    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
1818    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
1819    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
1820    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
1821    subl    %r9d, %r8d
1822    loope   .Lstring_compareto_loop_comparison_this_compressed
1823    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
1824.Lstring_compareto_keep_length1:
1825    ret
1826.Lstring_compareto_that_is_compressed:
1827    movl    %r8d, %eax
1828    subl    %r9d, %eax
1829    mov     %r8d, %ecx
1830    cmovg   %r9d, %ecx
1831    /* Comparison this (8-bit) and that (16-bit) */
1832    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
1833.Lstring_compareto_loop_comparison_that_compressed:
1834    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
1835    movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
1836    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
1837    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
1838    subl    %r9d, %r8d
1839    loope   .Lstring_compareto_loop_comparison_that_compressed
1840    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
1841.Lstring_compareto_keep_length2:
1842    ret
1843.Lstring_compareto_both_compressed:
1844    /* Calculate min length and count diff */
1845    movl    %r8d, %ecx
1846    movl    %r8d, %eax
1847    subl    %r9d, %eax
1848    cmovg   %r9d, %ecx
1849    jecxz   .Lstring_compareto_keep_length3
1850    repe    cmpsb
1851    je      .Lstring_compareto_keep_length3
1852    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
1853    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
1854    jmp     .Lstring_compareto_count_difference
1855#endif // STRING_COMPRESSION_FEATURE
1856.Lstring_compareto_both_not_compressed:
1857    /* Calculate min length and count diff */
1858    movl    %r8d, %ecx
1859    movl    %r8d, %eax
1860    subl    %r9d, %eax
1861    cmovg   %r9d, %ecx
1862    /*
1863     * At this point we have:
1864     *   eax: value to return if first part of strings are equal
1865     *   ecx: minimum among the lengths of the two strings
1866     *   esi: pointer to comp string data
1867     *   edi: pointer to this string data
1868     */
1869    jecxz .Lstring_compareto_keep_length3
1870    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
1871    je    .Lstring_compareto_keep_length3
1872    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
1873    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
1874.Lstring_compareto_count_difference:
1875    subl  %ecx, %eax              // return the difference
1876.Lstring_compareto_keep_length3:
1877    ret
1878END_FUNCTION art_quick_string_compareto
1879
// Deliberately unimplemented stub — traps if ever reached.
UNIMPLEMENTED art_quick_memcmp16
1881
DEFINE_FUNCTION art_quick_instance_of
    // Forwards RDI/RSI straight to the C++ helper; only needs to preserve the
    // FP callee-saves and keep the stack 16-byte aligned around the call.
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                      // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of
1892
DEFINE_FUNCTION art_quick_string_builder_append
    // StringBuilder.append() entrypoint. The first (uint32_t) argument arrives
    // in EDI; the remaining argument words live on the caller's stack just
    // above the return address, which is where RSI is pointed below.
    SETUP_SAVE_REFS_ONLY_FRAME                // save ref containing registers for GC
    // Outgoing argument set up
    leaq FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__(%rsp), %rsi  // pass args
    movq %gs:THREAD_SELF_OFFSET, %rdx         // pass Thread::Current()
    // Wrap the callee in SYMBOL() like every other runtime call in this file,
    // so the name is mangled correctly on all targets.
    call SYMBOL(artStringBuilderAppend)       // (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME              // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER   // return or deliver exception
END_FUNCTION art_quick_string_builder_append
1902
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass
//   the (sole) argument of this function;
// - register `reg` (which may be different from RAX) is used to return
//   the result of this function (instead of RAX);
// - if `reg` is different from `rdi`, RDI is treated like a normal
//   (non-argument) caller-save register;
// - if `reg` is different from `rax`, RAX is treated like a normal
//   (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
//   convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit, if it is 1 return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    // RAX is pushed first so it can double as scratch for the lock-word probe;
    // its original value is reloaded from this slot below.
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
    // forwarding address one.
    // Taken ~25% of the time.
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax            // Reload original RAX (clobbered by the probe above).
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    subq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8)
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)       // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow cleared the top bits.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)   // Drop/restore the RAX slot pushed at .Lslow_rb.
    ret
    END_FUNCTION VAR(name)
END_MACRO
2012
// One mark entrypoint per GPR, named after the register's hardware encoding
// (reg00 = RAX ... reg15 = R15) so the compiler can index them by number.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
2030
DEFINE_FUNCTION art_quick_read_barrier_slow
    // Forwards (ref, obj, offset) in RDI/RSI/RDX to the C++ slow path; only
    // the FP callee-saves and stack alignment need handling here.
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow
2041
DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    // Same shape as art_quick_read_barrier_slow, but for GC roots (single arg).
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow
2052
2053    /*
2054     * On stack replacement stub.
2055     * On entry:
2056     *   [sp] = return address
2057     *   rdi = stack to copy
2058     *   rsi = size of stack
2059     *   rdx = pc to call
2060     *   rcx = JValue* result
2061     *   r8 = shorty
2062     *   r9 = thread
2063     *
2064     * Note that the native C ABI already aligned the stack to 16-byte.
2065     */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles we need after the OSR code returns.
    PUSH rbp                      // Save rbp.
    PUSH rcx                      // Save rcx/result* (needed to store the JValue result).
    PUSH r8                       // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)              // Push null for ArtMethod*.
    CFI_ADJUST_CFA_OFFSET(8)
    movl %esi, %ecx               // rcx := size of stack
    movq %rdi, %rsi               // rsi := stack to copy
    movq %rsp, %rbp               // Save stack pointer to RBP for CFI use in .Losr_entry.
    call .Losr_entry              // Pushes a return address for the OSR code to return to.
    CFI_REMEMBER_STATE

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp         // Drop the null ArtMethod* slot.
    CFI_ADJUST_CFA_OFFSET(-8)
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    movq %rax, (%rcx)              // Store the result into the caller-provided JValue*.
    ret
.Losr_entry:
    // CFA offset 80 = 8 saved registers + null ArtMethod* + return address (10 slots * 8 bytes).
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 80)
    // Since the call has pushed the return address we need to switch the CFA register to RBP.
    CFI_DEF_CFA_REGISTER(rbp)

    subl LITERAL(8), %ecx         // Given stack size contains pushed frame pointer, subtract it.
    subq %rcx, %rsp               // Reserve space for the copied interpreter frame.
    movq %rsp, %rdi               // rdi := beginning of stack
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx                     // Enter the compiled code; its eventual RET uses the address
                                  // pushed by 'call .Losr_entry' above.
END_FUNCTION art_quick_osr_stub
2111
    /*
     * Trampoline for MethodHandle.invoke/invokeExact. Saves the full
     * refs-and-args frame, then hands control to the C++ runtime which
     * performs the polymorphic dispatch.
     */
DEFINE_FUNCTION art_quick_invoke_polymorphic
                                                   // On entry: RDI := unused, RSI := receiver
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %rsi, %rdi                                // RDI := receiver
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
    movq %rsp, %rdx                                // RDX := pass SP
    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
                                                   // Result is in RAX (or pending exception is set).
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register
                                                   // too, since the return type is not known here.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic
2124
    /*
     * Trampoline for invoke-custom (call sites). RDI carries the call site
     * index and is passed through unchanged as the first C argument.
     */
DEFINE_FUNCTION art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
                                                   // RDI := call_site_index (passed through)
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread::Current()
    movq %rsp, %rdx                                // RDX := SP
    call SYMBOL(artInvokeCustom)                   // artInvokeCustom(call_site_idx, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register
                                                   // too, since the return type is not known here.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom
2135
// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
//  Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
//  Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
//  Argument 2: RDX: The value of DEX PC (memory address of the methods bytecode).
// Keeping the DEX PC in callee-saved RBX and describing it via CFI lets the
// unwinder report the interpreter's current DEX PC from any point in the call.
DEFINE_FUNCTION ExecuteSwitchImplAsm
    PUSH rbx                 // Spill RBX (callee-saved; also realigns the stack for the call)
    movq %rdx, %rbx          // RBX = DEX PC (callee save register)
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)

    call *%rsi               // Call the wrapped function; RDI (context) is still argument 0

    POP rbx                  // Restore RBX
    ret                      // Return value (if any) is already in RAX
END_FUNCTION ExecuteSwitchImplAsm
2150
// On entry: edi is the class, r11 is the inline cache. r10 and rax are available.
//
// Records the seen class in the first free slot of the 5-entry inline cache.
// Each slot is tried in order: if it already holds this class we are done;
// if it is empty (0) we try to claim it with a lock cmpxchg (EAX is 0 on that
// path, so the compare-exchange only succeeds if the slot is still empty);
// if another thread raced us, the slot is re-examined. The last slot is
// written unconditionally, marking the cache megamorphic.
DEFINE_FUNCTION art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
    // Don't update the cache if we are marking (the GC may be moving classes).
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    jnz .Ldone
.Lentry1:
    movl INLINE_CACHE_CLASSES_OFFSET(%r11), %eax
    cmpl %edi, %eax                 // Slot 0 already holds this class?
    je .Ldone
    cmpl LITERAL(0), %eax           // Slot 0 occupied by a different class?
    jne .Lentry2
    // EAX == 0 here: claim the empty slot iff it is still 0.
    lock cmpxchg %edi, INLINE_CACHE_CLASSES_OFFSET(%r11)
    jz .Ldone                       // ZF set => we stored %edi successfully.
    jmp .Lentry1                    // Lost the race; re-check the slot.
.Lentry2:
    movl (INLINE_CACHE_CLASSES_OFFSET+4)(%r11), %eax
    cmpl %edi, %eax                 // Slot 1 already holds this class?
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry3
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+4)(%r11)
    jz .Ldone
    jmp .Lentry2
.Lentry3:
    movl (INLINE_CACHE_CLASSES_OFFSET+8)(%r11), %eax
    cmpl %edi, %eax                 // Slot 2 already holds this class?
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry4
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+8)(%r11)
    jz .Ldone
    jmp .Lentry3
.Lentry4:
    movl (INLINE_CACHE_CLASSES_OFFSET+12)(%r11), %eax
    cmpl %edi, %eax                 // Slot 3 already holds this class?
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry5
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+12)(%r11)
    jz .Ldone
    jmp .Lentry4
.Lentry5:
    // Unconditionally store, the cache is megamorphic.
    movl %edi, (INLINE_CACHE_CLASSES_OFFSET+16)(%r11)
.Ldone:
    ret
END_FUNCTION art_quick_update_inline_cache
2201
// On entry, method is at the bottom of the stack.
// Requests an optimized (JIT) compilation of the current method, then resumes
// execution. Saves everything so the method's live state survives the call.
DEFINE_FUNCTION art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod (read from below the frame)
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    call SYMBOL(artCompileOptimized)            // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_compile_optimized
2211