1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18
19MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
20    // Create space for ART FP callee-saved registers
21    subq MACRO_LITERAL(4 * 8), %rsp
22    CFI_ADJUST_CFA_OFFSET(4 * 8)
23    movq %xmm12, 0(%rsp)
24    movq %xmm13, 8(%rsp)
25    movq %xmm14, 16(%rsp)
26    movq %xmm15, 24(%rsp)
27END_MACRO
28
29MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
30    // Restore ART FP callee-saved registers
31    movq 0(%rsp), %xmm12
32    movq 8(%rsp), %xmm13
33    movq 16(%rsp), %xmm14
34    movq 24(%rsp), %xmm15
35    addq MACRO_LITERAL(4 * 8), %rsp
36    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
37END_MACRO
38
39// For x86, the CFA is esp+4, the address above the pushed return address on the stack.
40
41    /*
42     * Macro that sets up the callee save frame to conform with
43     * Runtime::CreateCalleeSaveMethod(kSaveAll)
44     */
45MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
46#if defined(__APPLE__)
47    int3
48    int3
49#else
50    // R10 := Runtime::Current()
51    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
52    movq (%r10), %r10
53    // Save callee save registers to agree with core spills bitmap.
54    PUSH r15  // Callee save.
55    PUSH r14  // Callee save.
56    PUSH r13  // Callee save.
57    PUSH r12  // Callee save.
58    PUSH rbp  // Callee save.
59    PUSH rbx  // Callee save.
60    // Create space for FPR args, plus space for ArtMethod*.
61    subq MACRO_LITERAL(4 * 8 + 8), %rsp
62    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
63    // Save FPRs.
64    movq %xmm12, 8(%rsp)
65    movq %xmm13, 16(%rsp)
66    movq %xmm14, 24(%rsp)
67    movq %xmm15, 32(%rsp)
68    // R10 := ArtMethod* for save all callee save frame method.
69    THIS_LOAD_REQUIRES_READ_BARRIER
70    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
71    // Store ArtMethod* to bottom of stack.
72    movq %r10, 0(%rsp)
73    // Store rsp as the top quick frame.
74    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
75
76    // Ugly compile-time check, but we only have the preprocessor.
77    // Last +8: implicit return address pushed on stack when caller made call.
78#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
79#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
80#endif
81#endif  // __APPLE__
82END_MACRO
83
84    /*
85     * Macro that sets up the callee save frame to conform with
86     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
87     */
88MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
89#if defined(__APPLE__)
90    int3
91    int3
92#else
93    // R10 := Runtime::Current()
94    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
95    movq (%r10), %r10
96    // Save callee and GPR args, mixed together to agree with core spills bitmap.
97    PUSH r15  // Callee save.
98    PUSH r14  // Callee save.
99    PUSH r13  // Callee save.
100    PUSH r12  // Callee save.
101    PUSH rbp  // Callee save.
102    PUSH rbx  // Callee save.
103    // Create space for FPR args, plus space for ArtMethod*.
104    subq LITERAL(8 + 4 * 8), %rsp
105    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
106    // Save FPRs.
107    movq %xmm12, 8(%rsp)
108    movq %xmm13, 16(%rsp)
109    movq %xmm14, 24(%rsp)
110    movq %xmm15, 32(%rsp)
111    // R10 := ArtMethod* for refs only callee save frame method.
112    THIS_LOAD_REQUIRES_READ_BARRIER
113    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
114    // Store ArtMethod* to bottom of stack.
115    movq %r10, 0(%rsp)
116    // Store rsp as the stop quick frame.
117    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
118
119    // Ugly compile-time check, but we only have the preprocessor.
120    // Last +8: implicit return address pushed on stack when caller made call.
121#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
122#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
123#endif
124#endif  // __APPLE__
125END_MACRO
126
127MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
128    movq 8(%rsp), %xmm12
129    movq 16(%rsp), %xmm13
130    movq 24(%rsp), %xmm14
131    movq 32(%rsp), %xmm15
132    addq LITERAL(8 + 4*8), %rsp
133    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
134    // TODO: optimize by not restoring callee-saves restored by the ABI
135    POP rbx
136    POP rbp
137    POP r12
138    POP r13
139    POP r14
140    POP r15
141END_MACRO
142
143    /*
144     * Macro that sets up the callee save frame to conform with
145     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
146     */
147MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
148#if defined(__APPLE__)
149    int3
150    int3
151#else
152    // R10 := Runtime::Current()
153    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
154    movq (%r10), %r10
155    // Save callee and GPR args, mixed together to agree with core spills bitmap.
156    PUSH r15  // Callee save.
157    PUSH r14  // Callee save.
158    PUSH r13  // Callee save.
159    PUSH r12  // Callee save.
160    PUSH r9   // Quick arg 5.
161    PUSH r8   // Quick arg 4.
162    PUSH rsi  // Quick arg 1.
163    PUSH rbp  // Callee save.
164    PUSH rbx  // Callee save.
165    PUSH rdx  // Quick arg 2.
166    PUSH rcx  // Quick arg 3.
167    // Create space for FPR args and create 2 slots for ArtMethod*.
168    subq MACRO_LITERAL(80 + 4 * 8), %rsp
169    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
170    // R10 := ArtMethod* for ref and args callee save frame method.
171    THIS_LOAD_REQUIRES_READ_BARRIER
172    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
173    // Save FPRs.
174    movq %xmm0, 16(%rsp)
175    movq %xmm1, 24(%rsp)
176    movq %xmm2, 32(%rsp)
177    movq %xmm3, 40(%rsp)
178    movq %xmm4, 48(%rsp)
179    movq %xmm5, 56(%rsp)
180    movq %xmm6, 64(%rsp)
181    movq %xmm7, 72(%rsp)
182    movq %xmm12, 80(%rsp)
183    movq %xmm13, 88(%rsp)
184    movq %xmm14, 96(%rsp)
185    movq %xmm15, 104(%rsp)
186    // Store ArtMethod* to bottom of stack.
187    movq %r10, 0(%rsp)
188    // Store rsp as the top quick frame.
189    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
190
191    // Ugly compile-time check, but we only have the preprocessor.
192    // Last +8: implicit return address pushed on stack when caller made call.
193#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
194#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
195#endif
196#endif  // __APPLE__
197END_MACRO
198
199MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
200    // Save callee and GPR args, mixed together to agree with core spills bitmap.
201    PUSH r15  // Callee save.
202    PUSH r14  // Callee save.
203    PUSH r13  // Callee save.
204    PUSH r12  // Callee save.
205    PUSH r9   // Quick arg 5.
206    PUSH r8   // Quick arg 4.
207    PUSH rsi  // Quick arg 1.
208    PUSH rbp  // Callee save.
209    PUSH rbx  // Callee save.
210    PUSH rdx  // Quick arg 2.
211    PUSH rcx  // Quick arg 3.
212    // Create space for FPR args and create 2 slots for ArtMethod*.
213    subq LITERAL(80 + 4 * 8), %rsp
214    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
215    // Save FPRs.
216    movq %xmm0, 16(%rsp)
217    movq %xmm1, 24(%rsp)
218    movq %xmm2, 32(%rsp)
219    movq %xmm3, 40(%rsp)
220    movq %xmm4, 48(%rsp)
221    movq %xmm5, 56(%rsp)
222    movq %xmm6, 64(%rsp)
223    movq %xmm7, 72(%rsp)
224    movq %xmm12, 80(%rsp)
225    movq %xmm13, 88(%rsp)
226    movq %xmm14, 96(%rsp)
227    movq %xmm15, 104(%rsp)
228    // Store ArtMethod to bottom of stack.
229    movq %rdi, 0(%rsp)
230    // Store rsp as the stop quick frame.
231    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
232END_MACRO
233
234MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
235    // Restore FPRs.
236    movq 16(%rsp), %xmm0
237    movq 24(%rsp), %xmm1
238    movq 32(%rsp), %xmm2
239    movq 40(%rsp), %xmm3
240    movq 48(%rsp), %xmm4
241    movq 56(%rsp), %xmm5
242    movq 64(%rsp), %xmm6
243    movq 72(%rsp), %xmm7
244    movq 80(%rsp), %xmm12
245    movq 88(%rsp), %xmm13
246    movq 96(%rsp), %xmm14
247    movq 104(%rsp), %xmm15
248    addq MACRO_LITERAL(80 + 4 * 8), %rsp
249    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
250    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
251    POP rcx
252    POP rdx
253    POP rbx
254    POP rbp
255    POP rsi
256    POP r8
257    POP r9
258    POP r12
259    POP r13
260    POP r14
261    POP r15
262END_MACRO
263
264
265    /*
266     * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
267     * exception is Thread::Current()->exception_.
268     */
269MACRO0(DELIVER_PENDING_EXCEPTION)
270    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
271    // (Thread*) setup
272    movq %gs:THREAD_SELF_OFFSET, %rdi
273    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
274    UNREACHABLE
275END_MACRO
276
277MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
278    DEFINE_FUNCTION VAR(c_name, 0)
279    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
280    // Outgoing argument set up
281    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
282    call VAR(cxx_name, 1)     // cxx_name(Thread*)
283    UNREACHABLE
284    END_FUNCTION VAR(c_name, 0)
285END_MACRO
286
287MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
288    DEFINE_FUNCTION VAR(c_name, 0)
289    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
290    // Outgoing argument set up
291    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
292    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*)
293    UNREACHABLE
294    END_FUNCTION VAR(c_name, 0)
295END_MACRO
296
297MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
298    DEFINE_FUNCTION VAR(c_name, 0)
299    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
300    // Outgoing argument set up
301    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
302    call VAR(cxx_name, 1)     // cxx_name(Thread*)
303    UNREACHABLE
304    END_FUNCTION VAR(c_name, 0)
305END_MACRO
306
307    /*
308     * Called by managed code to create and deliver a NullPointerException.
309     */
310NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
311
312    /*
313     * Called by managed code to create and deliver an ArithmeticException.
314     */
315NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
316
317    /*
318     * Called by managed code to create and deliver a StackOverflowError.
319     */
320NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
321
322    /*
323     * Called by managed code, saves callee saves and then calls artThrowException
324     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
325     */
326ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
327
328    /*
329     * Called by managed code to create and deliver a NoSuchMethodError.
330     */
331ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
332
333    /*
334     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
335     * index, arg2 holds limit.
336     */
337TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
338
339    /*
340     * All generated callsites for interface invokes and invocation slow paths will load arguments
341     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
342     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
343     * stack and call the appropriate C helper.
344     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
345     *
346     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
347     * of the target Method* in rax and method->code_ in rdx.
348     *
349     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
350     * thread and we branch to another stub to deliver it.
351     *
352     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
353     * location on the stack.
354     *
355     * Adapted from x86 code.
356     */
357MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
358    DEFINE_FUNCTION VAR(c_name, 0)
359    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
360    // Helper signature is always
361    // (method_idx, *this_object, *caller_method, *self, sp)
362
363    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx  // pass caller Method*
364    movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
365    movq %rsp, %r8                                         // pass SP
366
367    call VAR(cxx_name, 1)                   // cxx_name(arg1, arg2, caller method*, Thread*, SP)
368                                                           // save the code pointer
369    movq %rax, %rdi
370    movq %rdx, %rax
371    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
372
373    testq %rdi, %rdi
374    jz 1f
375
376    // Tail call to intended method.
377    jmp *%rax
3781:
379    DELIVER_PENDING_EXCEPTION
380    END_FUNCTION VAR(c_name, 0)
381END_MACRO
382
383INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
384INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
385
386INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
387INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
388INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
389INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
390
391
392    /*
393     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
394     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
395     * the end of the shorty.
396     */
397MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
3981: // LOOP
399    movb (%r10), %al              // al := *shorty
400    addq MACRO_LITERAL(1), %r10   // shorty++
401    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
402    je VAR(finished, 1)
403    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
404    je 2f
405    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
406    je 3f
407    addq MACRO_LITERAL(4), %r11   // arg_array++
408    //  Handle extra space in arg array taken by a long.
409    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
410    jne 1b
411    addq MACRO_LITERAL(4), %r11   // arg_array++
412    jmp 1b                        // goto LOOP
4132:  // FOUND_DOUBLE
414    movsd (%r11), REG_VAR(xmm_reg, 0)
415    addq MACRO_LITERAL(8), %r11   // arg_array+=2
416    jmp 4f
4173:  // FOUND_FLOAT
418    movss (%r11), REG_VAR(xmm_reg, 0)
419    addq MACRO_LITERAL(4), %r11   // arg_array++
4204:
421END_MACRO
422
423    /*
424     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
425     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
426     * the end of the shorty.
427     */
428MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
4291: // LOOP
430    movb (%r10), %al              // al := *shorty
431    addq MACRO_LITERAL(1), %r10   // shorty++
432    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
433    je  VAR(finished, 2)
434    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
435    je 2f
436    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
437    je 3f
438    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
439    je 4f
440    movl (%r11), REG_VAR(gpr_reg32, 1)
441    addq MACRO_LITERAL(4), %r11   // arg_array++
442    jmp 5f
4432:  // FOUND_LONG
444    movq (%r11), REG_VAR(gpr_reg64, 0)
445    addq MACRO_LITERAL(8), %r11   // arg_array+=2
446    jmp 5f
4473:  // SKIP_FLOAT
448    addq MACRO_LITERAL(4), %r11   // arg_array++
449    jmp 1b
4504:  // SKIP_DOUBLE
451    addq MACRO_LITERAL(8), %r11   // arg_array+=2
452    jmp 1b
4535:
454END_MACRO
455
456    /*
457     * Quick invocation stub.
458     * On entry:
459     *   [sp] = return address
460     *   rdi = method pointer
461     *   rsi = argument array that must at least contain the this pointer.
462     *   rdx = size of argument array in bytes
463     *   rcx = (managed) thread pointer
464     *   r8 = JValue* result
465     *   r9 = char* shorty
466     */
467DEFINE_FUNCTION art_quick_invoke_stub
468#if defined(__APPLE__)
469    int3
470    int3
471#else
472    // Set up argument XMM registers.
473    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
474    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
475    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
476    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
477    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
478    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
479    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
480    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
481    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
482    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
483    .balign 16
484.Lxmm_setup_finished:
485    PUSH rbp                      // Save rbp.
486    PUSH r8                       // Save r8/result*.
487    PUSH r9                       // Save r9/shorty*.
488    PUSH rbx                      // Save native callee save rbx
489    PUSH r12                      // Save native callee save r12
490    PUSH r13                      // Save native callee save r13
491    PUSH r14                      // Save native callee save r14
492    PUSH r15                      // Save native callee save r15
493    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
494    CFI_DEF_CFA_REGISTER(rbp)
495
496    movl %edx, %r10d
497    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
498                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
499    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
500    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
501                                   // r13, r14, and r15
502    subq %rdx, %rsp                // Reserve stack space for argument array.
503
504#if (STACK_REFERENCE_SIZE != 4)
505#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
506#endif
507    movq LITERAL(0), (%rsp)       // Store null for method*
508
509    movl %r10d, %ecx              // Place size of args in rcx.
510    movq %rdi, %rax               // rax := method to be called
511    movq %rsi, %r11               // r11 := arg_array
512    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
513                                  // arguments.
514    // Copy arg array into stack.
515    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
516    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
517    movq %rax, %rdi               // rdi := method to be called
518    movl (%r11), %esi             // rsi := this pointer
519    addq LITERAL(4), %r11         // arg_array++
520    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
521    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
522    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
523    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
524.Lgpr_setup_finished:
525    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
526    movq %rbp, %rsp               // Restore stack pointer.
527    POP r15                       // Pop r15
528    POP r14                       // Pop r14
529    POP r13                       // Pop r13
530    POP r12                       // Pop r12
531    POP rbx                       // Pop rbx
532    POP r9                        // Pop r9 - shorty*
533    POP r8                        // Pop r8 - result*.
534    POP rbp                       // Pop rbp
535    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
536    je .Lreturn_double_quick
537    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
538    je .Lreturn_float_quick
539    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
540    ret
541.Lreturn_double_quick:
542    movsd %xmm0, (%r8)            // Store the double floating point result.
543    ret
544.Lreturn_float_quick:
545    movss %xmm0, (%r8)            // Store the floating point result.
546    ret
547#endif  // __APPLE__
548END_FUNCTION art_quick_invoke_stub
549
550    /*
551     * Quick invocation stub.
552     * On entry:
553     *   [sp] = return address
554     *   rdi = method pointer
555     *   rsi = argument array or null if no arguments.
556     *   rdx = size of argument array in bytes
557     *   rcx = (managed) thread pointer
558     *   r8 = JValue* result
559     *   r9 = char* shorty
560     */
561DEFINE_FUNCTION art_quick_invoke_static_stub
562#if defined(__APPLE__)
563    int3
564    int3
565#else
566    // Set up argument XMM registers.
567    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
568    movq %rsi, %r11               // R11 := arg_array
569    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
570    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
571    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
572    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
573    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
574    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
575    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
576    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
577    .balign 16
578.Lxmm_setup_finished2:
579    PUSH rbp                      // Save rbp.
580    PUSH r8                       // Save r8/result*.
581    PUSH r9                       // Save r9/shorty*.
582    PUSH rbx                      // Save rbx
583    PUSH r12                      // Save r12
584    PUSH r13                      // Save r13
585    PUSH r14                      // Save r14
586    PUSH r15                      // Save r15
587    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
588    CFI_DEF_CFA_REGISTER(rbp)
589
590    movl %edx, %r10d
591    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
592                                   // r8, r9, r12, r13, r14, and r15 in frame.
593    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
594    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
595                                   // r13, r14, and r15.
596    subq %rdx, %rsp                // Reserve stack space for argument array.
597
598#if (STACK_REFERENCE_SIZE != 4)
599#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
600#endif
601    movq LITERAL(0), (%rsp)        // Store null for method*
602
603    movl %r10d, %ecx               // Place size of args in rcx.
604    movq %rdi, %rax                // rax := method to be called
605    movq %rsi, %r11                // r11 := arg_array
606    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
607                                   // stack arguments.
608    // Copy arg array into stack.
609    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
610    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
611    movq %rax, %rdi                // rdi := method to be called
612    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
613    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
614    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
615    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
616    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
617.Lgpr_setup_finished2:
618    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
619    movq %rbp, %rsp                // Restore stack pointer.
620    POP r15                        // Pop r15
621    POP r14                        // Pop r14
622    POP r13                        // Pop r13
623    POP r12                        // Pop r12
624    POP rbx                        // Pop rbx
625    POP r9                         // Pop r9 - shorty*.
626    POP r8                         // Pop r8 - result*.
627    POP rbp                        // Pop rbp
628    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
629    je .Lreturn_double_quick2
630    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
631    je .Lreturn_float_quick2
632    movq %rax, (%r8)               // Store the result assuming its a long, int or Object*
633    ret
634.Lreturn_double_quick2:
635    movsd %xmm0, (%r8)             // Store the double floating point result.
636    ret
637.Lreturn_float_quick2:
638    movss %xmm0, (%r8)             // Store the floating point result.
639    ret
640#endif  // __APPLE__
641END_FUNCTION art_quick_invoke_static_stub
642
643    /*
644     * Long jump stub.
645     * On entry:
646     *   rdi = gprs
647     *   rsi = fprs
648     */
649DEFINE_FUNCTION art_quick_do_long_jump
650#if defined(__APPLE__)
651    int3
652    int3
653#else
654    // Restore FPRs.
655    movq 0(%rsi), %xmm0
656    movq 8(%rsi), %xmm1
657    movq 16(%rsi), %xmm2
658    movq 24(%rsi), %xmm3
659    movq 32(%rsi), %xmm4
660    movq 40(%rsi), %xmm5
661    movq 48(%rsi), %xmm6
662    movq 56(%rsi), %xmm7
663    movq 64(%rsi), %xmm8
664    movq 72(%rsi), %xmm9
665    movq 80(%rsi), %xmm10
666    movq 88(%rsi), %xmm11
667    movq 96(%rsi), %xmm12
668    movq 104(%rsi), %xmm13
669    movq 112(%rsi), %xmm14
670    movq 120(%rsi), %xmm15
671    // Restore FPRs.
672    movq %rdi, %rsp   // RSP points to gprs.
673    // Load all registers except RSP and RIP with values in gprs.
674    popq %r15
675    popq %r14
676    popq %r13
677    popq %r12
678    popq %r11
679    popq %r10
680    popq %r9
681    popq %r8
682    popq %rdi
683    popq %rsi
684    popq %rbp
685    addq LITERAL(8), %rsp   // Skip rsp
686    popq %rbx
687    popq %rdx
688    popq %rcx
689    popq %rax
690    popq %rsp      // Load stack pointer.
691    ret            // From higher in the stack pop rip.
692#endif  // __APPLE__
693END_FUNCTION art_quick_do_long_jump
694
695MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
696    DEFINE_FUNCTION VAR(c_name, 0)
697    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
698    // Outgoing argument set up
699    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
700    call VAR(cxx_name, 1)                // cxx_name(Thread*)
701    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
702    CALL_MACRO(return_macro, 2)          // return or deliver exception
703    END_FUNCTION VAR(c_name, 0)
704END_MACRO
705
706MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
707    DEFINE_FUNCTION VAR(c_name, 0)
708    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
709    // Outgoing argument set up
710    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
711    call VAR(cxx_name, 1)                // cxx_name(arg0, Thread*)
712    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
713    CALL_MACRO(return_macro, 2)          // return or deliver exception
714    END_FUNCTION VAR(c_name, 0)
715END_MACRO
716
717MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
718    DEFINE_FUNCTION VAR(c_name, 0)
719    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
720    // Outgoing argument set up
721    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
722    call VAR(cxx_name, 1)                // cxx_name(arg0, arg1, Thread*)
723    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
724    CALL_MACRO(return_macro, 2)          // return or deliver exception
725    END_FUNCTION VAR(c_name, 0)
726END_MACRO
727
728MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
729    DEFINE_FUNCTION VAR(c_name, 0)
730    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
731    // Outgoing argument set up
732    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
733    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, Thread*)
734    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
735    CALL_MACRO(return_macro, 2)         // return or deliver exception
736    END_FUNCTION VAR(c_name, 0)
737END_MACRO
738
739MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
740    DEFINE_FUNCTION VAR(c_name, 0)
741    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
742    // Outgoing argument set up
743    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
744    call VAR(cxx_name, 1)               // cxx_name(arg1, arg2, arg3, arg4, Thread*)
745    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
746    CALL_MACRO(return_macro, 2)         // return or deliver exception
747    END_FUNCTION VAR(c_name, 0)
748END_MACRO
749
750MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
751    DEFINE_FUNCTION VAR(c_name, 0)
752    movq 8(%rsp), %rsi                  // pass referrer
753    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
754                                        // arg0 is in rdi
755    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
756    call VAR(cxx_name, 1)               // cxx_name(arg0, referrer, Thread*)
757    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
758    CALL_MACRO(return_macro, 2)
759    END_FUNCTION VAR(c_name, 0)
760END_MACRO
761
762MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
763    DEFINE_FUNCTION VAR(c_name, 0)
764    movq 8(%rsp), %rdx                  // pass referrer
765    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
766                                        // arg0 and arg1 are in rdi/rsi
767    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
768    call VAR(cxx_name, 1)               // (arg0, arg1, referrer, Thread*)
769    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
770    CALL_MACRO(return_macro, 2)
771    END_FUNCTION VAR(c_name, 0)
772END_MACRO
773
774MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
775    DEFINE_FUNCTION VAR(c_name, 0)
776    movq 8(%rsp), %rcx                  // pass referrer
777    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
778                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
779    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
780    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, referrer, Thread*)
781    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
782    CALL_MACRO(return_macro, 2)         // return or deliver exception
783    END_FUNCTION VAR(c_name, 0)
784END_MACRO
785
786MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
787    testq %rax, %rax               // rax == 0 ?
788    jz  1f                         // if rax == 0 goto 1
789    ret                            // return
7901:                                 // deliver exception on current thread
791    DELIVER_PENDING_EXCEPTION
792END_MACRO
793
794MACRO0(RETURN_IF_EAX_ZERO)
795    testl %eax, %eax               // eax == 0 ?
796    jnz  1f                        // if eax != 0 goto 1
797    ret                            // return
7981:                                 // deliver exception on current thread
799    DELIVER_PENDING_EXCEPTION
800END_MACRO
801
802MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
803    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
804    testq %rcx, %rcx               // rcx == 0 ?
805    jnz 1f                         // if rcx != 0 goto 1
806    ret                            // return
8071:                                 // deliver exception on current thread
808    DELIVER_PENDING_EXCEPTION
809END_MACRO
810
811// Generate the allocation entrypoints for each allocator.
812// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
813// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
814// to macros and the VAR macro won't concatenate arguments properly), this also breaks having
815// multi-line macros that use each other (hence using 1 macro per newline below).
816#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
817  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
818#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
819  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
820#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
821  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
822#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
823  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
824#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
825  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
826#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
827  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
828#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
829  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
830#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
831  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
832#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
833  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
834#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
835  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
836#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
837  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
838#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
839  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
840
841GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
842GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
843GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
844GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
845GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
846GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
847GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
848GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
849GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
850GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
851GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
852GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
853
854GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
855GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
856GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
857GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
858GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
859GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
860GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
861GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
862GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
863GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
864GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
865GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
866
867GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
868GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
869GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
870GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
871GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
872GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
873GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
874GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
875GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
876GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
877GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
878GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
879
880GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
881GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
882GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
883GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
884GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
885GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
886GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
887GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
888GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
889GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
890GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
891GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
892
893GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
894GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
895GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
896GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
897GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
898GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
899GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
900GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
901GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
902GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
903GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
904GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
905
906GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
907GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
908GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
909GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
910GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
911GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
912GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
913GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
914GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
915GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
916GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
917GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
918
919DEFINE_FUNCTION art_quick_alloc_object_tlab
920    // Fast path tlab allocation.
921    // RDI: uint32_t type_idx, RSI: ArtMethod*
922    // RDX, RCX, R8, R9: free. RAX: return val.
923    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
924                                                               // Load the class
925    movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
926    testl %edx, %edx                                           // Check null class
927    jz   .Lart_quick_alloc_object_tlab_slow_path
928                                                               // Check class status.
929    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
930    jne  .Lart_quick_alloc_object_tlab_slow_path
931                                                               // Check access flags has kAccClassIsFinalizable
932    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
933    jnz  .Lart_quick_alloc_object_tlab_slow_path
934    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
935    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
936    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
937    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
938    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
939    addq %rax, %rcx                                            // Add the object size.
940    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
941    ja   .Lart_quick_alloc_object_tlab_slow_path
942    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
943    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
944                                                               // Store the class pointer in the header.
945                                                               // No fence needed for x86.
946    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
947    ret                                                        // Fast path succeeded.
948.Lart_quick_alloc_object_tlab_slow_path:
949    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
950    // Outgoing argument set up
951    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
952    call SYMBOL(artAllocObjectFromCodeTLAB)      // cxx_name(arg0, arg1, Thread*)
953    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
954    RETURN_IF_RESULT_IS_NON_ZERO         // return or deliver exception
955END_FUNCTION art_quick_alloc_object_tlab
956
957GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
958GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
959GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
960GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
961GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
962GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
963GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
964GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
965GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
966GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
967GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
968
969GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
970GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
971GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
972GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
973GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
974GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
975GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
976GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
977GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
978GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
979GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
980GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
981
982GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
983GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
984GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
985GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
986GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
987GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
988GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
989GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
990GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
991GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
992GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
993GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
994
995GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
996GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
997GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
998GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
999GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
1000GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
1001GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
1002GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
1003GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
1004GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
1005GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
1006GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
1007
1008GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
1009GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1010GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1011GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1012GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
1013GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1014GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1015GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
1016GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1017GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1018GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1019GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1020
1021GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
1022GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
1023GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
1024GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
1025GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
1026GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
1027GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
1028GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
1029GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
1030GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
1031GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
1032GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
1033
1034ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
1035ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
1036ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
1037ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
1038
1039TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
1040
1041DEFINE_FUNCTION art_quick_lock_object
1042    testl %edi, %edi                      // Null check object/rdi.
1043    jz   .Lslow_lock
1044.Lretry_lock:
1045    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
1046    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
1047    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
1048    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
1049    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
1050    test %ecx, %ecx
1051    jnz  .Lalready_thin                   // Lock word contains a thin lock.
1052    // unlocked case - edx: original lock word, edi: obj.
1053    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
1054    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
1055    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
1056    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1057    jnz  .Lretry_lock                     // cmpxchg failed retry
1058    ret
1059.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
1060    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
1061    cmpw %cx, %dx                         // do we hold the lock already?
1062    jne  .Lslow_lock
1063    movl %edx, %ecx                       // copy the lock word to check count overflow.
1064    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
1065    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
1066    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set
1067    jne  .Lslow_lock                      // count overflowed so go slow
1068    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
1069    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
1070    // update lockword, cmpxchg necessary for read barrier bits.
1071    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
1072    jnz  .Lretry_lock                     // cmpxchg failed retry
1073    ret
1074.Lslow_lock:
1075    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
1076    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1077    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
1078    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
1079    RETURN_IF_EAX_ZERO
1080END_FUNCTION art_quick_lock_object
1081
1082DEFINE_FUNCTION art_quick_unlock_object
1083    testl %edi, %edi                      // null check object/edi
1084    jz   .Lslow_unlock
1085.Lretry_unlock:
1086    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
1087    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
1088    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
1089    jnz  .Lslow_unlock                    // lock word contains a monitor
1090    cmpw %cx, %dx                         // does the thread id match?
1091    jne  .Lslow_unlock
1092    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
1093    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
1094    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
1095    jae  .Lrecursive_thin_unlock
1096    // update lockword, cmpxchg necessary for read barrier bits.
1097    movl %ecx, %eax                       // eax: old lock word.
1098    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
1099#ifndef USE_READ_BARRIER
1100    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1101#else
1102    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
1103    jnz  .Lretry_unlock                   // cmpxchg failed retry
1104#endif
1105    ret
1106.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
1107    // update lockword, cmpxchg necessary for read barrier bits.
1108    movl %ecx, %eax                       // eax: old lock word.
1109    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
1110#ifndef USE_READ_BARRIER
1111    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1112#else
1113    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
1114    jnz  .Lretry_unlock                   // cmpxchg failed retry
1115#endif
1116    ret
1117.Lslow_unlock:
1118    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
1119    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1120    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
1121    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
1122    RETURN_IF_EAX_ZERO
1123END_FUNCTION art_quick_unlock_object
1124
1125DEFINE_FUNCTION art_quick_check_cast
1126    PUSH rdi                          // Save args for exc
1127    PUSH rsi
1128    SETUP_FP_CALLEE_SAVE_FRAME
1129    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
1130    testq %rax, %rax
1131    jz 1f                             // jump forward if not assignable
1132    RESTORE_FP_CALLEE_SAVE_FRAME
1133    addq LITERAL(16), %rsp            // pop arguments
1134    CFI_ADJUST_CFA_OFFSET(-16)
1135
1136    ret
11371:
1138    RESTORE_FP_CALLEE_SAVE_FRAME
1139    POP rsi                           // Pop arguments
1140    POP rdi
1141    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
1142    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
1143    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
1144    int3                              // unreached
1145END_FUNCTION art_quick_check_cast
1146
1147
1148    /*
1149     * Entry from managed code for array put operations of objects where the value being stored
1150     * needs to be checked for compatibility.
1151     *
1152     * Currently all the parameters should fit into the 32b portions of the registers. Index always
1153     * will. So we optimize for a tighter encoding. The 64b versions are in comments.
1154     *
1155     * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
1156     */
1157DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
1158#if defined(__APPLE__)
1159    int3
1160    int3
1161#else
1162    testl %edi, %edi
1163//  testq %rdi, %rdi
1164    jnz art_quick_aput_obj_with_bound_check
1165    jmp art_quick_throw_null_pointer_exception
1166#endif  // __APPLE__
1167END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
1168
1169
1170DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
1171#if defined(__APPLE__)
1172    int3
1173    int3
1174#else
1175    movl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %ecx
1176//  movl MIRROR_ARRAY_LENGTH_OFFSET(%rdi), %ecx  // This zero-extends, so value(%rcx)=value(%ecx)
1177    cmpl %ecx, %esi
1178    jb art_quick_aput_obj
1179    mov %esi, %edi
1180//  mov %rsi, %rdi
1181    mov %ecx, %esi
1182//  mov %rcx, %rsi
1183    jmp art_quick_throw_array_bounds
1184#endif  // __APPLE__
1185END_FUNCTION art_quick_aput_obj_with_bound_check
1186
1187
1188DEFINE_FUNCTION art_quick_aput_obj
1189    testl %edx, %edx                // store of null
1190//  test %rdx, %rdx
1191    jz .Ldo_aput_null
1192    movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %ecx
1193//  movq MIRROR_OBJECT_CLASS_OFFSET(%rdi), %rcx
1194    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
1195//  movq MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
1196    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
1197//  cmpq MIRROR_CLASS_OFFSET(%rdx), %rcx
1198    jne .Lcheck_assignability
1199.Ldo_aput:
1200    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1201//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1202    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
1203    shrl LITERAL(7), %edi
1204//  shrl LITERAL(7), %rdi
1205    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
1206    ret
1207.Ldo_aput_null:
1208    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1209//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1210    ret
1211.Lcheck_assignability:
1212    // Save arguments.
1213    PUSH rdi
1214    PUSH rsi
1215    PUSH rdx
1216    subq LITERAL(8), %rsp        // Alignment padding.
1217    CFI_ADJUST_CFA_OFFSET(8)
1218    SETUP_FP_CALLEE_SAVE_FRAME
1219
1220                                  // "Uncompress" = do nothing, as already zero-extended on load.
1221    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
1222    movq %rcx, %rdi               // Pass arg1 = array's component type.
1223
1224    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
1225
1226    // Exception?
1227    testq %rax, %rax
1228    jz   .Lthrow_array_store_exception
1229
1230    RESTORE_FP_CALLEE_SAVE_FRAME
1231    // Restore arguments.
1232    addq LITERAL(8), %rsp
1233    CFI_ADJUST_CFA_OFFSET(-8)
1234    POP  rdx
1235    POP  rsi
1236    POP  rdi
1237
1238    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1239//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1240    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
1241    shrl LITERAL(7), %edi
1242//  shrl LITERAL(7), %rdi
1243    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
1244//  movb %dl, (%rdx, %rdi)
1245    ret
1246    CFI_ADJUST_CFA_OFFSET(32 + 4 * 8)  // Reset unwind info so following code unwinds.
1247.Lthrow_array_store_exception:
1248    RESTORE_FP_CALLEE_SAVE_FRAME
1249    // Restore arguments.
1250    addq LITERAL(8), %rsp
1251    CFI_ADJUST_CFA_OFFSET(-8)
1252    POP  rdx
1253    POP  rsi
1254    POP  rdi
1255
1256    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.
1257
1258    // Outgoing argument set up.
1259    movq %rdx, %rsi                         // Pass arg 2 = value.
1260    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
1261                                            // Pass arg 1 = array.
1262    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
1263    int3                          // unreached
1264END_FUNCTION art_quick_aput_obj
1265
1266// TODO: This is quite silly on X86_64 now.
1267DEFINE_FUNCTION art_quick_memcpy
1268    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
1269    ret
1270END_FUNCTION art_quick_memcpy
1271
1272NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
1273
1274UNIMPLEMENTED art_quick_ldiv
1275UNIMPLEMENTED art_quick_lmod
1276UNIMPLEMENTED art_quick_lmul
1277UNIMPLEMENTED art_quick_lshl
1278UNIMPLEMENTED art_quick_lshr
1279UNIMPLEMENTED art_quick_lushr
1280
1281THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
1282THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO
1283THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
1284THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
1285THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
1286
1287TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1288TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1289TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1290TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1291TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1292TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1293TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1294
1295TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_EAX_ZERO
1296TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_EAX_ZERO
1297TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
1298TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO
1299
1300ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1301ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1302ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1303ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1304ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1305ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1306ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1307
1308// This is singled out as the argument order is different.
1309DEFINE_FUNCTION art_quick_set64_static
1310    movq %rsi, %rdx                      // pass new_val
1311    movq 8(%rsp), %rsi                   // pass referrer
1312    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
1313                                         // field_idx is in rdi
1314    movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
1315    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
1316    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
1317    RETURN_IF_EAX_ZERO                   // return or deliver exception
1318END_FUNCTION art_quick_set64_static
1319
1320
1321DEFINE_FUNCTION art_quick_proxy_invoke_handler
1322    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
1323
1324    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
1325    movq %rsp, %rcx                         // Pass SP.
1326    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
1327    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
1328    movq %rax, %xmm0                        // Copy return value in case of float returns.
1329    RETURN_OR_DELIVER_PENDING_EXCEPTION
1330END_FUNCTION art_quick_proxy_invoke_handler
1331
1332    /*
1333     * Called to resolve an imt conflict.
1334     * rax is a hidden argument that holds the target method's dex method index.
1335     */
1336DEFINE_FUNCTION art_quick_imt_conflict_trampoline
1337#if defined(__APPLE__)
1338    int3
1339    int3
1340#else
1341    movq 8(%rsp), %rdi            // load caller Method*
1342    movl ART_METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi     // load dex_cache_resolved_methods
1343    movq MIRROR_LONG_ARRAY_DATA_OFFSET(%rdi, %rax, 8), %rdi  // load the target method
1344    jmp art_quick_invoke_interface_trampoline
1345#endif  // __APPLE__
1346END_FUNCTION art_quick_imt_conflict_trampoline
1347
1348DEFINE_FUNCTION art_quick_resolution_trampoline
1349    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
1350    movq %gs:THREAD_SELF_OFFSET, %rdx
1351    movq %rsp, %rcx
1352    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
1353    movq %rax, %r10               // Remember returned code pointer in R10.
1354    movq (%rsp), %rdi             // Load called method into RDI.
1355    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
1356    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
1357    jz 1f
1358    jmp *%r10                     // Tail call into method.
13591:
1360    DELIVER_PENDING_EXCEPTION
1361END_FUNCTION art_quick_resolution_trampoline
1362
1363/* Generic JNI frame layout:
1364 *
1365 * #-------------------#
1366 * |                   |
1367 * | caller method...  |
1368 * #-------------------#    <--- SP on entry
1369 *
1370 *          |
1371 *          V
1372 *
1373 * #-------------------#
1374 * | caller method...  |
1375 * #-------------------#
1376 * | Return            |
1377 * | R15               |    callee save
1378 * | R14               |    callee save
1379 * | R13               |    callee save
1380 * | R12               |    callee save
1381 * | R9                |    arg5
1382 * | R8                |    arg4
1383 * | RSI/R6            |    arg1
1384 * | RBP/R5            |    callee save
1385 * | RBX/R3            |    callee save
1386 * | RDX/R2            |    arg2
1387 * | RCX/R1            |    arg3
1388 * | XMM7              |    float arg 8
1389 * | XMM6              |    float arg 7
1390 * | XMM5              |    float arg 6
1391 * | XMM4              |    float arg 5
1392 * | XMM3              |    float arg 4
1393 * | XMM2              |    float arg 3
1394 * | XMM1              |    float arg 2
1395 * | XMM0              |    float arg 1
1396 * | RDI/Method*       |  <- sp
1397 * #-------------------#
1398 * | Scratch Alloca    |    5K scratch space
1399 * #---------#---------#
1400 * |         | sp*     |
1401 * | Tramp.  #---------#
1402 * | args    | thread  |
1403 * | Tramp.  #---------#
1404 * |         | method  |
1405 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1406 *
1407 *           |
1408 *           v              artQuickGenericJniTrampoline
1409 *
1410 * #-------------------#
1411 * | caller method...  |
1412 * #-------------------#
1413 * | Return            |
1414 * | Callee-Save Data  |
1415 * #-------------------#
1416 * | handle scope      |
1417 * #-------------------#
1418 * | Method*           |    <--- (1)
1419 * #-------------------#
1420 * | local ref cookie  | // 4B
1421 * | handle scope size | // 4B   TODO: roll into call stack alignment?
1422 * #-------------------#
1423 * | JNI Call Stack    |
1424 * #-------------------#    <--- SP on native call
1425 * |                   |
1426 * | Stack for Regs    |    The trampoline assembly will pop these values
1427 * |                   |    into registers for native call
1428 * #-------------------#
1429 * | Native code ptr   |
1430 * #-------------------#
1431 * | Free scratch      |
1432 * #-------------------#
1433 * | Ptr to (1)        |    <--- RSP
1434 * #-------------------#
1435 */
1436    /*
1437     * Called to do a generic JNI down-call
1438     */
1439DEFINE_FUNCTION art_quick_generic_jni_trampoline
1440    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
1441
1442    movq %rsp, %rbp                 // save SP at (old) callee-save frame
1443    CFI_DEF_CFA_REGISTER(rbp)
1444
1445    //
1446    // reserve a lot of space
1447    //
1448    //      4    local state ref
1449    //      4    padding
1450    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
1451    //     16    handle scope member fields ?
1452    // +  112    14x 8-byte stack-2-register space
1453    // ------
1454    //   4332
1455    // 16-byte aligned: 4336
1456    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
1457    //       Also means: the padding is somewhere in the middle
1458    //
1459    //
1460    // New test: use 5K and release
1461    // 5k = 5120
1462    subq LITERAL(5120), %rsp
1463    // prepare for artQuickGenericJniTrampoline call
1464    // (Thread*,  SP)
1465    //    rdi    rsi      <= C calling convention
1466    //  gs:...   rbp      <= where they are
1467    movq %gs:THREAD_SELF_OFFSET, %rdi
1468    movq %rbp, %rsi
1469    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
1470
1471    // The C call will have registered the complete save-frame on success.
1472    // The result of the call is:
1473    // %rax: pointer to native code, 0 on error.
1474    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.
1475
1476    // Check for error = 0.
1477    test %rax, %rax
1478    jz .Lexception_in_native
1479
1480    // Release part of the alloca.
1481    movq %rdx, %rsp
1482
1483    // pop from the register-passing alloca region
1484    // what's the right layout?
1485    popq %rdi
1486    popq %rsi
1487    popq %rdx
1488    popq %rcx
1489    popq %r8
1490    popq %r9
1491    // TODO: skip floating point if unused, some flag.
1492    movq 0(%rsp), %xmm0
1493    movq 8(%rsp), %xmm1
1494    movq 16(%rsp), %xmm2
1495    movq 24(%rsp), %xmm3
1496    movq 32(%rsp), %xmm4
1497    movq 40(%rsp), %xmm5
1498    movq 48(%rsp), %xmm6
1499    movq 56(%rsp), %xmm7
1500    addq LITERAL(64), %rsp          // floating-point done
1501
1502    // native call
1503    call *%rax
1504
1505    // result sign extension is handled in C code
1506    // prepare for artQuickGenericJniEndTrampoline call
1507    // (Thread*,  result, result_f)
1508    //   rdi      rsi   rdx       <= C calling convention
1509    //  gs:...    rax   xmm0      <= where they are
1510    movq %gs:THREAD_SELF_OFFSET, %rdi
1511    movq %rax, %rsi
1512    movq %xmm0, %rdx
1513    call SYMBOL(artQuickGenericJniEndTrampoline)
1514
1515    // Pending exceptions possible.
1516    // TODO: use cmpq, needs direct encoding because of gas bug
1517    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
1518    test %rcx, %rcx
1519    jnz .Lexception_in_native
1520
1521    // Tear down the alloca.
1522    movq %rbp, %rsp
1523    CFI_DEF_CFA_REGISTER(rsp)
1524
1525    // Tear down the callee-save frame.
1526    // Load FPRs.
1527    // movq %xmm0, 16(%rsp)         // doesn't make sense!!!
1528    movq 24(%rsp), %xmm1            // neither does this!!!
1529    movq 32(%rsp), %xmm2
1530    movq 40(%rsp), %xmm3
1531    movq 48(%rsp), %xmm4
1532    movq 56(%rsp), %xmm5
1533    movq 64(%rsp), %xmm6
1534    movq 72(%rsp), %xmm7
1535    movq 80(%rsp), %xmm12
1536    movq 88(%rsp), %xmm13
1537    movq 96(%rsp), %xmm14
1538    movq 104(%rsp), %xmm15
1539    // was 80 bytes
1540    addq LITERAL(80 + 4*8), %rsp
1541    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
1542    // Save callee and GPR args, mixed together to agree with core spills bitmap.
1543    POP rcx  // Arg.
1544    POP rdx  // Arg.
1545    POP rbx  // Callee save.
1546    POP rbp  // Callee save.
1547    POP rsi  // Arg.
1548    POP r8   // Arg.
1549    POP r9   // Arg.
1550    POP r12  // Callee save.
1551    POP r13  // Callee save.
1552    POP r14  // Callee save.
1553    POP r15  // Callee save.
1554    // store into fpr, for when it's a fpr return...
1555    movq %rax, %xmm0
1556    ret
1557.Lexception_in_native:
1558    movq %gs:THREAD_TOP_QUICK_FRAME_OFFSET, %rsp
1559    CFI_DEF_CFA_REGISTER(rsp)
1560    // Do a call to push a new save-all frame required by the runtime.
1561    call .Lexception_call
1562.Lexception_call:
1563    DELIVER_PENDING_EXCEPTION
1564END_FUNCTION art_quick_generic_jni_trampoline
1565
1566    /*
1567     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1568     * of a quick call:
1569     * RDI = method being called / to bridge to.
1570     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1571     */
1572DEFINE_FUNCTION art_quick_to_interpreter_bridge
1573    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
1574    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
1575    movq %rsp, %rdx                        // RDX := sp
1576    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
1577    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
1578    movq %rax, %xmm0                   // Place return value also into floating point return value.
1579    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
1580END_FUNCTION art_quick_to_interpreter_bridge
1581
1582    /*
1583     * Routine that intercepts method calls and returns.
1584     */
1585DEFINE_FUNCTION art_quick_instrumentation_entry
1586#if defined(__APPLE__)
1587    int3
1588    int3
1589#else
1590    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
1591
1592    movq %rdi, %r12               // Preserve method pointer in a callee-save.
1593
1594    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
1595    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.
1596
1597    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
1598
1599                                  // %rax = result of call.
1600    movq %r12, %rdi               // Reload method pointer.
1601
1602    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
1603    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
1604
1605    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
1606
1607    jmp *%rax                     // Tail call to intended method.
1608#endif  // __APPLE__
1609END_FUNCTION art_quick_instrumentation_entry
1610
1611DEFINE_FUNCTION art_quick_instrumentation_exit
1612    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
1613
1614    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
1615
1616    // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
1617    // we would need to fully restore it. As there are a good number of callee-save registers, it
1618    // seems easier to have an extra small stack area. But this should be revisited.
1619
1620    movq  %rsp, %rsi                          // Pass SP.
1621
1622    PUSH rax                  // Save integer result.
1623    subq LITERAL(8), %rsp     // Save floating-point result.
1624    CFI_ADJUST_CFA_OFFSET(8)
1625    movq %xmm0, (%rsp)
1626
1627    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
1628    movq  %rax, %rdx                          // Pass integer result.
1629    movq  %xmm0, %rcx                         // Pass floating-point result.
1630
1631    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
1632
1633    movq  %rax, %rdi          // Store return PC
1634    movq  %rdx, %rsi          // Store second return PC in hidden arg.
1635
1636    movq (%rsp), %xmm0        // Restore floating-point result.
1637    addq LITERAL(8), %rsp
1638    CFI_ADJUST_CFA_OFFSET(-8)
1639    POP rax                   // Restore integer result.
1640
1641    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
1642
1643    addq LITERAL(8), %rsp     // Drop fake return pc.
1644
1645    jmp   *%rdi               // Return.
1646END_FUNCTION art_quick_instrumentation_exit
1647
1648    /*
1649     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1650     * will long jump to the upcall with a special exception of -1.
1651     */
1652DEFINE_FUNCTION art_quick_deoptimize
1653    pushq %rsi                     // Entry point for a jump. Fake that we were called.
1654                                   // Use hidden arg.
1655.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path)  // Entry point for real calls
1656                                                             // from compiled slow paths.
1657SYMBOL(art_quick_deoptimize_from_compiled_slow_path):
1658    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
1659                                   // Stack should be aligned now.
1660    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
1661    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
1662    int3                           // Unreachable.
1663END_FUNCTION art_quick_deoptimize
1664
1665    /*
1666     * String's compareTo.
1667     *
1668     * On entry:
1669     *    rdi:   this string object (known non-null)
1670     *    rsi:   comp string object (known non-null)
1671     */
1672DEFINE_FUNCTION art_quick_string_compareto
1673    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
1674    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
1675    /* Build pointers to the start of string data */
1676    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
1677    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
1678    /* Calculate min length and count diff */
1679    movl  %r8d, %ecx
1680    movl  %r8d, %eax
1681    subl  %r9d, %eax
1682    cmovg %r9d, %ecx
1683    /*
1684     * At this point we have:
1685     *   eax: value to return if first part of strings are equal
1686     *   ecx: minimum among the lengths of the two strings
1687     *   esi: pointer to comp string data
1688     *   edi: pointer to this string data
1689     */
1690    jecxz .Lkeep_length
1691    repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
1692    jne .Lnot_equal
1693.Lkeep_length:
1694    ret
1695    .balign 16
1696.Lnot_equal:
1697    movzwl  -2(%edi), %eax        // get last compared char from this string
1698    movzwl  -2(%esi), %ecx        // get last compared char from comp string
1699    subl  %ecx, %eax              // return the difference
1700    ret
1701END_FUNCTION art_quick_string_compareto
1702
1703UNIMPLEMENTED art_quick_memcmp16
1704
1705DEFINE_FUNCTION art_quick_assignable_from_code
1706    SETUP_FP_CALLEE_SAVE_FRAME
1707    call SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
1708    RESTORE_FP_CALLEE_SAVE_FRAME
1709    ret
1710END_FUNCTION art_quick_assignable_from_code
1711
1712
1713// Return from a nested signal:
1714// Entry:
1715//  rdi: address of jmp_buf in TLS
1716
1717DEFINE_FUNCTION art_nested_signal_return
1718                                    // first arg to longjmp is already in correct register
1719    movq LITERAL(1), %rsi           // second arg to longjmp (1)
1720    call PLT_SYMBOL(longjmp)
1721    int3                            // won't get here
1722END_FUNCTION art_nested_signal_return
1723