1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21
22MACRO0(ASSERT_USE_READ_BARRIER)
23#if !defined(USE_READ_BARRIER)
24    int3
25    int3
26#endif
27END_MACRO
28
29MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
30    // Create space for ART FP callee-saved registers
31    subq MACRO_LITERAL(4 * 8), %rsp
32    CFI_ADJUST_CFA_OFFSET(4 * 8)
33    movq %xmm12, 0(%rsp)
34    movq %xmm13, 8(%rsp)
35    movq %xmm14, 16(%rsp)
36    movq %xmm15, 24(%rsp)
37END_MACRO
38
39MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
40    // Restore ART FP callee-saved registers
41    movq 0(%rsp), %xmm12
42    movq 8(%rsp), %xmm13
43    movq 16(%rsp), %xmm14
44    movq 24(%rsp), %xmm15
45    addq MACRO_LITERAL(4 * 8), %rsp
46    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
47END_MACRO
48
49// For x86, the CFA is esp+4, the address above the pushed return address on the stack.
50
51    /*
52     * Macro that sets up the callee save frame to conform with
53     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
54     */
55MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
56#if defined(__APPLE__)
57    int3
58    int3
59#else
60    // R10 := Runtime::Current()
61    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
62    movq (%r10), %r10
63    // Save callee save registers to agree with core spills bitmap.
64    PUSH r15  // Callee save.
65    PUSH r14  // Callee save.
66    PUSH r13  // Callee save.
67    PUSH r12  // Callee save.
68    PUSH rbp  // Callee save.
69    PUSH rbx  // Callee save.
70    // Create space for FPR args, plus space for ArtMethod*.
71    subq MACRO_LITERAL(4 * 8 + 8), %rsp
72    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
73    // Save FPRs.
74    movq %xmm12, 8(%rsp)
75    movq %xmm13, 16(%rsp)
76    movq %xmm14, 24(%rsp)
77    movq %xmm15, 32(%rsp)
78    // R10 := ArtMethod* for save all callee save frame method.
79    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
80    // Store ArtMethod* to bottom of stack.
81    movq %r10, 0(%rsp)
82    // Store rsp as the top quick frame.
83    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
84
85    // Ugly compile-time check, but we only have the preprocessor.
86    // Last +8: implicit return address pushed on stack when caller made call.
87#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
88#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
89#endif
90#endif  // __APPLE__
91END_MACRO
92
93    /*
94     * Macro that sets up the callee save frame to conform with
95     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
96     */
97MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
98#if defined(__APPLE__)
99    int3
100    int3
101#else
102    // R10 := Runtime::Current()
103    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
104    movq (%r10), %r10
105    // Save callee and GPR args, mixed together to agree with core spills bitmap.
106    PUSH r15  // Callee save.
107    PUSH r14  // Callee save.
108    PUSH r13  // Callee save.
109    PUSH r12  // Callee save.
110    PUSH rbp  // Callee save.
111    PUSH rbx  // Callee save.
112    // Create space for FPR args, plus space for ArtMethod*.
113    subq LITERAL(8 + 4 * 8), %rsp
114    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
115    // Save FPRs.
116    movq %xmm12, 8(%rsp)
117    movq %xmm13, 16(%rsp)
118    movq %xmm14, 24(%rsp)
119    movq %xmm15, 32(%rsp)
120    // R10 := ArtMethod* for refs only callee save frame method.
121    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
122    // Store ArtMethod* to bottom of stack.
123    movq %r10, 0(%rsp)
124    // Store rsp as the stop quick frame.
125    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
126
127    // Ugly compile-time check, but we only have the preprocessor.
128    // Last +8: implicit return address pushed on stack when caller made call.
129#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
130#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
131#endif
132#endif  // __APPLE__
133END_MACRO
134
135MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
136    movq 8(%rsp), %xmm12
137    movq 16(%rsp), %xmm13
138    movq 24(%rsp), %xmm14
139    movq 32(%rsp), %xmm15
140    addq LITERAL(8 + 4*8), %rsp
141    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
142    // TODO: optimize by not restoring callee-saves restored by the ABI
143    POP rbx
144    POP rbp
145    POP r12
146    POP r13
147    POP r14
148    POP r15
149END_MACRO
150
151    /*
152     * Macro that sets up the callee save frame to conform with
153     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
154     */
155MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
156#if defined(__APPLE__)
157    int3
158    int3
159#else
160    // R10 := Runtime::Current()
161    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
162    movq (%r10), %r10
163    // Save callee and GPR args, mixed together to agree with core spills bitmap.
164    PUSH r15  // Callee save.
165    PUSH r14  // Callee save.
166    PUSH r13  // Callee save.
167    PUSH r12  // Callee save.
168    PUSH r9   // Quick arg 5.
169    PUSH r8   // Quick arg 4.
170    PUSH rsi  // Quick arg 1.
171    PUSH rbp  // Callee save.
172    PUSH rbx  // Callee save.
173    PUSH rdx  // Quick arg 2.
174    PUSH rcx  // Quick arg 3.
175    // Create space for FPR args and create 2 slots for ArtMethod*.
176    subq MACRO_LITERAL(16 + 12 * 8), %rsp
177    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
178    // R10 := ArtMethod* for ref and args callee save frame method.
179    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
180    // Save FPRs.
181    movq %xmm0, 16(%rsp)
182    movq %xmm1, 24(%rsp)
183    movq %xmm2, 32(%rsp)
184    movq %xmm3, 40(%rsp)
185    movq %xmm4, 48(%rsp)
186    movq %xmm5, 56(%rsp)
187    movq %xmm6, 64(%rsp)
188    movq %xmm7, 72(%rsp)
189    movq %xmm12, 80(%rsp)
190    movq %xmm13, 88(%rsp)
191    movq %xmm14, 96(%rsp)
192    movq %xmm15, 104(%rsp)
193    // Store ArtMethod* to bottom of stack.
194    movq %r10, 0(%rsp)
195    // Store rsp as the top quick frame.
196    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
197
198    // Ugly compile-time check, but we only have the preprocessor.
199    // Last +8: implicit return address pushed on stack when caller made call.
200#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
201#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
202#endif
203#endif  // __APPLE__
204END_MACRO
205
206MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
207    // Save callee and GPR args, mixed together to agree with core spills bitmap.
208    PUSH r15  // Callee save.
209    PUSH r14  // Callee save.
210    PUSH r13  // Callee save.
211    PUSH r12  // Callee save.
212    PUSH r9   // Quick arg 5.
213    PUSH r8   // Quick arg 4.
214    PUSH rsi  // Quick arg 1.
215    PUSH rbp  // Callee save.
216    PUSH rbx  // Callee save.
217    PUSH rdx  // Quick arg 2.
218    PUSH rcx  // Quick arg 3.
219    // Create space for FPR args and create 2 slots for ArtMethod*.
220    subq LITERAL(80 + 4 * 8), %rsp
221    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
222    // Save FPRs.
223    movq %xmm0, 16(%rsp)
224    movq %xmm1, 24(%rsp)
225    movq %xmm2, 32(%rsp)
226    movq %xmm3, 40(%rsp)
227    movq %xmm4, 48(%rsp)
228    movq %xmm5, 56(%rsp)
229    movq %xmm6, 64(%rsp)
230    movq %xmm7, 72(%rsp)
231    movq %xmm12, 80(%rsp)
232    movq %xmm13, 88(%rsp)
233    movq %xmm14, 96(%rsp)
234    movq %xmm15, 104(%rsp)
235    // Store ArtMethod to bottom of stack.
236    movq %rdi, 0(%rsp)
237    // Store rsp as the stop quick frame.
238    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
239END_MACRO
240
241MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
242    // Restore FPRs.
243    movq 16(%rsp), %xmm0
244    movq 24(%rsp), %xmm1
245    movq 32(%rsp), %xmm2
246    movq 40(%rsp), %xmm3
247    movq 48(%rsp), %xmm4
248    movq 56(%rsp), %xmm5
249    movq 64(%rsp), %xmm6
250    movq 72(%rsp), %xmm7
251    movq 80(%rsp), %xmm12
252    movq 88(%rsp), %xmm13
253    movq 96(%rsp), %xmm14
254    movq 104(%rsp), %xmm15
255    addq MACRO_LITERAL(80 + 4 * 8), %rsp
256    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
257    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
258    POP rcx
259    POP rdx
260    POP rbx
261    POP rbp
262    POP rsi
263    POP r8
264    POP r9
265    POP r12
266    POP r13
267    POP r14
268    POP r15
269END_MACRO
270
271    /*
272     * Macro that sets up the callee save frame to conform with
273     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
274     * when R14 and R15 are already saved.
275     */
276MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
277#if defined(__APPLE__)
278    int3
279    int3
280#else
281    // Save core registers from highest to lowest to agree with core spills bitmap.
282    // R14 and R15, or at least placeholders for them, are already on the stack.
283    PUSH r13
284    PUSH r12
285    PUSH r11
286    PUSH r10
287    PUSH r9
288    PUSH r8
289    PUSH rdi
290    PUSH rsi
291    PUSH rbp
292    PUSH rbx
293    PUSH rdx
294    PUSH rcx
295    PUSH rax
296    // Create space for FPRs and stack alignment padding.
297    subq MACRO_LITERAL(8 + 16 * 8), %rsp
298    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
299    // R10 := Runtime::Current()
300    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
301    movq (%r10), %r10
302    // Save FPRs.
303    movq %xmm0, 8(%rsp)
304    movq %xmm1, 16(%rsp)
305    movq %xmm2, 24(%rsp)
306    movq %xmm3, 32(%rsp)
307    movq %xmm4, 40(%rsp)
308    movq %xmm5, 48(%rsp)
309    movq %xmm6, 56(%rsp)
310    movq %xmm7, 64(%rsp)
311    movq %xmm8, 72(%rsp)
312    movq %xmm9, 80(%rsp)
313    movq %xmm10, 88(%rsp)
314    movq %xmm11, 96(%rsp)
315    movq %xmm12, 104(%rsp)
316    movq %xmm13, 112(%rsp)
317    movq %xmm14, 120(%rsp)
318    movq %xmm15, 128(%rsp)
319    // Push ArtMethod* for save everything frame method.
320    pushq \runtime_method_offset(%r10)
321    CFI_ADJUST_CFA_OFFSET(8)
322    // Store rsp as the top quick frame.
323    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
324
325    // Ugly compile-time check, but we only have the preprocessor.
326    // Last +8: implicit return address pushed on stack when caller made call.
327#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
328#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
329#endif
330#endif  // __APPLE__
331END_MACRO
332
333    /*
334     * Macro that sets up the callee save frame to conform with
335     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
336     * when R15 is already saved.
337     */
338MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
339    PUSH r14
340    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
341END_MACRO
342
343    /*
344     * Macro that sets up the callee save frame to conform with
345     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
346     */
347MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
348    PUSH r15
349    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
350END_MACRO
351
352MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
353    // Restore FPRs. Method and padding is still on the stack.
354    movq 16(%rsp), %xmm0
355    movq 24(%rsp), %xmm1
356    movq 32(%rsp), %xmm2
357    movq 40(%rsp), %xmm3
358    movq 48(%rsp), %xmm4
359    movq 56(%rsp), %xmm5
360    movq 64(%rsp), %xmm6
361    movq 72(%rsp), %xmm7
362    movq 80(%rsp), %xmm8
363    movq 88(%rsp), %xmm9
364    movq 96(%rsp), %xmm10
365    movq 104(%rsp), %xmm11
366    movq 112(%rsp), %xmm12
367    movq 120(%rsp), %xmm13
368    movq 128(%rsp), %xmm14
369    movq 136(%rsp), %xmm15
370END_MACRO
371
372MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
373    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
374    POP rcx
375    POP rdx
376    POP rbx
377    POP rbp
378    POP rsi
379    POP rdi
380    POP r8
381    POP r9
382    POP r10
383    POP r11
384    POP r12
385    POP r13
386    POP r14
387    POP r15
388END_MACRO
389
390MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
391    RESTORE_SAVE_EVERYTHING_FRAME_FRPS
392
393    // Remove save everything callee save method, stack alignment padding and FPRs.
394    addq MACRO_LITERAL(16 + 16 * 8), %rsp
395    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
396
397    POP rax
398    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
399END_MACRO
400
401MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
402    RESTORE_SAVE_EVERYTHING_FRAME_FRPS
403
404    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
405    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
406    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))
407
408    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
409END_MACRO
410
411    /*
412     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
413     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
414     */
415MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
416    // (Thread*) setup
417    movq %gs:THREAD_SELF_OFFSET, %rdi
418    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
419    UNREACHABLE
420END_MACRO
421
422    /*
423     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
424     * exception is Thread::Current()->exception_.
425     */
426MACRO0(DELIVER_PENDING_EXCEPTION)
427    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
428    DELIVER_PENDING_EXCEPTION_FRAME_READY
429END_MACRO
430
431MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
432    DEFINE_FUNCTION VAR(c_name)
433    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
434    // Outgoing argument set up
435    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
436    call CALLVAR(cxx_name)             // cxx_name(Thread*)
437    UNREACHABLE
438    END_FUNCTION VAR(c_name)
439END_MACRO
440
441MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
442    DEFINE_FUNCTION VAR(c_name)
443    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
444    // Outgoing argument set up
445    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
446    call CALLVAR(cxx_name)             // cxx_name(Thread*)
447    UNREACHABLE
448    END_FUNCTION VAR(c_name)
449END_MACRO
450
451MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
452    DEFINE_FUNCTION VAR(c_name)
453    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
454    // Outgoing argument set up
455    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
456    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
457    UNREACHABLE
458    END_FUNCTION VAR(c_name)
459END_MACRO
460
461MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
462    DEFINE_FUNCTION VAR(c_name)
463    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
464    // Outgoing argument set up
465    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
466    call CALLVAR(cxx_name)             // cxx_name(Thread*)
467    UNREACHABLE
468    END_FUNCTION VAR(c_name)
469END_MACRO
470
471    /*
472     * Called by managed code to create and deliver a NullPointerException.
473     */
474NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
475
476    /*
477     * Call installed by a signal handler to create and deliver a NullPointerException.
478     */
479DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
480    // Fault address and return address were saved by the fault handler.
481    // Save all registers as basis for long jump context; R15 will replace fault address later.
482    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
483    // Retrieve fault address and save R15.
484    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
485    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
486    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
487    // Outgoing argument set up; RDI already contains the fault address.
488    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
489    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
490    UNREACHABLE
491END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
492
493    /*
494     * Called by managed code to create and deliver an ArithmeticException.
495     */
496NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
497
498    /*
499     * Called by managed code to create and deliver a StackOverflowError.
500     */
501NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
502
503    /*
504     * Called by managed code, saves callee saves and then calls artThrowException
505     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
506     */
507ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
508
509    /*
510     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
511     * index, arg2 holds limit.
512     */
513TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
514
515    /*
516     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
517     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
518     */
519TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
520
521    /*
522     * All generated callsites for interface invokes and invocation slow paths will load arguments
523     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
524     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
525     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
526     *
527     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
528     * of the target Method* in rax and method->code_ in rdx.
529     *
530     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
531     * thread and we branch to another stub to deliver it.
532     *
533     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
534     * location on the stack.
535     *
536     * Adapted from x86 code.
537     */
538MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
539    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
540    // Helper signature is always
541    // (method_idx, *this_object, *caller_method, *self, sp)
542
543    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
544    movq %rsp, %rcx                                        // pass SP
545
546    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
547                                                           // save the code pointer
548    movq %rax, %rdi
549    movq %rdx, %rax
550    RESTORE_SAVE_REFS_AND_ARGS_FRAME
551
552    testq %rdi, %rdi
553    jz 1f
554
555    // Tail call to intended method.
556    jmp *%rax
5571:
558    DELIVER_PENDING_EXCEPTION
559END_MACRO
560MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
561    DEFINE_FUNCTION VAR(c_name)
562    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
563    END_FUNCTION VAR(c_name)
564END_MACRO
565
566INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
567
568INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
569INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
570INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
571INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
572
573
574    /*
575     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
576     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
577     * the end of the shorty.
578     */
579MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
5801: // LOOP
581    movb (%r10), %al              // al := *shorty
582    addq MACRO_LITERAL(1), %r10   // shorty++
583    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
584    je VAR(finished)
585    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
586    je 2f
587    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
588    je 3f
589    addq MACRO_LITERAL(4), %r11   // arg_array++
590    //  Handle extra space in arg array taken by a long.
591    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
592    jne 1b
593    addq MACRO_LITERAL(4), %r11   // arg_array++
594    jmp 1b                        // goto LOOP
5952:  // FOUND_DOUBLE
596    movsd (%r11), REG_VAR(xmm_reg)
597    addq MACRO_LITERAL(8), %r11   // arg_array+=2
598    jmp 4f
5993:  // FOUND_FLOAT
600    movss (%r11), REG_VAR(xmm_reg)
601    addq MACRO_LITERAL(4), %r11   // arg_array++
6024:
603END_MACRO
604
605    /*
606     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
607     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
608     * the end of the shorty.
609     */
610MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
6111: // LOOP
612    movb (%r10), %al              // al := *shorty
613    addq MACRO_LITERAL(1), %r10   // shorty++
614    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
615    je  VAR(finished)
616    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
617    je 2f
618    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
619    je 3f
620    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
621    je 4f
622    movl (%r11), REG_VAR(gpr_reg32)
623    addq MACRO_LITERAL(4), %r11   // arg_array++
624    jmp 5f
6252:  // FOUND_LONG
626    movq (%r11), REG_VAR(gpr_reg64)
627    addq MACRO_LITERAL(8), %r11   // arg_array+=2
628    jmp 5f
6293:  // SKIP_FLOAT
630    addq MACRO_LITERAL(4), %r11   // arg_array++
631    jmp 1b
6324:  // SKIP_DOUBLE
633    addq MACRO_LITERAL(8), %r11   // arg_array+=2
634    jmp 1b
6355:
636END_MACRO
637
638    /*
639     * Quick invocation stub.
640     * On entry:
641     *   [sp] = return address
642     *   rdi = method pointer
643     *   rsi = argument array that must at least contain the this pointer.
644     *   rdx = size of argument array in bytes
645     *   rcx = (managed) thread pointer
646     *   r8 = JValue* result
647     *   r9 = char* shorty
648     */
649DEFINE_FUNCTION art_quick_invoke_stub
650#if defined(__APPLE__)
651    int3
652    int3
653#else
654    // Set up argument XMM registers.
655    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
656    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
657    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
658    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
659    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
660    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
661    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
662    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
663    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
664    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
665    .balign 16
666.Lxmm_setup_finished:
667    PUSH rbp                      // Save rbp.
668    PUSH r8                       // Save r8/result*.
669    PUSH r9                       // Save r9/shorty*.
670    PUSH rbx                      // Save native callee save rbx
671    PUSH r12                      // Save native callee save r12
672    PUSH r13                      // Save native callee save r13
673    PUSH r14                      // Save native callee save r14
674    PUSH r15                      // Save native callee save r15
675    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
676    CFI_DEF_CFA_REGISTER(rbp)
677
678    movl %edx, %r10d
679    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
680                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
681    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
682    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
683                                   // r13, r14, and r15
684    subq %rdx, %rsp                // Reserve stack space for argument array.
685
686#if (STACK_REFERENCE_SIZE != 4)
687#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
688#endif
689    movq LITERAL(0), (%rsp)       // Store null for method*
690
691    movl %r10d, %ecx              // Place size of args in rcx.
692    movq %rdi, %rax               // rax := method to be called
693    movq %rsi, %r11               // r11 := arg_array
694    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
695                                  // arguments.
696    // Copy arg array into stack.
697    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
698    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
699    movq %rax, %rdi               // rdi := method to be called
700    movl (%r11), %esi             // rsi := this pointer
701    addq LITERAL(4), %r11         // arg_array++
702    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
703    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
704    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
705    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
706.Lgpr_setup_finished:
707    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
708    movq %rbp, %rsp               // Restore stack pointer.
709    POP r15                       // Pop r15
710    POP r14                       // Pop r14
711    POP r13                       // Pop r13
712    POP r12                       // Pop r12
713    POP rbx                       // Pop rbx
714    POP r9                        // Pop r9 - shorty*
715    POP r8                        // Pop r8 - result*.
716    POP rbp                       // Pop rbp
717    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
718    je .Lreturn_double_quick
719    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
720    je .Lreturn_float_quick
721    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
722    ret
723.Lreturn_double_quick:
724    movsd %xmm0, (%r8)            // Store the double floating point result.
725    ret
726.Lreturn_float_quick:
727    movss %xmm0, (%r8)            // Store the floating point result.
728    ret
729#endif  // __APPLE__
730END_FUNCTION art_quick_invoke_stub
731
732    /*
733     * Quick invocation stub.
734     * On entry:
735     *   [sp] = return address
736     *   rdi = method pointer
737     *   rsi = argument array or null if no arguments.
738     *   rdx = size of argument array in bytes
739     *   rcx = (managed) thread pointer
740     *   r8 = JValue* result
741     *   r9 = char* shorty
742     */
743DEFINE_FUNCTION art_quick_invoke_static_stub
744#if defined(__APPLE__)
745    int3
746    int3
747#else
748    // Set up argument XMM registers.
749    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
750    movq %rsi, %r11               // R11 := arg_array
751    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
752    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
753    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
754    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
755    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
756    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
757    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
758    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
759    .balign 16
760.Lxmm_setup_finished2:
761    PUSH rbp                      // Save rbp.
762    PUSH r8                       // Save r8/result*.
763    PUSH r9                       // Save r9/shorty*.
764    PUSH rbx                      // Save rbx
765    PUSH r12                      // Save r12
766    PUSH r13                      // Save r13
767    PUSH r14                      // Save r14
768    PUSH r15                      // Save r15
769    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
770    CFI_DEF_CFA_REGISTER(rbp)
771
772    movl %edx, %r10d
773    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
774                                   // r8, r9, r12, r13, r14, and r15 in frame.
775    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
776    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
777                                   // r13, r14, and r15.
778    subq %rdx, %rsp                // Reserve stack space for argument array.
779
780#if (STACK_REFERENCE_SIZE != 4)
781#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
782#endif
783    movq LITERAL(0), (%rsp)        // Store null for method*
784
785    movl %r10d, %ecx               // Place size of args in rcx.
786    movq %rdi, %rax                // rax := method to be called
787    movq %rsi, %r11                // r11 := arg_array
788    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
789                                   // stack arguments.
790    // Copy arg array into stack.
791    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
792    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
793    movq %rax, %rdi                // rdi := method to be called
794    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
795    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
796    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
797    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
798    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
799.Lgpr_setup_finished2:
800    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
801    movq %rbp, %rsp                // Restore stack pointer.
802    POP r15                        // Pop r15
803    POP r14                        // Pop r14
804    POP r13                        // Pop r13
805    POP r12                        // Pop r12
806    POP rbx                        // Pop rbx
807    POP r9                         // Pop r9 - shorty*.
808    POP r8                         // Pop r8 - result*.
809    POP rbp                        // Pop rbp
810    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
811    je .Lreturn_double_quick2
812    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
813    je .Lreturn_float_quick2
814    movq %rax, (%r8)               // Store the result assuming its a long, int or Object*
815    ret
816.Lreturn_double_quick2:
817    movsd %xmm0, (%r8)             // Store the double floating point result.
818    ret
819.Lreturn_float_quick2:
820    movss %xmm0, (%r8)             // Store the floating point result.
821    ret
822#endif  // __APPLE__
823END_FUNCTION art_quick_invoke_static_stub
824
825    /*
826     * Long jump stub.
827     * On entry:
828     *   rdi = gprs
829     *   rsi = fprs
830     */
831DEFINE_FUNCTION art_quick_do_long_jump
832#if defined(__APPLE__)
833    int3
834    int3
835#else
836    // Restore FPRs.
837    movq 0(%rsi), %xmm0
838    movq 8(%rsi), %xmm1
839    movq 16(%rsi), %xmm2
840    movq 24(%rsi), %xmm3
841    movq 32(%rsi), %xmm4
842    movq 40(%rsi), %xmm5
843    movq 48(%rsi), %xmm6
844    movq 56(%rsi), %xmm7
845    movq 64(%rsi), %xmm8
846    movq 72(%rsi), %xmm9
847    movq 80(%rsi), %xmm10
848    movq 88(%rsi), %xmm11
849    movq 96(%rsi), %xmm12
850    movq 104(%rsi), %xmm13
851    movq 112(%rsi), %xmm14
852    movq 120(%rsi), %xmm15
853    // Restore FPRs.
854    movq %rdi, %rsp   // RSP points to gprs.
855    // Load all registers except RSP and RIP with values in gprs.
856    popq %r15
857    popq %r14
858    popq %r13
859    popq %r12
860    popq %r11
861    popq %r10
862    popq %r9
863    popq %r8
864    popq %rdi
865    popq %rsi
866    popq %rbp
867    addq LITERAL(8), %rsp   // Skip rsp
868    popq %rbx
869    popq %rdx
870    popq %rcx
871    popq %rax
872    popq %rsp      // Load stack pointer.
873    ret            // From higher in the stack pop rip.
874#endif  // __APPLE__
875END_FUNCTION art_quick_do_long_jump
876
877MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
878    DEFINE_FUNCTION VAR(c_name)
879    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
880    // Outgoing argument set up
881    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
882    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
883    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
884    CALL_MACRO(return_macro)             // return or deliver exception
885    END_FUNCTION VAR(c_name)
886END_MACRO
887
888MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
889    DEFINE_FUNCTION VAR(c_name)
890    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
891    // Outgoing argument set up
892    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
893    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
894    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
895    CALL_MACRO(return_macro)             // return or deliver exception
896    END_FUNCTION VAR(c_name)
897END_MACRO
898
899MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
900    DEFINE_FUNCTION VAR(c_name)
901    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
902    // Outgoing argument set up
903    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
904    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
905    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
906    CALL_MACRO(return_macro)            // return or deliver exception
907    END_FUNCTION VAR(c_name)
908END_MACRO
909
910MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
911    DEFINE_FUNCTION VAR(c_name)
912    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
913    // Outgoing argument set up
914    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
915    call CALLVAR(cxx_name)              // cxx_name(arg1, arg2, arg3, arg4, Thread*)
916    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
917    CALL_MACRO(return_macro)            // return or deliver exception
918    END_FUNCTION VAR(c_name)
919END_MACRO
920
921MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
922    DEFINE_FUNCTION VAR(c_name)
923    SETUP_SAVE_REFS_ONLY_FRAME
924                                        // arg0 is in rdi
925    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
926    call CALLVAR(cxx_name)              // cxx_name(arg0, Thread*)
927    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
928    CALL_MACRO(return_macro)
929    END_FUNCTION VAR(c_name)
930END_MACRO
931
932MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
933    DEFINE_FUNCTION VAR(c_name)
934    SETUP_SAVE_REFS_ONLY_FRAME
935                                        // arg0 and arg1 are in rdi/rsi
936    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
937    call CALLVAR(cxx_name)              // (arg0, arg1, Thread*)
938    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
939    CALL_MACRO(return_macro)
940    END_FUNCTION VAR(c_name)
941END_MACRO
942
943MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
944    DEFINE_FUNCTION VAR(c_name)
945    SETUP_SAVE_REFS_ONLY_FRAME
946                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
947    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
948    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
949    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
950    CALL_MACRO(return_macro)            // return or deliver exception
951    END_FUNCTION VAR(c_name)
952END_MACRO
953
954    /*
955     * Macro for resolution and initialization of indexed DEX file
956     * constants such as classes and strings.
957     */
958MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
959    DEFINE_FUNCTION VAR(c_name)
960    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
961    // Outgoing argument set up
962    movl %eax, %edi                               // pass the index of the constant as arg0
963    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
964    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
965    testl %eax, %eax                              // If result is null, deliver the OOME.
966    jz 1f
967    CFI_REMEMBER_STATE
968    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX        // restore frame up to return address
969    ret
970    CFI_RESTORE_STATE
971    CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
9721:
973    DELIVER_PENDING_EXCEPTION_FRAME_READY
974    END_FUNCTION VAR(c_name)
975END_MACRO
976
977MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
978    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
979END_MACRO
980
981MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
982    testq %rax, %rax               // rax == 0 ?
983    jz  1f                         // if rax == 0 goto 1
984    ret                            // return
9851:                                 // deliver exception on current thread
986    DELIVER_PENDING_EXCEPTION
987END_MACRO
988
989MACRO0(RETURN_IF_EAX_ZERO)
990    testl %eax, %eax               // eax == 0 ?
991    jnz  1f                        // if eax != 0 goto 1
992    ret                            // return
9931:                                 // deliver exception on current thread
994    DELIVER_PENDING_EXCEPTION
995END_MACRO
996
997MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
998    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
999    testq %rcx, %rcx               // rcx == 0 ?
1000    jnz 1f                         // if rcx != 0 goto 1
1001    ret                            // return
10021:                                 // deliver exception on current thread
1003    DELIVER_PENDING_EXCEPTION
1004END_MACRO
1005
1006// Generate the allocation entrypoints for each allocator.
1007GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
1008
1009// Comment out allocators that have x86_64 specific asm.
1010// Region TLAB:
1011// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1012// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1013GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1014GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
1015// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1016// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
1017// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
1018// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
1019// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
1020GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1021GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1022GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1023// Normal TLAB:
1024// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
1025// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
1026GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
1027GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
1028// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
1029// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
1030// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
1031// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
1032// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
1033GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
1034GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
1035GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
1036
1037
1038// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
1039MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
1040    DEFINE_FUNCTION VAR(c_name)
1041    // Fast path rosalloc allocation.
1042    // RDI: mirror::Class*, RAX: return value
1043    // RSI, RDX, RCX, R8, R9: free.
1044                                                           // Check if the thread local
1045                                                           // allocation stack has room.
1046    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
1047    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
1048    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
1049    jae    .Lslow_path\c_name
1050                                                           // Load the object size
1051    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
1052                                                           // Check if the size is for a thread
1053                                                           // local allocation. Also does the
1054                                                           // initialized and finalizable checks.
1055    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
1056    ja     .Lslow_path\c_name
1057                                                           // Compute the rosalloc bracket index
1058                                                           // from the size.
1059    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
1060                                                           // Load the rosalloc run (r9)
1061                                                           // Subtract __SIZEOF_POINTER__ to
1062                                                           // subtract one from edi as there is no
1063                                                           // 0 byte run and the size is already
1064                                                           // aligned.
1065    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
1066                                                           // Load the free list head (rax). This
1067                                                           // will be the return val.
1068    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
1069    testq  %rax, %rax
1070    jz     .Lslow_path\c_name
1071    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
1072                                                           // Push the new object onto the thread
1073                                                           // local allocation stack and
1074                                                           // increment the thread local
1075                                                           // allocation stack top.
1076    movl   %eax, (%rcx)
1077    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
1078    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
1079                                                           // Load the next pointer of the head
1080                                                           // and update the list head with the
1081                                                           // next pointer.
1082    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
1083    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
1084                                                           // Store the class pointer in the
1085                                                           // header. This also overwrites the
1086                                                           // next pointer. The offsets are
1087                                                           // asserted to match.
1088#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1089#error "Class pointer needs to overwrite next pointer."
1090#endif
1091    POISON_HEAP_REF edi
1092    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
1093                                                           // Decrement the size of the free list
1094    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
1095                                                           // No fence necessary for x86.
1096    ret
1097.Lslow_path\c_name:
1098    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
1099    // Outgoing argument set up
1100    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
1101    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
1102    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
1103    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
1104    END_FUNCTION VAR(c_name)
1105END_MACRO
1106
1107ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
1108ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
1109
1110// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
1111// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
1112// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
1113//
1114// RDI: the class, RAX: return value.
1115// RCX, RSI, RDX: scratch, r8: Thread::Current().
1116MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
1117    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
1118END_MACRO
1119
1120// The fast path code for art_quick_alloc_object_initialized_region_tlab.
1121//
1122// RDI: the class, RSI: ArtMethod*, RAX: return value.
1123// RCX, RSI, RDX: scratch, r8: Thread::Current().
1124MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
1125    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
1126    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
1127    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
1128    addq %rax, %rcx                                            // Add size to pos, note that these
1129                                                               // are both 32 bit ints, overflow
1130                                                               // will cause the add to be past the
1131                                                               // end of the thread local region.
1132    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
1133    ja   RAW_VAR(slowPathLabel)
1134    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
1135    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
1136                                                               // Store the class pointer in the
1137                                                               // header.
1138                                                               // No fence needed for x86.
1139    POISON_HEAP_REF edi
1140    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
1141    ret                                                        // Fast path succeeded.
1142END_MACRO
1143
1144// The fast path code for art_quick_alloc_array_region_tlab.
1145// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
1146// Free temps: RCX, RDX, R8
1147// Output: RAX: return value.
1148MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
1149    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
1150    // Mask out the unaligned part to make sure we are 8 byte aligned.
1151    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
1152    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
1153    addq %rax, %r9
1154    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
1155    ja   RAW_VAR(slowPathLabel)
1156    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
1157    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
1158                                                               // Store the class pointer in the
1159                                                               // header.
1160                                                               // No fence needed for x86.
1161    POISON_HEAP_REF edi
1162    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
1163    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
1164    ret                                                        // Fast path succeeded.
1165END_MACRO
1166
1167// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
1168// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
1169MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
1170    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
1171    // Outgoing argument set up
1172    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
1173    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
1174    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
1175    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
1176END_MACRO
1177
1178// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
1179// called with CC if the GC is not active.
1180DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
1181    // RDI: mirror::Class* klass
1182    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
1183    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
1184.Lart_quick_alloc_object_resolved_tlab_slow_path:
1185    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
1186END_FUNCTION art_quick_alloc_object_resolved_tlab
1187
1188// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
1189// May be called with CC if the GC is not active.
1190DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
1191    // RDI: mirror::Class* klass
1192    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
1193    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
1194.Lart_quick_alloc_object_initialized_tlab_slow_path:
1195    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
1196END_FUNCTION art_quick_alloc_object_initialized_tlab
1197
1198MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
1199    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
1200    UNPOISON_HEAP_REF ecx
1201    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
1202    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx        // Get component size shift.
1203    movq %rsi, %r9
1204    salq %cl, %r9                                              // Calculate array count shifted.
1205    // Add array header + alignment rounding.
1206    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1207    // Add 4 extra bytes if we are doing a long array.
1208    addq MACRO_LITERAL(1), %rcx
1209    andq MACRO_LITERAL(4), %rcx
1210#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1211#error Long array data offset must be 4 greater than int array data offset.
1212#endif
1213    addq %rcx, %r9
1214END_MACRO
1215
1216MACRO0(COMPUTE_ARRAY_SIZE_8)
1217    // RDI: mirror::Class* klass, RSI: int32_t component_count
1218    // RDX, RCX, R8, R9: free. RAX: return val.
1219    movq %rsi, %r9
1220    // Add array header + alignment rounding.
1221    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1222END_MACRO
1223
1224MACRO0(COMPUTE_ARRAY_SIZE_16)
1225    // RDI: mirror::Class* klass, RSI: int32_t component_count
1226    // RDX, RCX, R8, R9: free. RAX: return val.
1227    movq %rsi, %r9
1228    salq MACRO_LITERAL(1), %r9
1229    // Add array header + alignment rounding.
1230    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1231END_MACRO
1232
1233MACRO0(COMPUTE_ARRAY_SIZE_32)
1234    // RDI: mirror::Class* klass, RSI: int32_t component_count
1235    // RDX, RCX, R8, R9: free. RAX: return val.
1236    movq %rsi, %r9
1237    salq MACRO_LITERAL(2), %r9
1238    // Add array header + alignment rounding.
1239    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1240END_MACRO
1241
1242MACRO0(COMPUTE_ARRAY_SIZE_64)
1243    // RDI: mirror::Class* klass, RSI: int32_t component_count
1244    // RDX, RCX, R8, R9: free. RAX: return val.
1245    movq %rsi, %r9
1246    salq MACRO_LITERAL(3), %r9
1247    // Add array header + alignment rounding.
1248    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1249END_MACRO
1250
1251MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
1252    DEFINE_FUNCTION VAR(c_entrypoint)
1253    // RDI: mirror::Class* klass, RSI: int32_t component_count
1254    // RDX, RCX, R8, R9: free. RAX: return val.
1255    CALL_MACRO(size_setup)
1256    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
1257.Lslow_path\c_entrypoint:
1258    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
1259    // Outgoing argument set up
1260    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
1261    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
1262    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
1263    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
1264    END_FUNCTION VAR(c_entrypoint)
1265END_MACRO
1266
1267
1268GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1269GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1270GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1271GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
1272GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
1273
1274GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1275GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
1276GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
1277GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
1278GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1279
1280// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
1281DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
1282    // Fast path region tlab allocation.
1283    // RDI: mirror::Class* klass
1284    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
1285    ASSERT_USE_READ_BARRIER
1286    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
1287.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
1288    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
1289END_FUNCTION art_quick_alloc_object_resolved_region_tlab
1290
1291// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
1292DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
1293    // Fast path region tlab allocation.
1294    // RDI: mirror::Class* klass
1295    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
1296    ASSERT_USE_READ_BARRIER
1297    // No read barrier since the caller is responsible for that.
1298    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
1299.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
1300    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
1301END_FUNCTION art_quick_alloc_object_initialized_region_tlab
1302
1303ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
1304ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
1305ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
1306ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
1307ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
1308ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
1309
1310TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
1311
1312DEFINE_FUNCTION art_quick_lock_object
1313    testl %edi, %edi                      // Null check object/rdi.
1314    jz   .Lslow_lock
1315.Lretry_lock:
1316    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
1317    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx  // Test the 2 high bits.
1318    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
1319    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
1320    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
1321    test %ecx, %ecx
1322    jnz  .Lalready_thin                   // Lock word contains a thin lock.
1323    // unlocked case - edx: original lock word, edi: obj.
1324    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
1325    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
1326    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
1327    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1328    jnz  .Lretry_lock                     // cmpxchg failed retry
1329    ret
1330.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
1331    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
1332    cmpw %cx, %dx                         // do we hold the lock already?
1333    jne  .Lslow_lock
1334    movl %edx, %ecx                       // copy the lock word to check count overflow.
1335    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
1336    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
1337    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
1338    jne  .Lslow_lock                      // count overflowed so go slow
1339    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
1340    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
1341    // update lockword, cmpxchg necessary for read barrier bits.
1342    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
1343    jnz  .Lretry_lock                     // cmpxchg failed retry
1344    ret
1345.Lslow_lock:
1346    SETUP_SAVE_REFS_ONLY_FRAME
1347    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1348    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
1349    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
1350    RETURN_IF_EAX_ZERO
1351END_FUNCTION art_quick_lock_object
1352
1353DEFINE_FUNCTION art_quick_lock_object_no_inline
1354    SETUP_SAVE_REFS_ONLY_FRAME
1355    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1356    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
1357    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
1358    RETURN_IF_EAX_ZERO
1359END_FUNCTION art_quick_lock_object_no_inline
1360
1361DEFINE_FUNCTION art_quick_unlock_object
1362    testl %edi, %edi                      // null check object/edi
1363    jz   .Lslow_unlock
1364.Lretry_unlock:
1365    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
1366    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
1367    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
1368    jnz  .Lslow_unlock                    // lock word contains a monitor
1369    cmpw %cx, %dx                         // does the thread id match?
1370    jne  .Lslow_unlock
1371    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
1372    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
1373    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
1374    jae  .Lrecursive_thin_unlock
1375    // update lockword, cmpxchg necessary for read barrier bits.
1376    movl %ecx, %eax                       // eax: old lock word.
1377    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
1378#ifndef USE_READ_BARRIER
1379    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1380#else
1381    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
1382    jnz  .Lretry_unlock                   // cmpxchg failed retry
1383#endif
1384    ret
1385.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
1386    // update lockword, cmpxchg necessary for read barrier bits.
1387    movl %ecx, %eax                       // eax: old lock word.
1388    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
1389#ifndef USE_READ_BARRIER
1390    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1391#else
1392    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
1393    jnz  .Lretry_unlock                   // cmpxchg failed retry
1394#endif
1395    ret
1396.Lslow_unlock:
1397    SETUP_SAVE_REFS_ONLY_FRAME
1398    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1399    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
1400    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
1401    RETURN_IF_EAX_ZERO
1402END_FUNCTION art_quick_unlock_object
1403
1404DEFINE_FUNCTION art_quick_unlock_object_no_inline
1405    SETUP_SAVE_REFS_ONLY_FRAME
1406    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1407    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
1408    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
1409    RETURN_IF_EAX_ZERO
1410END_FUNCTION art_quick_unlock_object_no_inline
1411
1412DEFINE_FUNCTION art_quick_check_instance_of
1413    // Type check using the bit string passes null as the target class. In that case just throw.
1414    testl %esi, %esi
1415    jz .Lthrow_class_cast_exception_for_bitstring_check
1416
1417    // We could check the super classes here but that is usually already checked in the caller.
1418    PUSH rdi                          // Save args for exc
1419    PUSH rsi
1420    subq LITERAL(8), %rsp             // Alignment padding.
1421    CFI_ADJUST_CFA_OFFSET(8)
1422    SETUP_FP_CALLEE_SAVE_FRAME
1423    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
1424    testq %rax, %rax
1425    jz .Lthrow_class_cast_exception   // jump forward if not assignable
1426    CFI_REMEMBER_STATE
1427    RESTORE_FP_CALLEE_SAVE_FRAME
1428    addq LITERAL(24), %rsp            // pop arguments
1429    CFI_ADJUST_CFA_OFFSET(-24)
1430    ret
1431    CFI_RESTORE_STATE                 // Reset unwind info so following code unwinds.
1432
1433.Lthrow_class_cast_exception:
1434    RESTORE_FP_CALLEE_SAVE_FRAME
1435    addq LITERAL(8), %rsp             // pop padding
1436    CFI_ADJUST_CFA_OFFSET(-8)
1437    POP rsi                           // Pop arguments
1438    POP rdi
1439
1440.Lthrow_class_cast_exception_for_bitstring_check:
1441    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
1442    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
1443    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
1444    UNREACHABLE
1445END_FUNCTION art_quick_check_instance_of
1446
1447
1448// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
1449MACRO2(POP_REG_NE, reg, exclude_reg)
1450    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
1451      addq MACRO_LITERAL(8), %rsp
1452      CFI_ADJUST_CFA_OFFSET(-8)
1453    .else
1454      POP RAW_VAR(reg)
1455    .endif
1456END_MACRO
1457
1458    /*
1459     * Macro to insert read barrier, used in art_quick_aput_obj.
1460     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
1461     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
1462     * 64b PUSH/POP and 32b argument.
1463     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
1464     *
1465     * As with art_quick_aput_obj function, the 64b versions are in comments.
1466     */
1467MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
1468#ifdef USE_READ_BARRIER
1469    PUSH rax                            // save registers that might be used
1470    PUSH rdi
1471    PUSH rsi
1472    PUSH rdx
1473    PUSH rcx
1474    SETUP_FP_CALLEE_SAVE_FRAME
1475    // Outgoing argument set up
1476    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
1477    // // movq REG_VAR(ref_reg64), %rdi
1478    movl REG_VAR(obj_reg), %esi         // pass obj_reg
1479    // movq REG_VAR(obj_reg), %rsi
1480    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
1481    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
1482    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
1483    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
1484    .ifnc RAW_VAR(dest_reg32), eax
1485    // .ifnc RAW_VAR(dest_reg64), rax
1486      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
1487      // movq %rax, REG_VAR(dest_reg64)
1488    .endif
1489    RESTORE_FP_CALLEE_SAVE_FRAME
1490    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
1491    POP_REG_NE rdx, RAW_VAR(dest_reg64)
1492    POP_REG_NE rsi, RAW_VAR(dest_reg64)
1493    POP_REG_NE rdi, RAW_VAR(dest_reg64)
1494    POP_REG_NE rax, RAW_VAR(dest_reg64)
1495#else
1496    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
1497    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
1498    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
1499#endif  // USE_READ_BARRIER
1500END_MACRO
1501
1502DEFINE_FUNCTION art_quick_aput_obj
1503    testl %edx, %edx                // store of null
1504//  test %rdx, %rdx
1505    jz .Ldo_aput_null
1506    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
1507    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
1508    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
1509    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
1510#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
1511    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
1512    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
1513    cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
1514#else
1515    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
1516//  cmpq MIRROR_CLASS_OFFSET(%rdx), %rcx
1517#endif
1518    jne .Lcheck_assignability
1519.Ldo_aput:
1520    POISON_HEAP_REF edx
1521    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1522//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1523    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
1524    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
1525//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
1526    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
1527    ret
1528.Ldo_aput_null:
1529    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1530//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1531    ret
1532.Lcheck_assignability:
1533    // Save arguments.
1534    PUSH rdi
1535    PUSH rsi
1536    PUSH rdx
1537    SETUP_FP_CALLEE_SAVE_FRAME
1538
1539#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
1540    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
1541    movl %eax, %esi               // Pass arg2 = value's class.
1542    // movq %rax, %rsi
1543#else
1544                                     // "Uncompress" = do nothing, as already zero-extended on load.
1545    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
1546#endif
1547    movq %rcx, %rdi               // Pass arg1 = array's component type.
1548
1549    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
1550
1551    // Exception?
1552    testq %rax, %rax
1553    jz   .Lthrow_array_store_exception
1554
1555    RESTORE_FP_CALLEE_SAVE_FRAME
1556    // Restore arguments.
1557    POP  rdx
1558    POP  rsi
1559    POP  rdi
1560
1561    POISON_HEAP_REF edx
1562    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1563//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1564    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
1565    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
1566//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
1567    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
1568//  movb %dl, (%rdx, %rdi)
1569    ret
1570    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
1571.Lthrow_array_store_exception:
1572    RESTORE_FP_CALLEE_SAVE_FRAME
1573    // Restore arguments.
1574    POP  rdx
1575    POP  rsi
1576    POP  rdi
1577
1578    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
1579
1580    // Outgoing argument set up.
1581    movq %rdx, %rsi                         // Pass arg 2 = value.
1582    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
1583                                            // Pass arg 1 = array.
1584    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
1585    UNREACHABLE
1586END_FUNCTION art_quick_aput_obj
1587
1588// TODO: This is quite silly on X86_64 now.
1589DEFINE_FUNCTION art_quick_memcpy
1590    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
1591    ret
1592END_FUNCTION art_quick_memcpy
1593
1594DEFINE_FUNCTION art_quick_test_suspend
1595    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
1596    // Outgoing argument set up
1597    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
1598    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
1599    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
1600    ret
1601END_FUNCTION art_quick_test_suspend
1602
1603UNIMPLEMENTED art_quick_ldiv
1604UNIMPLEMENTED art_quick_lmod
1605UNIMPLEMENTED art_quick_lmul
1606UNIMPLEMENTED art_quick_lshl
1607UNIMPLEMENTED art_quick_lshr
1608UNIMPLEMENTED art_quick_lushr
1609
1610// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
1611// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
1612
1613THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
1614THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
1615THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
1616THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
1617THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO
1618
1619TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1620TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1621TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1622TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1623TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1624TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1625TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1626
1627TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
1628TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
1629TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
1630TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1631TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO
1632
1633ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1634ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1635ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1636ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1637ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1638ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1639ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1640
1641DEFINE_FUNCTION art_quick_proxy_invoke_handler
1642    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
1643
1644    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
1645    movq %rsp, %rcx                         // Pass SP.
1646    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
1647    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1648    movq %rax, %xmm0                        // Copy return value in case of float returns.
1649    RETURN_OR_DELIVER_PENDING_EXCEPTION
1650END_FUNCTION art_quick_proxy_invoke_handler
1651
1652    /*
1653     * Called to resolve an imt conflict.
1654     * rdi is the conflict ArtMethod.
1655     * rax is a hidden argument that holds the target interface method's dex method index.
1656     *
1657     * Note that this stub writes to r10, r11, rax and rdi.
1658     */
1659DEFINE_FUNCTION art_quick_imt_conflict_trampoline
1660#if defined(__APPLE__)
1661    int3
1662    int3
1663#else
1664    movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer.
1665    mov %eax, %r11d             // Remember method index in R11.
1666    PUSH rdx                    // Preserve RDX as we need to clobber it by LOCK CMPXCHG16B.
1667    // If the method is obsolete, just go through the dex cache miss slow path.
1668    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
1669    testl LITERAL(ACC_OBSOLETE_METHOD), ART_METHOD_ACCESS_FLAGS_OFFSET(%r10)
1670    jnz .Limt_conflict_trampoline_dex_cache_miss
1671    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r10), %r10d  // Load declaring class (no read barrier).
1672    movl MIRROR_CLASS_DEX_CACHE_OFFSET(%r10), %r10d    // Load the DexCache (without read barrier).
1673    UNPOISON_HEAP_REF r10d
1674    movq MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET(%r10), %r10  // Load the resolved methods.
1675    andl LITERAL(METHOD_DEX_CACHE_SIZE_MINUS_ONE), %eax  // Calculate DexCache method slot index.
1676    shll LITERAL(1), %eax       // Multiply by 2 as entries have size 2 * __SIZEOF_POINTER__.
1677    leaq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load DexCache method slot address.
1678    mov %rcx, %rdx              // Make RDX:RAX == RCX:RBX so that LOCK CMPXCHG16B makes no changes.
1679    mov %rbx, %rax              // (The actual value does not matter.)
1680    lock cmpxchg16b (%r10)      // Relaxed atomic load RDX:RAX from the dex cache slot.
1681    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
1682    cmp %rdx, %r11              // Compare method index to see if we had a DexCache method hit.
1683    jne .Limt_conflict_trampoline_dex_cache_miss
1684.Limt_table_iterate:
1685    cmpq %rax, 0(%rdi)
1686    jne .Limt_table_next_entry
1687    // We successfully hit an entry in the table. Load the target method
1688    // and jump to it.
1689    movq __SIZEOF_POINTER__(%rdi), %rdi
1690    CFI_REMEMBER_STATE
1691    POP rdx
1692    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
1693    CFI_RESTORE_STATE
1694.Limt_table_next_entry:
1695    // If the entry is null, the interface method is not in the ImtConflictTable.
1696    cmpq LITERAL(0), 0(%rdi)
1697    jz .Lconflict_trampoline
1698    // Iterate over the entries of the ImtConflictTable.
1699    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
1700    jmp .Limt_table_iterate
1701.Lconflict_trampoline:
1702    // Call the runtime stub to populate the ImtConflictTable and jump to the
1703    // resolved method.
1704    CFI_REMEMBER_STATE
1705    POP rdx
1706    movq %rax, %rdi  // Load interface method
1707    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1708    CFI_RESTORE_STATE
1709.Limt_conflict_trampoline_dex_cache_miss:
1710    // We're not creating a proper runtime method frame here,
1711    // artLookupResolvedMethod() is not allowed to walk the stack.
1712
1713    // Save GPR args and ImtConflictTable; RDX is already saved.
1714    PUSH r9   // Quick arg 5.
1715    PUSH r8   // Quick arg 4.
1716    PUSH rsi  // Quick arg 1.
1717    PUSH rcx  // Quick arg 3.
1718    PUSH rdi  // ImtConflictTable
1719    // Save FPR args and callee-saves, align stack to 16B.
1720    subq MACRO_LITERAL(12 * 8 + 8), %rsp
1721    CFI_ADJUST_CFA_OFFSET(12 * 8 + 8)
1722    movq %xmm0, 0(%rsp)
1723    movq %xmm1, 8(%rsp)
1724    movq %xmm2, 16(%rsp)
1725    movq %xmm3, 24(%rsp)
1726    movq %xmm4, 32(%rsp)
1727    movq %xmm5, 40(%rsp)
1728    movq %xmm6, 48(%rsp)
1729    movq %xmm7, 56(%rsp)
1730    movq %xmm12, 64(%rsp)  // XMM12-15 are callee-save in ART compiled code ABI
1731    movq %xmm13, 72(%rsp)  // but caller-save in native ABI.
1732    movq %xmm14, 80(%rsp)
1733    movq %xmm15, 88(%rsp)
1734
1735    movq %r11, %rdi             // Pass method index.
1736    movq 12 * 8 + 8 + 6 * 8 + 8(%rsp), %rsi   // Pass referrer.
1737    call SYMBOL(artLookupResolvedMethod)  // (uint32_t method_index, ArtMethod* referrer)
1738
1739    // Restore FPRs.
1740    movq 0(%rsp), %xmm0
1741    movq 8(%rsp), %xmm1
1742    movq 16(%rsp), %xmm2
1743    movq 24(%rsp), %xmm3
1744    movq 32(%rsp), %xmm4
1745    movq 40(%rsp), %xmm5
1746    movq 48(%rsp), %xmm6
1747    movq 56(%rsp), %xmm7
1748    movq 64(%rsp), %xmm12
1749    movq 72(%rsp), %xmm13
1750    movq 80(%rsp), %xmm14
1751    movq 88(%rsp), %xmm15
1752    addq MACRO_LITERAL(12 * 8 + 8), %rsp
1753    CFI_ADJUST_CFA_OFFSET(-(12 * 8 + 8))
1754    // Restore ImtConflictTable and GPR args.
1755    POP rdi
1756    POP rcx
1757    POP rsi
1758    POP r8
1759    POP r9
1760
1761    cmp LITERAL(0), %rax        // If the method wasn't resolved,
1762    je .Lconflict_trampoline    //   skip the lookup and go to artInvokeInterfaceTrampoline().
1763    jmp .Limt_table_iterate
1764#endif  // __APPLE__
1765END_FUNCTION art_quick_imt_conflict_trampoline
1766
1767DEFINE_FUNCTION art_quick_resolution_trampoline
1768    SETUP_SAVE_REFS_AND_ARGS_FRAME
1769    movq %gs:THREAD_SELF_OFFSET, %rdx
1770    movq %rsp, %rcx
1771    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
1772    movq %rax, %r10               // Remember returned code pointer in R10.
1773    movq (%rsp), %rdi             // Load called method into RDI.
1774    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1775    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
1776    jz 1f
1777    jmp *%r10                     // Tail call into method.
17781:
1779    DELIVER_PENDING_EXCEPTION
1780END_FUNCTION art_quick_resolution_trampoline
1781
1782/* Generic JNI frame layout:
1783 *
1784 * #-------------------#
1785 * |                   |
1786 * | caller method...  |
1787 * #-------------------#    <--- SP on entry
1788 *
1789 *          |
1790 *          V
1791 *
1792 * #-------------------#
1793 * | caller method...  |
1794 * #-------------------#
1795 * | Return            |
1796 * | R15               |    callee save
1797 * | R14               |    callee save
1798 * | R13               |    callee save
1799 * | R12               |    callee save
1800 * | R9                |    arg5
1801 * | R8                |    arg4
1802 * | RSI/R6            |    arg1
1803 * | RBP/R5            |    callee save
1804 * | RBX/R3            |    callee save
1805 * | RDX/R2            |    arg2
1806 * | RCX/R1            |    arg3
1807 * | XMM7              |    float arg 8
1808 * | XMM6              |    float arg 7
1809 * | XMM5              |    float arg 6
1810 * | XMM4              |    float arg 5
1811 * | XMM3              |    float arg 4
1812 * | XMM2              |    float arg 3
1813 * | XMM1              |    float arg 2
1814 * | XMM0              |    float arg 1
1815 * | RDI/Method*       |  <- sp
1816 * #-------------------#
1817 * | Scratch Alloca    |    5K scratch space
1818 * #---------#---------#
1819 * |         | sp*     |
1820 * | Tramp.  #---------#
1821 * | args    | thread  |
1822 * | Tramp.  #---------#
1823 * |         | method  |
1824 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1825 *
1826 *           |
1827 *           v              artQuickGenericJniTrampoline
1828 *
1829 * #-------------------#
1830 * | caller method...  |
1831 * #-------------------#
1832 * | Return            |
1833 * | Callee-Save Data  |
1834 * #-------------------#
1835 * | handle scope      |
1836 * #-------------------#
1837 * | Method*           |    <--- (1)
1838 * #-------------------#
1839 * | local ref cookie  | // 4B
1840 * | handle scope size | // 4B   TODO: roll into call stack alignment?
1841 * #-------------------#
1842 * | JNI Call Stack    |
1843 * #-------------------#    <--- SP on native call
1844 * |                   |
1845 * | Stack for Regs    |    The trampoline assembly will pop these values
1846 * |                   |    into registers for native call
1847 * #-------------------#
1848 * | Native code ptr   |
1849 * #-------------------#
1850 * | Free scratch      |
1851 * #-------------------#
1852 * | Ptr to (1)        |    <--- RSP
1853 * #-------------------#
1854 */
1855    /*
1856     * Called to do a generic JNI down-call
1857     */
1858DEFINE_FUNCTION art_quick_generic_jni_trampoline
1859    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
1860
1861    movq %rsp, %rbp                 // save SP at (old) callee-save frame
1862    CFI_DEF_CFA_REGISTER(rbp)
1863
1864    //
1865    // reserve a lot of space
1866    //
1867    //      4    local state ref
1868    //      4    padding
1869    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
1870    //     16    handle scope member fields ?
1871    // +  112    14x 8-byte stack-2-register space
1872    // ------
1873    //   4332
1874    // 16-byte aligned: 4336
1875    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
1876    //       Also means: the padding is somewhere in the middle
1877    //
1878    //
1879    // New test: use 5K and release
1880    // 5k = 5120
1881    subq LITERAL(5120), %rsp
1882    // prepare for artQuickGenericJniTrampoline call
1883    // (Thread*,  SP)
1884    //    rdi    rsi      <= C calling convention
1885    //  gs:...   rbp      <= where they are
1886    movq %gs:THREAD_SELF_OFFSET, %rdi
1887    movq %rbp, %rsi
1888    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
1889
1890    // The C call will have registered the complete save-frame on success.
1891    // The result of the call is:
1892    // %rax: pointer to native code, 0 on error.
1893    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.
1894
1895    // Check for error = 0.
1896    test %rax, %rax
1897    jz .Lexception_in_native
1898
1899    // Release part of the alloca.
1900    movq %rdx, %rsp
1901
1902    // pop from the register-passing alloca region
1903    // what's the right layout?
1904    popq %rdi
1905    popq %rsi
1906    popq %rdx
1907    popq %rcx
1908    popq %r8
1909    popq %r9
1910    // TODO: skip floating point if unused, some flag.
1911    movq 0(%rsp), %xmm0
1912    movq 8(%rsp), %xmm1
1913    movq 16(%rsp), %xmm2
1914    movq 24(%rsp), %xmm3
1915    movq 32(%rsp), %xmm4
1916    movq 40(%rsp), %xmm5
1917    movq 48(%rsp), %xmm6
1918    movq 56(%rsp), %xmm7
1919    addq LITERAL(64), %rsp          // floating-point done
1920
1921    // native call
1922    call *%rax
1923
1924    // result sign extension is handled in C code
1925    // prepare for artQuickGenericJniEndTrampoline call
1926    // (Thread*,  result, result_f)
1927    //   rdi      rsi   rdx       <= C calling convention
1928    //  gs:...    rax   xmm0      <= where they are
1929    movq %gs:THREAD_SELF_OFFSET, %rdi
1930    movq %rax, %rsi
1931    movq %xmm0, %rdx
1932    call SYMBOL(artQuickGenericJniEndTrampoline)
1933
1934    // Pending exceptions possible.
1935    // TODO: use cmpq, needs direct encoding because of gas bug
1936    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
1937    test %rcx, %rcx
1938    jnz .Lexception_in_native
1939
1940    // Tear down the alloca.
1941    movq %rbp, %rsp
1942    CFI_DEF_CFA_REGISTER(rsp)
1943
1944    // Tear down the callee-save frame.
1945    // Load FPRs.
1946    // movq %xmm0, 16(%rsp)         // doesn't make sense!!!
1947    movq 24(%rsp), %xmm1            // neither does this!!!
1948    movq 32(%rsp), %xmm2
1949    movq 40(%rsp), %xmm3
1950    movq 48(%rsp), %xmm4
1951    movq 56(%rsp), %xmm5
1952    movq 64(%rsp), %xmm6
1953    movq 72(%rsp), %xmm7
1954    movq 80(%rsp), %xmm12
1955    movq 88(%rsp), %xmm13
1956    movq 96(%rsp), %xmm14
1957    movq 104(%rsp), %xmm15
1958    // was 80 bytes
1959    addq LITERAL(80 + 4*8), %rsp
1960    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
1961    // Save callee and GPR args, mixed together to agree with core spills bitmap.
1962    POP rcx  // Arg.
1963    POP rdx  // Arg.
1964    POP rbx  // Callee save.
1965    POP rbp  // Callee save.
1966    POP rsi  // Arg.
1967    POP r8   // Arg.
1968    POP r9   // Arg.
1969    POP r12  // Callee save.
1970    POP r13  // Callee save.
1971    POP r14  // Callee save.
1972    POP r15  // Callee save.
1973    // store into fpr, for when it's a fpr return...
1974    movq %rax, %xmm0
1975    ret
1976.Lexception_in_native:
1977    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
1978    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
1979    movq (%rsp), %rsp
1980    CFI_DEF_CFA_REGISTER(rsp)
1981    // Do a call to push a new save-all frame required by the runtime.
1982    call .Lexception_call
1983.Lexception_call:
1984    DELIVER_PENDING_EXCEPTION
1985END_FUNCTION art_quick_generic_jni_trampoline
1986
1987    /*
1988     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1989     * of a quick call:
1990     * RDI = method being called / to bridge to.
1991     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1992     */
1993DEFINE_FUNCTION art_quick_to_interpreter_bridge
1994    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
1995    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
1996    movq %rsp, %rdx                    // RDX := sp
1997    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
1998    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
1999    movq %rax, %xmm0                   // Place return value also into floating point return value.
2000    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
2001END_FUNCTION art_quick_to_interpreter_bridge
2002
2003    /*
2004     * Called to catch an attempt to invoke an obsolete method.
2005     * RDI = method being called.
2006     */
2007ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
2008
2009    /*
2010     * Routine that intercepts method calls and returns.
2011     */
2012DEFINE_FUNCTION art_quick_instrumentation_entry
2013#if defined(__APPLE__)
2014    int3
2015    int3
2016#else
2017    SETUP_SAVE_REFS_AND_ARGS_FRAME
2018
2019    movq %rdi, %r12               // Preserve method pointer in a callee-save.
2020
2021    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
2022    movq %rsp, %rcx                     // Pass SP.
2023
2024    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
2025
2026                                  // %rax = result of call.
2027    testq %rax, %rax
2028    jz 1f
2029
2030    movq %r12, %rdi               // Reload method pointer.
2031    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
2032    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
2033
2034    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2035
2036    jmp *%rax                     // Tail call to intended method.
20371:
2038    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2039    DELIVER_PENDING_EXCEPTION
2040#endif  // __APPLE__
2041END_FUNCTION art_quick_instrumentation_entry
2042
2043DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
2044    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
2045    CFI_ADJUST_CFA_OFFSET(8)
2046
2047    SETUP_SAVE_EVERYTHING_FRAME
2048
2049    leaq 16(%rsp), %rcx       // Pass floating-point result pointer, in kSaveEverything frame.
2050    leaq 144(%rsp), %rdx      // Pass integer result pointer, in kSaveEverything frame.
2051    movq %rsp, %rsi           // Pass SP.
2052    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
2053
2054    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)
2055
2056    testq %rax, %rax          // Check if we have a return-pc to go to. If we don't then there was
2057                              // an exception
2058    jz .Ldo_deliver_instrumentation_exception
2059    testq %rdx, %rdx
2060    jnz .Ldeoptimize
2061    // Normal return.
2062    movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
2063    RESTORE_SAVE_EVERYTHING_FRAME
2064    ret
2065.Ldeoptimize:
2066    movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
2067    RESTORE_SAVE_EVERYTHING_FRAME
2068    // Jump to art_quick_deoptimize.
2069    jmp SYMBOL(art_quick_deoptimize)
2070.Ldo_deliver_instrumentation_exception:
2071    DELIVER_PENDING_EXCEPTION_FRAME_READY
2072END_FUNCTION art_quick_instrumentation_exit
2073
2074    /*
2075     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
2076     * will long jump to the upcall with a special exception of -1.
2077     */
2078DEFINE_FUNCTION art_quick_deoptimize
2079    SETUP_SAVE_EVERYTHING_FRAME        // Stack should be aligned now.
2080    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
2081    call SYMBOL(artDeoptimize)         // (Thread*)
2082    UNREACHABLE
2083END_FUNCTION art_quick_deoptimize
2084
2085    /*
2086     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
2087     * will long jump to the interpreter bridge.
2088     */
2089DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
2090    SETUP_SAVE_EVERYTHING_FRAME
2091                                                // Stack should be aligned now.
2092    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
2093    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
2094    UNREACHABLE
2095END_FUNCTION art_quick_deoptimize_from_compiled_code
2096
2097    /*
2098     * String's compareTo.
2099     *
2100     * On entry:
2101     *    rdi:   this string object (known non-null)
2102     *    rsi:   comp string object (known non-null)
2103     */
2104DEFINE_FUNCTION art_quick_string_compareto
2105    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
2106    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
2107    /* Build pointers to the start of string data */
2108    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
2109    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
2110#if (STRING_COMPRESSION_FEATURE)
2111    /* Differ cases */
2112    shrl    LITERAL(1), %r8d
2113    jnc     .Lstring_compareto_this_is_compressed
2114    shrl    LITERAL(1), %r9d
2115    jnc     .Lstring_compareto_that_is_compressed
2116    jmp     .Lstring_compareto_both_not_compressed
2117.Lstring_compareto_this_is_compressed:
2118    shrl    LITERAL(1), %r9d
2119    jnc     .Lstring_compareto_both_compressed
2120    /* Comparison this (8-bit) and that (16-bit) */
2121    mov     %r8d, %eax
2122    subl    %r9d, %eax
2123    mov     %r8d, %ecx
2124    cmovg   %r9d, %ecx
2125    /* Going into loop to compare each character */
2126    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
2127.Lstring_compareto_loop_comparison_this_compressed:
2128    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
2129    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
2130    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
2131    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
2132    subl    %r9d, %r8d
2133    loope   .Lstring_compareto_loop_comparison_this_compressed
2134    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
2135.Lstring_compareto_keep_length1:
2136    ret
2137.Lstring_compareto_that_is_compressed:
2138    movl    %r8d, %eax
2139    subl    %r9d, %eax
2140    mov     %r8d, %ecx
2141    cmovg   %r9d, %ecx
2142    /* Comparison this (8-bit) and that (16-bit) */
2143    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
2144.Lstring_compareto_loop_comparison_that_compressed:
2145    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
2146    movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
2147    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
2148    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
2149    subl    %r9d, %r8d
2150    loope   .Lstring_compareto_loop_comparison_that_compressed
2151    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
2152.Lstring_compareto_keep_length2:
2153    ret
2154.Lstring_compareto_both_compressed:
2155    /* Calculate min length and count diff */
2156    movl    %r8d, %ecx
2157    movl    %r8d, %eax
2158    subl    %r9d, %eax
2159    cmovg   %r9d, %ecx
2160    jecxz   .Lstring_compareto_keep_length3
2161    repe    cmpsb
2162    je      .Lstring_compareto_keep_length3
2163    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
2164    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
2165    jmp     .Lstring_compareto_count_difference
2166#endif // STRING_COMPRESSION_FEATURE
2167.Lstring_compareto_both_not_compressed:
2168    /* Calculate min length and count diff */
2169    movl    %r8d, %ecx
2170    movl    %r8d, %eax
2171    subl    %r9d, %eax
2172    cmovg   %r9d, %ecx
2173    /*
2174     * At this point we have:
2175     *   eax: value to return if first part of strings are equal
2176     *   ecx: minimum among the lengths of the two strings
2177     *   esi: pointer to comp string data
2178     *   edi: pointer to this string data
2179     */
2180    jecxz .Lstring_compareto_keep_length3
2181    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
2182    je    .Lstring_compareto_keep_length3
2183    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
2184    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
2185.Lstring_compareto_count_difference:
2186    subl  %ecx, %eax              // return the difference
2187.Lstring_compareto_keep_length3:
2188    ret
2189END_FUNCTION art_quick_string_compareto
2190
2191UNIMPLEMENTED art_quick_memcmp16
2192
2193DEFINE_FUNCTION art_quick_instance_of
2194    SETUP_FP_CALLEE_SAVE_FRAME
2195    subq LITERAL(8), %rsp                      // Alignment padding.
2196    CFI_ADJUST_CFA_OFFSET(8)
2197    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
2198    addq LITERAL(8), %rsp
2199    CFI_ADJUST_CFA_OFFSET(-8)
2200    RESTORE_FP_CALLEE_SAVE_FRAME
2201    ret
2202END_FUNCTION art_quick_instance_of
2203
2204// Create a function `name` calling the ReadBarrier::Mark routine,
2205// getting its argument and returning its result through register
2206// `reg`, saving and restoring all caller-save registers.
2207//
2208// The generated function follows a non-standard runtime calling
2209// convention:
2210// - register `reg` (which may be different from RDI) is used to pass
2211//   the (sole) argument of this function;
2212// - register `reg` (which may be different from RAX) is used to return
2213//   the result of this function (instead of RAX);
2214// - if `reg` is different from `rdi`, RDI is treated like a normal
2215//   (non-argument) caller-save register;
2216// - if `reg` is different from `rax`, RAX is treated like a normal
2217//   (non-result) caller-save register;
2218// - everything else is the same as in the standard runtime calling
2219//   convention (e.g. standard callee-save registers are preserved).
2220MACRO2(READ_BARRIER_MARK_REG, name, reg)
2221    DEFINE_FUNCTION VAR(name)
2222    // Null check so that we can load the lock word.
2223    testq REG_VAR(reg), REG_VAR(reg)
2224    jz .Lret_rb_\name
2225.Lnot_null_\name:
2226    // Check the mark bit, if it is 1 return.
2227    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
2228    jz .Lslow_rb_\name
2229    ret
2230.Lslow_rb_\name:
2231    PUSH rax
2232    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
2233    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
2234    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
2235    // forwarding address one.
2236    // Taken ~25% of the time.
2237    jnae .Lret_forwarding_address\name
2238
2239    // Save all potentially live caller-save core registers.
2240    movq 0(%rsp), %rax
2241    PUSH rcx
2242    PUSH rdx
2243    PUSH rsi
2244    PUSH rdi
2245    PUSH r8
2246    PUSH r9
2247    PUSH r10
2248    PUSH r11
2249    // Create space for caller-save floating-point registers.
2250    subq MACRO_LITERAL(12 * 8), %rsp
2251    CFI_ADJUST_CFA_OFFSET(12 * 8)
2252    // Save all potentially live caller-save floating-point registers.
2253    movq %xmm0, 0(%rsp)
2254    movq %xmm1, 8(%rsp)
2255    movq %xmm2, 16(%rsp)
2256    movq %xmm3, 24(%rsp)
2257    movq %xmm4, 32(%rsp)
2258    movq %xmm5, 40(%rsp)
2259    movq %xmm6, 48(%rsp)
2260    movq %xmm7, 56(%rsp)
2261    movq %xmm8, 64(%rsp)
2262    movq %xmm9, 72(%rsp)
2263    movq %xmm10, 80(%rsp)
2264    movq %xmm11, 88(%rsp)
2265    SETUP_FP_CALLEE_SAVE_FRAME
2266
2267    .ifnc RAW_VAR(reg), rdi
2268      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
2269    .endif
2270    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
2271    .ifnc RAW_VAR(reg), rax
2272      movq %rax, REG_VAR(reg)       // Return result into `reg`.
2273    .endif
2274
2275    RESTORE_FP_CALLEE_SAVE_FRAME
2276    // Restore floating-point registers.
2277    movq 0(%rsp), %xmm0
2278    movq 8(%rsp), %xmm1
2279    movq 16(%rsp), %xmm2
2280    movq 24(%rsp), %xmm3
2281    movq 32(%rsp), %xmm4
2282    movq 40(%rsp), %xmm5
2283    movq 48(%rsp), %xmm6
2284    movq 56(%rsp), %xmm7
2285    movq 64(%rsp), %xmm8
2286    movq 72(%rsp), %xmm9
2287    movq 80(%rsp), %xmm10
2288    movq 88(%rsp), %xmm11
2289    // Remove floating-point registers.
2290    addq MACRO_LITERAL(12 * 8), %rsp
2291    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
2292    // Restore core regs, except `reg`, as it is used to return the
2293    // result of this function (simply remove it from the stack instead).
2294    POP_REG_NE r11, RAW_VAR(reg)
2295    POP_REG_NE r10, RAW_VAR(reg)
2296    POP_REG_NE r9, RAW_VAR(reg)
2297    POP_REG_NE r8, RAW_VAR(reg)
2298    POP_REG_NE rdi, RAW_VAR(reg)
2299    POP_REG_NE rsi, RAW_VAR(reg)
2300    POP_REG_NE rdx, RAW_VAR(reg)
2301    POP_REG_NE rcx, RAW_VAR(reg)
2302    POP_REG_NE rax, RAW_VAR(reg)
2303.Lret_rb_\name:
2304    ret
2305.Lret_forwarding_address\name:
2306    // The overflow cleared the top bits.
2307    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
2308    movq %rax, REG_VAR(reg)
2309    POP_REG_NE rax, RAW_VAR(reg)
2310    ret
2311    END_FUNCTION VAR(name)
2312END_MACRO
2313
2314READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
2315READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
2316READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
2317READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
2318// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
2319// cannot be used to pass arguments.
2320READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
2321READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
2322READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
2323READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
2324READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
2325READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
2326READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
2327READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
2328READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
2329READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
2330READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
2331
2332DEFINE_FUNCTION art_quick_read_barrier_slow
2333    SETUP_FP_CALLEE_SAVE_FRAME
2334    subq LITERAL(8), %rsp           // Alignment padding.
2335    CFI_ADJUST_CFA_OFFSET(8)
2336    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
2337    addq LITERAL(8), %rsp
2338    CFI_ADJUST_CFA_OFFSET(-8)
2339    RESTORE_FP_CALLEE_SAVE_FRAME
2340    ret
2341END_FUNCTION art_quick_read_barrier_slow
2342
2343DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
2344    SETUP_FP_CALLEE_SAVE_FRAME
2345    subq LITERAL(8), %rsp                  // Alignment padding.
2346    CFI_ADJUST_CFA_OFFSET(8)
2347    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
2348    addq LITERAL(8), %rsp
2349    CFI_ADJUST_CFA_OFFSET(-8)
2350    RESTORE_FP_CALLEE_SAVE_FRAME
2351    ret
2352END_FUNCTION art_quick_read_barrier_for_root_slow
2353
2354    /*
2355     * On stack replacement stub.
2356     * On entry:
2357     *   [sp] = return address
2358     *   rdi = stack to copy
2359     *   rsi = size of stack
2360     *   rdx = pc to call
2361     *   rcx = JValue* result
2362     *   r8 = shorty
2363     *   r9 = thread
2364     *
2365     * Note that the native C ABI already aligned the stack to 16-byte.
2366     */
2367DEFINE_FUNCTION art_quick_osr_stub
2368    // Save the non-volatiles.
2369    PUSH rbp                      // Save rbp.
2370    PUSH rcx                      // Save rcx/result*.
2371    PUSH r8                       // Save r8/shorty*.
2372
2373    // Save callee saves.
2374    PUSH rbx
2375    PUSH r12
2376    PUSH r13
2377    PUSH r14
2378    PUSH r15
2379
2380    pushq LITERAL(0)              // Push null for ArtMethod*.
2381    CFI_ADJUST_CFA_OFFSET(8)
2382    movl %esi, %ecx               // rcx := size of stack
2383    movq %rdi, %rsi               // rsi := stack to copy
2384    movq %rsp, %rbp               // Save stack pointer to RBP for CFI use in .Losr_entry.
2385    call .Losr_entry
2386    CFI_REMEMBER_STATE
2387
2388    // Restore stack and callee-saves.
2389    addq LITERAL(8), %rsp
2390    CFI_ADJUST_CFA_OFFSET(-8)
2391    POP r15
2392    POP r14
2393    POP r13
2394    POP r12
2395    POP rbx
2396    POP r8
2397    POP rcx
2398    POP rbp
2399    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
2400    je .Losr_return_double_quick
2401    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
2402    je .Losr_return_float_quick
2403    movq %rax, (%rcx)              // Store the result assuming its a long, int or Object*
2404    ret
2405.Losr_return_double_quick:
2406    movsd %xmm0, (%rcx)            // Store the double floating point result.
2407    ret
2408.Losr_return_float_quick:
2409    movss %xmm0, (%rcx)            // Store the floating point result.
2410    ret
2411.Losr_entry:
2412    CFI_RESTORE_STATE             // Restore CFI state; however, since the call has pushed the
2413    CFI_DEF_CFA_REGISTER(rbp)     // return address we need to switch the CFA register to RBP.
2414
2415    subl LITERAL(8), %ecx         // Given stack size contains pushed frame pointer, substract it.
2416    subq %rcx, %rsp
2417    movq %rsp, %rdi               // rdi := beginning of stack
2418    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
2419    jmp *%rdx
2420END_FUNCTION art_quick_osr_stub
2421
2422DEFINE_FUNCTION art_quick_invoke_polymorphic
2423                                                   // On entry: RDI := unused, RSI := receiver
2424    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
2425    movq %rsi, %rdi                                // RDI := receiver
2426    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
2427    movq %rsp, %rdx                                // RDX := pass SP
2428    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
2429                                                   // save the code pointer
2430    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2431    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
2432    RETURN_OR_DELIVER_PENDING_EXCEPTION
2433END_FUNCTION art_quick_invoke_polymorphic
2434
2435DEFINE_FUNCTION art_quick_invoke_custom
2436    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
2437                                                   // RDI := call_site_index
2438    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread::Current()
2439    movq %rsp, %rdx                                // RDX := SP
2440    call SYMBOL(artInvokeCustom)                   // artInvokeCustom(Thread*, SP)
2441    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2442    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
2443    RETURN_OR_DELIVER_PENDING_EXCEPTION
2444END_FUNCTION art_quick_invoke_custom
2445
2446// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
2447//  Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
2448//  Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
2449//  Argument 2: RDX: The value of DEX PC (memory address of the methods bytecode).
2450DEFINE_FUNCTION ExecuteSwitchImplAsm
2451    PUSH rbx                 // Spill RBX
2452    movq %rdx, %rbx          // RBX = DEX PC (callee save register)
2453    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)
2454
2455    call *%rsi               // Call the wrapped function
2456
2457    POP rbx                  // Restore RBX
2458    ret
2459END_FUNCTION ExecuteSwitchImplAsm
2460