1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_arm.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21
22    /* Deliver the given exception */
23    .extern artDeliverExceptionFromCode
24    /* Deliver an exception pending on a thread */
25    .extern artDeliverPendingException
26
27    /*
28     * Macro to spill the GPRs.
29     */
30.macro SPILL_ALL_CALLEE_SAVE_GPRS
31    push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
32    .cfi_adjust_cfa_offset 36
33    .cfi_rel_offset r4, 0
34    .cfi_rel_offset r5, 4
35    .cfi_rel_offset r6, 8
36    .cfi_rel_offset r7, 12
37    .cfi_rel_offset r8, 16
38    .cfi_rel_offset r9, 20
39    .cfi_rel_offset r10, 24
40    .cfi_rel_offset r11, 28
41    .cfi_rel_offset lr, 32
42.endm
43
44    /*
45     * Macro that sets up the callee save frame to conform with
46     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
47     */
48.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
49    SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
50    vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
51    .cfi_adjust_cfa_offset 64
52    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
53    .cfi_adjust_cfa_offset 12
54    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
55    @ Load kSaveAllCalleeSaves Method* into rTemp.
56    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
57    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
58    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
59
60     // Ugly compile-time check, but we only have the preprocessor.
61#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
62#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
63#endif
64.endm
65
66    /*
67     * Macro that sets up the callee save frame to conform with
68     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
69     */
70.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
71    // Note: We could avoid saving R8 in the case of Baker read
72    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
73    // later; but it's not worth handling this special case.
74    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
75    .cfi_adjust_cfa_offset 28
76    .cfi_rel_offset r5, 0
77    .cfi_rel_offset r6, 4
78    .cfi_rel_offset r7, 8
79    .cfi_rel_offset r8, 12
80    .cfi_rel_offset r10, 16
81    .cfi_rel_offset r11, 20
82    .cfi_rel_offset lr, 24
83    sub sp, #4                                    @ bottom word will hold Method*
84    .cfi_adjust_cfa_offset 4
85    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
86    @ Load kSaveRefsOnly Method* into rTemp.
87    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
88    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
89    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
90
91    // Ugly compile-time check, but we only have the preprocessor.
92#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
93#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
94#endif
95.endm
96
97.macro RESTORE_SAVE_REFS_ONLY_FRAME
98    add sp, #4               @ bottom word holds Method*
99    .cfi_adjust_cfa_offset -4
100    // Note: Likewise, we could avoid restoring R8 in the case of Baker
101    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
102    // later; but it's not worth handling this special case.
103    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
104    .cfi_restore r5
105    .cfi_restore r6
106    .cfi_restore r7
107    .cfi_restore r8
108    .cfi_restore r10
109    .cfi_restore r11
110    .cfi_restore lr
111    .cfi_adjust_cfa_offset -28
112.endm
113
114    /*
115     * Macro that sets up the callee save frame to conform with
116     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
117     */
118.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
119    // Note: We could avoid saving R8 in the case of Baker read
120    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
121    // later; but it's not worth handling this special case.
122    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
123    .cfi_adjust_cfa_offset 40
124    .cfi_rel_offset r1, 0
125    .cfi_rel_offset r2, 4
126    .cfi_rel_offset r3, 8
127    .cfi_rel_offset r5, 12
128    .cfi_rel_offset r6, 16
129    .cfi_rel_offset r7, 20
130    .cfi_rel_offset r8, 24
131    .cfi_rel_offset r10, 28
132    .cfi_rel_offset r11, 32
133    .cfi_rel_offset lr, 36
134    vpush {s0-s15}                     @ 16 words of float args.
135    .cfi_adjust_cfa_offset 64
136    sub sp, #8                         @ 2 words of space, alignment padding and Method*
137    .cfi_adjust_cfa_offset 8
138    // Ugly compile-time check, but we only have the preprocessor.
139#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
140#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
141#endif
142.endm
143
144.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
145    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
146    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
147    @ Load kSaveRefsAndArgs Method* into rTemp.
148    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
149    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
150    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
151.endm
152
153.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
154    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
155    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
156    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
157.endm
158
159.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
160    add  sp, #8                      @ rewind sp
161    .cfi_adjust_cfa_offset -8
162    vpop {s0-s15}
163    .cfi_adjust_cfa_offset -64
164    // Note: Likewise, we could avoid restoring X20 in the case of Baker
165    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
166    // later; but it's not worth handling this special case.
167    pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
168    .cfi_restore r1
169    .cfi_restore r2
170    .cfi_restore r3
171    .cfi_restore r5
172    .cfi_restore r6
173    .cfi_restore r7
174    .cfi_restore r8
175    .cfi_restore r10
176    .cfi_restore r11
177    .cfi_restore lr
178    .cfi_adjust_cfa_offset -40
179.endm
180
181    /*
182     * Macro that sets up the callee save frame to conform with
183     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
184     * when core registers are already saved.
185     */
186.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
187                                        @ 14 words of callee saves and args already saved.
188    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
189    .cfi_adjust_cfa_offset 128
190    sub sp, #8                          @ 2 words of space, alignment padding and Method*
191    .cfi_adjust_cfa_offset 8
192    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
193    @ Load kSaveEverything Method* into rTemp.
194    ldr \rTemp, [\rTemp, #\runtime_method_offset]
195    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
196    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
197
198    // Ugly compile-time check, but we only have the preprocessor.
199#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
200#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
201#endif
202.endm
203
204    /*
205     * Macro that sets up the callee save frame to conform with
206     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
207     */
208.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
209    push {r0-r12, lr}                   @ 14 words of callee saves and args.
210    .cfi_adjust_cfa_offset 56
211    .cfi_rel_offset r0, 0
212    .cfi_rel_offset r1, 4
213    .cfi_rel_offset r2, 8
214    .cfi_rel_offset r3, 12
215    .cfi_rel_offset r4, 16
216    .cfi_rel_offset r5, 20
217    .cfi_rel_offset r6, 24
218    .cfi_rel_offset r7, 28
219    .cfi_rel_offset r8, 32
220    .cfi_rel_offset r9, 36
221    .cfi_rel_offset r10, 40
222    .cfi_rel_offset r11, 44
223    .cfi_rel_offset ip, 48
224    .cfi_rel_offset lr, 52
225    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
226.endm
227
228.macro RESTORE_SAVE_EVERYTHING_FRAME
229    add  sp, #8                         @ rewind sp
230    .cfi_adjust_cfa_offset -8
231    vpop {d0-d15}
232    .cfi_adjust_cfa_offset -128
233    pop {r0-r12, lr}                    @ 14 words of callee saves
234    .cfi_restore r0
235    .cfi_restore r1
236    .cfi_restore r2
237    .cfi_restore r3
238    .cfi_restore r4
239    .cfi_restore r5
240    .cfi_restore r6
241    .cfi_restore r7
242    .cfi_restore r8
243    .cfi_restore r9
244    .cfi_restore r10
245    .cfi_restore r11
246    .cfi_restore r12
247    .cfi_restore lr
248    .cfi_adjust_cfa_offset -56
249.endm
250
251.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
252    add  sp, #8                         @ rewind sp
253    .cfi_adjust_cfa_offset -8
254    vpop {d0-d15}
255    .cfi_adjust_cfa_offset -128
256    add  sp, #4                         @ skip r0
257    .cfi_adjust_cfa_offset -4
258    .cfi_restore r0                     @ debugger can no longer restore caller's r0
259    pop {r1-r12, lr}                    @ 13 words of callee saves
260    .cfi_restore r1
261    .cfi_restore r2
262    .cfi_restore r3
263    .cfi_restore r4
264    .cfi_restore r5
265    .cfi_restore r6
266    .cfi_restore r7
267    .cfi_restore r8
268    .cfi_restore r9
269    .cfi_restore r10
270    .cfi_restore r11
271    .cfi_restore r12
272    .cfi_restore lr
273    .cfi_adjust_cfa_offset -52
274.endm
275
276// Macro to refresh the Marking Register (R8).
277//
278// This macro must be called at the end of functions implementing
279// entrypoints that possibly (directly or indirectly) perform a
280// suspend check (before they return).
281.macro REFRESH_MARKING_REGISTER
282#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
283    ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
284#endif
285.endm
286
287.macro RETURN_IF_RESULT_IS_ZERO
288    cbnz   r0, 1f              @ result non-zero branch over
289    bx     lr                  @ return
2901:
291.endm
292
293.macro RETURN_IF_RESULT_IS_NON_ZERO
294    cbz    r0, 1f              @ result zero branch over
295    bx     lr                  @ return
2961:
297.endm
298
299    /*
300     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
301     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
302     */
303.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
304    mov    r0, rSELF                           @ pass Thread::Current
305    bl     artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
306.endm
307
308    /*
309     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
310     * exception is Thread::Current()->exception_.
311     */
312.macro DELIVER_PENDING_EXCEPTION
313    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
314    DELIVER_PENDING_EXCEPTION_FRAME_READY
315.endm
316
317.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
318    .extern \cxx_name
319ENTRY \c_name
320    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
321    mov r0, rSELF                   @ pass Thread::Current
322    bl  \cxx_name                   @ \cxx_name(Thread*)
323END \c_name
324.endm
325
326.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
327    .extern \cxx_name
328ENTRY \c_name
329    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
330    mov r0, rSELF                   @ pass Thread::Current
331    bl  \cxx_name                   @ \cxx_name(Thread*)
332END \c_name
333.endm
334
335.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
336    .extern \cxx_name
337ENTRY \c_name
338    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
339    mov r1, rSELF                   @ pass Thread::Current
340    bl  \cxx_name                   @ \cxx_name(Thread*)
341END \c_name
342.endm
343
344.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
345    .extern \cxx_name
346ENTRY \c_name
347    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
348    mov r2, rSELF                   @ pass Thread::Current
349    bl  \cxx_name                   @ \cxx_name(Thread*)
350END \c_name
351.endm
352
353.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
354    ldr \reg, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ Get exception field.
355    cbnz \reg, 1f
356    bx lr
3571:
358    DELIVER_PENDING_EXCEPTION
359.endm
360
361.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
362    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
363.endm
364
365.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
366    RETURN_IF_RESULT_IS_ZERO
367    DELIVER_PENDING_EXCEPTION
368.endm
369
370.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
371    RETURN_IF_RESULT_IS_NON_ZERO
372    DELIVER_PENDING_EXCEPTION
373.endm
374
375// Macros taking opportunity of code similarities for downcalls.
376.macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
377    .extern \entrypoint
378ENTRY \name
379    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
380    mov    r1, rSELF                     @ pass Thread::Current
381    bl     \entrypoint                   @ (uint32_t field_idx, Thread*)
382    RESTORE_SAVE_REFS_ONLY_FRAME
383    REFRESH_MARKING_REGISTER
384    \return
385END \name
386.endm
387
388.macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
389    .extern \entrypoint
390ENTRY \name
391    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
392    mov    r2, rSELF                     @ pass Thread::Current
393    bl     \entrypoint                   @ (field_idx, Object*, Thread*)
394    RESTORE_SAVE_REFS_ONLY_FRAME
395    REFRESH_MARKING_REGISTER
396    \return
397END \name
398.endm
399
400.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
401    .extern \entrypoint
402ENTRY \name
403    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
404    mov    r3, rSELF                     @ pass Thread::Current
405    bl     \entrypoint                   @ (field_idx, Object*, new_val, Thread*)
406    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
407    REFRESH_MARKING_REGISTER
408    \return
409END \name
410.endm
411
412    /*
413     * Called by managed code, saves callee saves and then calls artThrowException
414     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
415     */
416ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
417
418    /*
419     * Called by managed code to create and deliver a NullPointerException.
420     */
421NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
422
423    /*
424     * Call installed by a signal handler to create and deliver a NullPointerException.
425     */
426    .extern art_quick_throw_null_pointer_exception_from_signal
427ENTRY art_quick_throw_null_pointer_exception_from_signal
428    // The fault handler pushes the gc map address, i.e. "return address", to stack
429    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
430    .cfi_def_cfa_offset __SIZEOF_POINTER__
431    .cfi_rel_offset lr, 0
432    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
433    .cfi_adjust_cfa_offset 52
434    .cfi_rel_offset r0, 0
435    .cfi_rel_offset r1, 4
436    .cfi_rel_offset r2, 8
437    .cfi_rel_offset r3, 12
438    .cfi_rel_offset r4, 16
439    .cfi_rel_offset r5, 20
440    .cfi_rel_offset r6, 24
441    .cfi_rel_offset r7, 28
442    .cfi_rel_offset r8, 32
443    .cfi_rel_offset r9, 36
444    .cfi_rel_offset r10, 40
445    .cfi_rel_offset r11, 44
446    .cfi_rel_offset ip, 48
447
448    @ save all registers as basis for long jump context
449    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
450    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
451    mov r1, rSELF                   @ pass Thread::Current
452    bl  artThrowNullPointerExceptionFromSignal  @ (Thread*)
453END art_quick_throw_null_pointer_exception_from_signal
454
455    /*
456     * Called by managed code to create and deliver an ArithmeticException.
457     */
458NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
459
460    /*
461     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
462     * index, arg2 holds limit.
463     */
464TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
465
466    /*
467     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
468     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
469     */
470TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
471
472    /*
473     * Called by managed code to create and deliver a StackOverflowError.
474     */
475NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
476
477    /*
478     * All generated callsites for interface invokes and invocation slow paths will load arguments
479     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
480     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
481     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
482     *
483     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
484     * of the target Method* in r0 and method->code_ in r1.
485     *
486     * If unsuccessful, the helper will return null/null. There will bea pending exception in the
487     * thread and we branch to another stub to deliver it.
488     *
489     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
490     * pointing back to the original caller.
491     *
492     * Clobbers IP (R12).
493     */
494.macro INVOKE_TRAMPOLINE_BODY cxx_name
495    .extern \cxx_name
496    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
497    mov    r2, rSELF                      @ pass Thread::Current
498    mov    r3, sp
499    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
500    mov    r12, r1                        @ save Method*->code_
501    RESTORE_SAVE_REFS_AND_ARGS_FRAME
502    REFRESH_MARKING_REGISTER
503    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
504    bx     r12                            @ tail call to target
5051:
506    DELIVER_PENDING_EXCEPTION
507.endm
508.macro INVOKE_TRAMPOLINE c_name, cxx_name
509ENTRY \c_name
510    INVOKE_TRAMPOLINE_BODY \cxx_name
511END \c_name
512.endm
513
514INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
515
516INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
517INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
518INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
519INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
520
521    /*
522     * Quick invocation stub internal.
523     * On entry:
524     *   r0 = method pointer
525     *   r1 = argument array or null for no argument methods
526     *   r2 = size of argument array in bytes
527     *   r3 = (managed) thread pointer
528     *   [sp] = JValue* result
529     *   [sp + 4] = result_in_float
530     *   [sp + 8] = core register argument array
531     *   [sp + 12] = fp register argument array
532     *  +-------------------------+
533     *  | uint32_t* fp_reg_args   |
534     *  | uint32_t* core_reg_args |
535     *  |   result_in_float       | <- Caller frame
536     *  |   Jvalue* result        |
537     *  +-------------------------+
538     *  |          lr             |
539     *  |          r11            |
540     *  |          r9             |
541     *  |          r4             | <- r11
542     *  +-------------------------+
543     *  | uint32_t out[n-1]       |
544     *  |    :      :             |        Outs
545     *  | uint32_t out[0]         |
546     *  | StackRef<ArtMethod>     | <- SP  value=null
547     *  +-------------------------+
548     */
549ENTRY art_quick_invoke_stub_internal
550    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
551    mov    r11, sp                         @ save the stack pointer
552    .cfi_def_cfa_register r11
553
554    mov    r9, r3                          @ move managed thread pointer into r9
555
556    add    r4, r2, #4                      @ create space for method pointer in frame
557    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
558    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
559    mov    sp, r4                          @ 16B alignment ourselves.
560
561    mov    r4, r0                          @ save method*
562    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
563    bl     memcpy                          @ memcpy (dest, src, bytes)
564    mov    ip, #0                          @ set ip to 0
565    str    ip, [sp]                        @ store null for method* at bottom of frame
566
567    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
568    vldm   ip, {s0-s15}                    @ copy s0 - s15
569
570    ldr    ip, [r11, #44]                  @ load core register argument array pointer
571    mov    r0, r4                          @ restore method*
572    add    ip, ip, #4                      @ skip r0
573    ldm    ip, {r1-r3}                     @ copy r1 - r3
574
575    REFRESH_MARKING_REGISTER
576
577    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
578    blx    ip                              @ call the method
579
580    mov    sp, r11                         @ restore the stack pointer
581    .cfi_def_cfa_register sp
582
583    ldr    r4, [sp, #40]                   @ load result_is_float
584    ldr    r9, [sp, #36]                   @ load the result pointer
585    cmp    r4, #0
586    ite    eq
587    strdeq r0, [r9]                        @ store r0/r1 into result pointer
588    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer
589
590    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
591END art_quick_invoke_stub_internal
592
593    /*
594     * On stack replacement stub.
595     * On entry:
596     *   r0 = stack to copy
597     *   r1 = size of stack
598     *   r2 = pc to call
599     *   r3 = JValue* result
600     *   [sp] = shorty
601     *   [sp + 4] = thread
602     */
603ENTRY art_quick_osr_stub
604    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
605    vpush  {s16-s31}                       @ Spill fp-regs (16)
606    .cfi_adjust_cfa_offset 64
607    SAVE_SIZE=(9*4+16*4)
608    mov    r11, sp                         @ Save the stack pointer
609    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
610    .cfi_remember_state
611    mov    r10, r1                         @ Save size of stack
612    ldr    r9, [r11, #(SAVE_SIZE+4)]       @ Move managed thread pointer into r9
613    REFRESH_MARKING_REGISTER
614    mov    r6, r2                          @ Save the pc to call
615    sub    r7, sp, #12                     @ Reserve space for stack pointer,
616                                           @    JValue* result, and ArtMethod* slot.
617    and    r7, #0xFFFFFFF0                 @ Align stack pointer
618    mov    sp, r7                          @ Update stack pointer
619    str    r11, [sp, #4]                   @ Save old stack pointer
620    str    r3, [sp, #8]                    @ Save JValue* result
621    mov    ip, #0
622    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
623    // r11 isn't properly spilled in the osr method, so we need use DWARF expression.
624    // NB: the CFI must be before the call since this is the address gdb will lookup.
625    // NB: gdb expects that cfa_expression returns the CFA value (not address to it).
626    .cfi_escape                            /* CFA = [sp + 4] + SAVE_SIZE */ \
627      0x0f, 6,                             /* DW_CFA_def_cfa_expression(len) */ \
628      0x92, 13, 4,                         /* DW_OP_bregx(reg,offset) */ \
629      0x06,                                /* DW_OP_deref */ \
630      0x23, SAVE_SIZE                      /* DW_OP_plus_uconst(val) */
631    bl     .Losr_entry                     @ Call the method
632    ldr    r10, [sp, #8]                   @ Restore JValue* result
633    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
634    .cfi_def_cfa sp, SAVE_SIZE             @ CFA = sp + SAVE_SIZE
635    ldr    r4, [sp, #SAVE_SIZE]            @ load shorty
636    ldrb   r4, [r4, #0]                    @ load return type
637    cmp    r4, #68                         @ Test if result type char == 'D'.
638    beq    .Losr_fp_result
639    cmp    r4, #70                         @ Test if result type char == 'F'.
640    beq    .Losr_fp_result
641    strd r0, [r10]                         @ Store r0/r1 into result pointer
642    b    .Losr_exit
643.Losr_fp_result:
644    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
645.Losr_exit:
646    vpop   {s16-s31}
647    .cfi_adjust_cfa_offset -64
648    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
649.Losr_entry:
650    .cfi_restore_state
651    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
652    sub sp, sp, r10                        @ Reserve space for callee stack
653    sub r10, r10, #4
654    str lr, [sp, r10]                      @ Store link register per the compiler ABI
655    mov r2, r10
656    mov r1, r0
657    mov r0, sp
658    bl  memcpy                             @ memcpy (dest r0, src r1, bytes r2)
659    bx r6
660END art_quick_osr_stub
661
662    /*
663     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
664     * Both must reside on the stack, between current SP and target SP.
665     * The r12 (IP) shall be clobbered rather than retrieved from gprs_.
666     */
667ARM_ENTRY art_quick_do_long_jump
668    vldm r1, {s0-s31}     @ Load all fprs from argument fprs_.
669    mov  sp, r0           @ Make SP point to gprs_.
670                          @ Do not access fprs_ from now, they may be below SP.
671    ldm  sp, {r0-r11}     @ load r0-r11 from gprs_.
672    ldr  r12, [sp, #60]   @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12).
673    ldr  lr, [sp, #56]    @ Load LR from gprs_, 56 = 4 * 14.
674    ldr  sp, [sp, #52]    @ Load SP from gprs_ 52 = 4 * 13.
675                          @ Do not access gprs_ from now, they are below SP.
676    REFRESH_MARKING_REGISTER
677    bx   r12              @ Do long jump.
678END art_quick_do_long_jump
679
680    /*
681     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
682     * failure.
683     */
684TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
685
686    /*
687     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
688     * possibly null object to lock.
689     */
690    .extern artLockObjectFromCode
691ENTRY art_quick_lock_object
692    ldr    r1, [rSELF, #THREAD_ID_OFFSET]
693    cbz    r0, .Lslow_lock
694.Lretry_lock:
695    ldrex  r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
696    eor    r3, r2, r1                 @ Prepare the value to store if unlocked
697                                      @   (thread id, count of 0 and preserved read barrier bits),
698                                      @ or prepare to compare thread id for recursive lock check
699                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
700    ands   ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
701    bne    .Lnot_unlocked             @ Check if unlocked.
702    @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits.
703    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
704    cbnz   r2, .Llock_strex_fail      @ If store failed, retry.
705    dmb    ish                        @ Full (LoadLoad|LoadStore) memory barrier.
706    bx lr
707.Lnot_unlocked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
708#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
709#error "Expecting thin lock count and gc state in consecutive bits."
710#endif
711                                      @ Check lock word state and thread id together,
712    bfc    r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
713    cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, or the lock word's
714                                      @ thread id did not match, go slow path.
715    add    r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Increment the recursive lock count.
716                                      @ Extract the new thin lock count for overflow check.
717    ubfx   r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
718    cbz    r2, .Lslow_lock            @ Zero as the new count indicates overflow, go slow path.
719    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
720    cbnz   r2, .Llock_strex_fail      @ If strex failed, retry.
721    bx lr
722.Llock_strex_fail:
723    b      .Lretry_lock               @ retry
724// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
725END art_quick_lock_object
726
727ENTRY art_quick_lock_object_no_inline
728    // This is also the slow path for art_quick_lock_object. Note that we
729    // need a local label, the assembler complains about target being out of
730    // range if we try to jump to `art_quick_lock_object_no_inline`.
731.Lslow_lock:
732    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
733    mov    r1, rSELF                  @ pass Thread::Current
734    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
735    RESTORE_SAVE_REFS_ONLY_FRAME
736    REFRESH_MARKING_REGISTER
737    RETURN_IF_RESULT_IS_ZERO
738    DELIVER_PENDING_EXCEPTION
739END art_quick_lock_object_no_inline
740
741    /*
742     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
743     * r0 holds the possibly null object to lock.
744     */
745    .extern artUnlockObjectFromCode
746ENTRY art_quick_unlock_object
747    ldr    r1, [rSELF, #THREAD_ID_OFFSET]
748    cbz    r0, .Lslow_unlock
749.Lretry_unlock:
750#ifndef USE_READ_BARRIER
751    ldr    r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
752#else
753                                      @ Need to use atomic instructions for read barrier.
754    ldrex  r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
755#endif
756    eor    r3, r2, r1                 @ Prepare the value to store if simply locked
757                                      @   (mostly 0s, and preserved read barrier bits),
758                                      @ or prepare to compare thread id for recursive lock check
759                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
760    ands   ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
761    bne    .Lnot_simply_locked        @ Locked recursively or by other thread?
762    @ Transition to unlocked.
763    dmb    ish                        @ Full (LoadStore|StoreStore) memory barrier.
764#ifndef USE_READ_BARRIER
765    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
766#else
767    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
768    cbnz   r2, .Lunlock_strex_fail    @ If the store failed, retry.
769#endif
770    bx     lr
771.Lnot_simply_locked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
772#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
773#error "Expecting thin lock count and gc state in consecutive bits."
774#endif
775                                      @ Check lock word state and thread id together,
776    bfc    r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
777    cbnz   r3, .Lslow_unlock          @ if either of the top two bits are set, or the lock word's
778                                      @ thread id did not match, go slow path.
779    sub    r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Decrement recursive lock count.
780#ifndef USE_READ_BARRIER
781    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
782#else
783    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
784    cbnz   r2, .Lunlock_strex_fail    @ If the store failed, retry.
785#endif
786    bx     lr
787.Lunlock_strex_fail:
788    b      .Lretry_unlock             @ retry
789// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
790END art_quick_unlock_object
791
792ENTRY art_quick_unlock_object_no_inline
793    // This is also the slow path for art_quick_unlock_object. Note that we
794    // need a local label, the assembler complains about target being out of
795    // range if we try to jump to `art_quick_unlock_object_no_inline`.
796.Lslow_unlock:
797    @ save callee saves in case exception allocation triggers GC
798    SETUP_SAVE_REFS_ONLY_FRAME r1
799    mov    r1, rSELF                  @ pass Thread::Current
800    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
801    RESTORE_SAVE_REFS_ONLY_FRAME
802    REFRESH_MARKING_REGISTER
803    RETURN_IF_RESULT_IS_ZERO
804    DELIVER_PENDING_EXCEPTION
805END art_quick_unlock_object_no_inline
806
807    /*
808     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
809     * artThrowClassCastExceptionForObject.
810     */
811    .extern artInstanceOfFromCode
812    .extern artThrowClassCastExceptionForObject
813ENTRY art_quick_check_instance_of
814    // Type check using the bit string passes null as the target class. In that case just throw.
815    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check
816
817    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
818    .cfi_adjust_cfa_offset 16
819    .cfi_rel_offset r0, 0
820    .cfi_rel_offset r1, 4
821    .cfi_rel_offset r2, 8
822    .cfi_rel_offset lr, 12
823    bl artInstanceOfFromCode
824    cbz    r0, .Lthrow_class_cast_exception
825    pop {r0-r2, pc}
826
827.Lthrow_class_cast_exception:
828    pop {r0-r2, lr}
829    .cfi_adjust_cfa_offset -16
830    .cfi_restore r0
831    .cfi_restore r1
832    .cfi_restore r2
833    .cfi_restore lr
834
835.Lthrow_class_cast_exception_for_bitstring_check:
836    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
837    mov r2, rSELF                   @ pass Thread::Current
838    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
839    bkpt
840END art_quick_check_instance_of
841
842// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
843.macro POP_REG_NE rReg, offset, rExclude
844    .ifnc \rReg, \rExclude
845        ldr \rReg, [sp, #\offset]   @ restore rReg
846        .cfi_restore \rReg
847    .endif
848.endm
849
850// Save rReg's value to [sp, #offset].
851.macro PUSH_REG rReg, offset
852    str \rReg, [sp, #\offset]       @ save rReg
853    .cfi_rel_offset \rReg, \offset
854.endm
855
856    /*
857     * Macro to insert read barrier, only used in art_quick_aput_obj.
858     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
859     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
860     */
861.macro READ_BARRIER rDest, rObj, offset
862#ifdef USE_READ_BARRIER
863    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
864    .cfi_adjust_cfa_offset 24
865    .cfi_rel_offset r0, 0
866    .cfi_rel_offset r1, 4
867    .cfi_rel_offset r2, 8
868    .cfi_rel_offset r3, 12
869    .cfi_rel_offset ip, 16
870    .cfi_rel_offset lr, 20
871    sub sp, #8                      @ push padding
872    .cfi_adjust_cfa_offset 8
873    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
874    .ifnc \rObj, r1
875        mov r1, \rObj               @ pass rObj
876    .endif
877    mov r2, #\offset                @ pass offset
878    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
879    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
880    .ifnc \rDest, r0
881        mov \rDest, r0              @ save return value in rDest
882    .endif
883    add sp, #8                      @ pop padding
884    .cfi_adjust_cfa_offset -8
885    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
886    POP_REG_NE r1, 4, \rDest
887    POP_REG_NE r2, 8, \rDest
888    POP_REG_NE r3, 12, \rDest
889    POP_REG_NE ip, 16, \rDest
890    add sp, #20
891    .cfi_adjust_cfa_offset -20
892    pop {lr}                        @ restore lr
893    .cfi_adjust_cfa_offset -4
894    .cfi_restore lr
895#else
896    ldr \rDest, [\rObj, #\offset]
897    UNPOISON_HEAP_REF \rDest
898#endif  // USE_READ_BARRIER
899.endm
900
901#ifdef USE_READ_BARRIER
902    .extern artReadBarrierSlow
903#endif
904    .hidden art_quick_aput_obj
905ENTRY art_quick_aput_obj
906#ifdef USE_READ_BARRIER
907    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
908    tst r2, r2
909    beq .Ldo_aput_null
910#else
911    cbz r2, .Ldo_aput_null
912#endif  // USE_READ_BARRIER
913    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
914    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
915    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
916    cmp r3, ip  @ value's type == array's component type - trivial assignability
917    bne .Lcheck_assignability
918.Ldo_aput:
919    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
920    POISON_HEAP_REF r2
921    str r2, [r3, r1, lsl #2]
922    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
923    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
924    strb r3, [r3, r0]
925    blx lr
926.Ldo_aput_null:
927    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
928    str r2, [r3, r1, lsl #2]
929    blx lr
930.Lcheck_assignability:
931    push {r0-r2, lr}             @ save arguments
932    .cfi_adjust_cfa_offset 16
933    .cfi_rel_offset r0, 0
934    .cfi_rel_offset r1, 4
935    .cfi_rel_offset r2, 8
936    .cfi_rel_offset lr, 12
937    mov r1, ip
938    mov r0, r3
939    bl artIsAssignableFromCode
940    cbz r0, .Lthrow_array_store_exception
941    pop {r0-r2, lr}
942    .cfi_restore r0
943    .cfi_restore r1
944    .cfi_restore r2
945    .cfi_restore lr
946    .cfi_adjust_cfa_offset -16
947    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
948    POISON_HEAP_REF r2
949    str r2, [r3, r1, lsl #2]
950    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
951    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
952    strb r3, [r3, r0]
953    blx lr
954.Lthrow_array_store_exception:
955    pop {r0-r2, lr}
956    /* No need to repeat restore cfi directives, the ones above apply here. */
957    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
958    mov r1, r2
959    mov r2, rSELF                  @ pass Thread::Current
960    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
961    bkpt                           @ unreached
962END art_quick_aput_obj
963
964// Macro to facilitate adding new allocation entrypoints.
965.macro ONE_ARG_DOWNCALL name, entrypoint, return
966    .extern \entrypoint
967ENTRY \name
968    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
969    mov    r1, rSELF                  @ pass Thread::Current
970    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
971    RESTORE_SAVE_REFS_ONLY_FRAME
972    REFRESH_MARKING_REGISTER
973    \return
974END \name
975.endm
976
977// Macro to facilitate adding new allocation entrypoints.
978.macro TWO_ARG_DOWNCALL name, entrypoint, return
979    .extern \entrypoint
980ENTRY \name
981    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
982    mov    r2, rSELF                  @ pass Thread::Current
983    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
984    RESTORE_SAVE_REFS_ONLY_FRAME
985    REFRESH_MARKING_REGISTER
986    \return
987END \name
988.endm
989
990// Macro to facilitate adding new array allocation entrypoints.
991.macro THREE_ARG_DOWNCALL name, entrypoint, return
992    .extern \entrypoint
993ENTRY \name
994    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
995    mov    r3, rSELF                  @ pass Thread::Current
996    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
997    bl     \entrypoint
998    RESTORE_SAVE_REFS_ONLY_FRAME
999    REFRESH_MARKING_REGISTER
1000    \return
1001END \name
1002.endm
1003
1004// Macro to facilitate adding new allocation entrypoints.
1005.macro FOUR_ARG_DOWNCALL name, entrypoint, return
1006    .extern \entrypoint
1007ENTRY \name
1008    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
1009    str    rSELF, [sp, #-16]!         @ expand the frame and pass Thread::Current
1010    .cfi_adjust_cfa_offset 16
1011    bl     \entrypoint
1012    add    sp, #16                    @ strip the extra frame
1013    .cfi_adjust_cfa_offset -16
1014    RESTORE_SAVE_REFS_ONLY_FRAME
1015    REFRESH_MARKING_REGISTER
1016    \return
1017END \name
1018.endm
1019
1020    /*
1021     * Macro for resolution and initialization of indexed DEX file
1022     * constants such as classes and strings.
1023     */
1024.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
1025    .extern \entrypoint
1026ENTRY \name
1027    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
1028    mov    r1, rSELF                  @ pass Thread::Current
1029    bl     \entrypoint                @ (uint32_t index, Thread*)
1030    cbz    r0, 1f                     @ If result is null, deliver the OOME.
1031    .cfi_remember_state
1032    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
1033    REFRESH_MARKING_REGISTER
1034    bx     lr
1035    .cfi_restore_state
10361:
1037    DELIVER_PENDING_EXCEPTION_FRAME_READY
1038END \name
1039.endm
1040
1041.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
1042    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
1043.endm
1044
1045ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
1046ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
1047ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
1048ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
1049ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
1050ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
1051
1052// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
1053// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
1054
1055    /*
1056     * Called by managed code to resolve a static field and load a non-wide value.
1057     */
1058ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1059ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1060ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1061ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1062ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1063ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1064    /*
1065     * Called by managed code to resolve a static field and load a 64-bit primitive value.
1066     */
1067    .extern artGet64StaticFromCompiledCode
1068ENTRY art_quick_get64_static
1069    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
1070    mov    r1, rSELF                     @ pass Thread::Current
1071    bl     artGet64StaticFromCompiledCode  @ (uint32_t field_idx, Thread*)
1072    ldr    r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1073    RESTORE_SAVE_REFS_ONLY_FRAME
1074    REFRESH_MARKING_REGISTER
1075    cbnz   r2, 1f                        @ success if no exception pending
1076    bx     lr                            @ return on success
10771:
1078    DELIVER_PENDING_EXCEPTION
1079END art_quick_get64_static
1080
1081    /*
1082     * Called by managed code to resolve an instance field and load a non-wide value.
1083     */
1084TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1085TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1086TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1087TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1088TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1089TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
1090    /*
1091     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
1092     */
1093    .extern artGet64InstanceFromCompiledCode
1094ENTRY art_quick_get64_instance
1095    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
1096    mov    r2, rSELF                     @ pass Thread::Current
1097    bl     artGet64InstanceFromCompiledCode  @ (field_idx, Object*, Thread*)
1098    ldr    r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1099    RESTORE_SAVE_REFS_ONLY_FRAME
1100    REFRESH_MARKING_REGISTER
1101    cbnz   r2, 1f                        @ success if no exception pending
1102    bx     lr                            @ return on success
11031:
1104    DELIVER_PENDING_EXCEPTION
1105END art_quick_get64_instance
1106
1107    /*
1108     * Called by managed code to resolve a static field and store a value.
1109     */
1110TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1111TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1112TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1113TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1114
1115    /*
1116     * Called by managed code to resolve an instance field and store a non-wide value.
1117     */
1118THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1119THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1120THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1121THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
1122
1123    /*
1124     * Called by managed code to resolve an instance field and store a wide value.
1125     */
1126    .extern artSet64InstanceFromCompiledCode
1127ENTRY art_quick_set64_instance
1128    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
1129                                         @ r2:r3 contain the wide argument
1130    str    rSELF, [sp, #-16]!            @ expand the frame and pass Thread::Current
1131    .cfi_adjust_cfa_offset 16
1132    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
1133    add    sp, #16                       @ release out args
1134    .cfi_adjust_cfa_offset -16
1135    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
1136    REFRESH_MARKING_REGISTER
1137    RETURN_IF_RESULT_IS_ZERO
1138    DELIVER_PENDING_EXCEPTION
1139END art_quick_set64_instance
1140
1141    .extern artSet64StaticFromCompiledCode
1142ENTRY art_quick_set64_static
1143    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
1144                                          @ r2:r3 contain the wide argument
1145    str    rSELF, [sp, #-16]!             @ expand the frame and pass Thread::Current
1146    .cfi_adjust_cfa_offset 16
1147    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
1148    add    sp, #16                        @ release out args
1149    .cfi_adjust_cfa_offset -16
1150    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
1151    REFRESH_MARKING_REGISTER
1152    RETURN_IF_RESULT_IS_ZERO
1153    DELIVER_PENDING_EXCEPTION
1154END art_quick_set64_static
1155
1156// Generate the allocation entrypoints for each allocator.
1157GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
1158// Comment out allocators that have arm specific asm.
1159// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1160// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1161GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1162GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
1163// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1164// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
1165// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
1166// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
1167// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
1168GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1169GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1170GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1171
1172// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
1173// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
1174GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
1175GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
1176// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
1177// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
1178// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
1179// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
1180// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
1181GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
1182GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
1183GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
1184
1185// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
1186//
1187// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1188// If isInitialized=0 the compiler can only assume it's been at least resolved.
1189.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
1190ENTRY \c_name
1191    // Fast path rosalloc allocation.
1192    // r0: type/return value, rSELF (r9): Thread::Current
1193    // r1, r2, r3, r12: free.
1194    ldr    r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
1195                                                              // allocation stack has room.
1196                                                              // TODO: consider using ldrd.
1197    ldr    r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1198    cmp    r3, r12
1199    bhs    .Lslow_path\c_name
1200
1201    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
1202    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1203                                                              // local allocation. Also does the
1204                                                              // initialized and finalizable checks.
1205    // When isInitialized == 0, then the class is potentially not yet initialized.
1206    // If the class is not yet initialized, the object size will be very large to force the branch
1207    // below to be taken.
1208    //
1209    // See InitializeClassVisitors in class-inl.h for more details.
1210    bhs    .Lslow_path\c_name
1211                                                              // Compute the rosalloc bracket index
1212                                                              // from the size. Since the size is
1213                                                              // already aligned we can combine the
1214                                                              // two shifts together.
1215    add    r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
1216                                                              // Subtract pointer size since ther
1217                                                              // are no runs for 0 byte allocations
1218                                                              // and the size is already aligned.
1219                                                              // Load the rosalloc run (r12)
1220    ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
1221                                                              // Load the free list head (r3). This
1222                                                              // will be the return val.
1223    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1224    cbz    r3, .Lslow_path\c_name
1225    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1226    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1227                                                              // and update the list head with the
1228                                                              // next pointer.
1229    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1230                                                              // Store the class pointer in the
1231                                                              // header. This also overwrites the
1232                                                              // next pointer. The offsets are
1233                                                              // asserted to match.
1234#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1235#error "Class pointer needs to overwrite next pointer."
1236#endif
1237    POISON_HEAP_REF r0
1238    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
1239                                                              // Push the new object onto the thread
1240                                                              // local allocation stack and
1241                                                              // increment the thread local
1242                                                              // allocation stack top.
1243    ldr    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1244    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
1245    str    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1246                                                              // Decrement the size of the free list
1247
1248    // After this "STR" the object is published to the thread local allocation stack,
1249    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1250    // It is not yet visible to the running (user) compiled code until after the return.
1251    //
1252    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1253    // the state of the allocation stack slot. It can be a pointer to one of:
1254    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
1255    //       (The stack initial state is "null" pointers).
1256    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
1257    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1258    // Other states are not allowed.
1259    //
1260    // An object that is invalid only temporarily, and will eventually become valid.
1261    // The internal runtime code simply checks if the object is not null or is partial and then
1262    // ignores it.
1263    //
1264    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
1265    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
1266    // "next" pointer is not-cyclic.)
1267    //
1268    // See also b/28790624 for a listing of CLs dealing with this race.
1269    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1270    sub    r1, #1
1271                                                              // TODO: consider combining this store
1272                                                              // and the list head store above using
1273                                                              // strd.
1274    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1275
1276    mov    r0, r3                                             // Set the return value and return.
1277.if \isInitialized == 0
1278    // This barrier is only necessary when the allocation also requires
1279    // a class initialization check.
1280    //
1281    // If the class is already observably initialized, then new-instance allocations are protected
1282    // from publishing by the compiler which inserts its own StoreStore barrier.
1283    dmb    ish
1284    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
1285    // they should happen-after the implicit initialization check.
1286    //
1287    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
1288    // a new observably-initialized class state.
1289.endif
1290    bx     lr
1291
1292.Lslow_path\c_name:
1293    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
1294    mov    r1, rSELF                  @ pass Thread::Current
1295    bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
1296    RESTORE_SAVE_REFS_ONLY_FRAME
1297    REFRESH_MARKING_REGISTER
1298    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1299END \c_name
1300.endm
1301
1302ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
1303ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1304
1305// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
1306// and art_quick_alloc_object_resolved/initialized_region_tlab.
1307//
1308// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free.
1309// Need to preserve r0 to the slow path.
1310//
1311// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1312// If isInitialized=0 the compiler can only assume it's been at least resolved.
1313.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
1314                                                             // Load thread_local_pos (r12) and
1315                                                             // thread_local_end (r3) with ldrd.
1316                                                             // Check constraints for ldrd.
1317#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1318#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1319#endif
1320    ldrd   r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1321    sub    r12, r3, r12                                       // Compute the remaining buf size.
1322    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
1323    cmp    r3, r12                                            // Check if it fits.
1324    // When isInitialized == 0, then the class is potentially not yet initialized.
1325    // If the class is not yet initialized, the object size will be very large to force the branch
1326    // below to be taken.
1327    //
1328    // See InitializeClassVisitors in class-inl.h for more details.
1329    bhi    \slowPathLabel
1330    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1331                                                              // Reload old thread_local_pos (r0)
1332                                                              // for the return value.
1333    ldr    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1334    add    r1, r2, r3
1335    str    r1, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1336    // After this "STR" the object is published to the thread local allocation stack,
1337    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1338    // It is not yet visible to the running (user) compiled code until after the return.
1339    //
1340    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1341    // the state of the object. It can be either:
1342    // 1) A partially valid object, with a null class pointer
1343    //       (because the initial state of TLAB buffers is all 0s/nulls).
1344    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1345    // Other states are not allowed.
1346    //
1347    // An object that is invalid only temporarily, and will eventually become valid.
1348    // The internal runtime code simply checks if the object is not null or is partial and then
1349    // ignores it.
1350    //
1351    // (Note: The actual check is done by checking that the object's class pointer is non-null.
1352    // Also, unlike rosalloc, the object can never be observed as null).
1353    ldr    r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
1354    add    r1, r1, #1
1355    str    r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1356    POISON_HEAP_REF r0
1357    str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1358                                                              // Fence. This is "ish" not "ishst" so
1359                                                              // that the code after this allocation
1360                                                              // site will see the right values in
1361                                                              // the fields of the class.
1362    mov    r0, r2
1363.if \isInitialized == 0
1364    // This barrier is only necessary when the allocation also requires
1365    // a class initialization check.
1366    //
1367    // If the class is already observably initialized, then new-instance allocations are protected
1368    // from publishing by the compiler which inserts its own StoreStore barrier.
1369    dmb    ish
1370    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
1371    // they should happen-after the implicit initialization check.
1372    //
1373    // TODO: Remove dmb for class initialization checks (b/36692143)
1374.endif
1375    bx     lr
1376.endm
1377
1378// The common code for art_quick_alloc_object_*region_tlab
1379.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
1380ENTRY \name
1381    // Fast path tlab allocation.
1382    // r0: type, rSELF (r9): Thread::Current
1383    // r1, r2, r3, r12: free.
1384    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
1385.Lslow_path\name:
1386    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
1387    mov    r1, rSELF                                          // Pass Thread::Current.
1388    bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
1389    RESTORE_SAVE_REFS_ONLY_FRAME
1390    REFRESH_MARKING_REGISTER
1391    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1392END \name
1393.endm
1394
1395GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
1396GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
1397GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
1398GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1399
1400
1401// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
1402// and art_quick_alloc_array_resolved/initialized_region_tlab.
1403//
1404// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free.
1405// Need to preserve r0 and r1 to the slow path.
1406.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
1407    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
1408                                                              // (addr + 7) & ~7.
1409
1410                                                              // Load thread_local_pos (r3) and
1411                                                              // thread_local_end (r12) with ldrd.
1412                                                              // Check constraints for ldrd.
1413#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1414#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1415#endif
1416    ldrd   r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1417    sub    r12, r12, r3                                       // Compute the remaining buf size.
1418    cmp    r2, r12                                            // Check if the total_size fits.
1419    // The array class is always initialized here. Unlike new-instance,
1420    // this does not act as a double test.
1421    bhi    \slowPathLabel
1422    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1423    add    r2, r2, r3
1424    str    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1425    ldr    r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
1426    add    r2, r2, #1
1427    str    r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1428    POISON_HEAP_REF r0
1429    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1430    str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
1431                                                              // Fence. This is "ish" not "ishst" so
1432                                                              // that the code after this allocation
1433                                                              // site will see the right values in
1434                                                              // the fields of the class.
1435    mov    r0, r3
1436// new-array is special. The class is loaded and immediately goes to the Initialized state
1437// before it is published. Therefore the only fence needed is for the publication of the object.
1438// See ClassLinker::CreateArrayClass() for more details.
1439
1440// For publication of the new array, we don't need a 'dmb ishst' here.
1441// The compiler generates 'dmb ishst' for all new-array insts.
1442    bx     lr
1443.endm
1444
1445.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1446ENTRY \name
1447    // Fast path array allocation for region tlab allocation.
1448    // r0: mirror::Class* type
1449    // r1: int32_t component_count
1450    // rSELF (r9): thread
1451    // r2, r3, r12: free.
1452    \size_setup .Lslow_path\name
1453    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
1454.Lslow_path\name:
1455    // r0: mirror::Class* klass
1456    // r1: int32_t component_count
1457    // r2: Thread* self
1458    SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
1459    mov    r2, rSELF               // pass Thread::Current
1460    bl     \entrypoint
1461    RESTORE_SAVE_REFS_ONLY_FRAME
1462    REFRESH_MARKING_REGISTER
1463    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1464END \name
1465.endm
1466
1467.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
1468    bkpt                                                    // We should never enter here.
1469                                                            // Code below is for reference.
1470                                                            // Possibly a large object, go slow.
1471                                                            // Also does negative array size check.
1472    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
1473    cmp r1, r2
1474    bhi \slow_path
1475                                                            // Array classes are never finalizable
1476                                                            // or uninitialized, no need to check.
1477    ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
1478    UNPOISON_HEAP_REF r3
1479    ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1480    lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
1481                                                            // bits.
1482    lsl    r2, r1, r3                                       // Calculate data size
1483                                                            // Add array data offset and alignment.
1484    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1485#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1486#error Long array data offset must be 4 greater than int array data offset.
1487#endif
1488
1489    add    r3, r3, #1                                       // Add 4 to the length only if the
1490                                                            // component size shift is 3
1491                                                            // (for 64 bit alignment).
1492    and    r3, r3, #4
1493    add    r2, r2, r3
1494.endm
1495
1496.macro COMPUTE_ARRAY_SIZE_8 slow_path
1497    // Possibly a large object, go slow.
1498    // Also does negative array size check.
1499    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
1500    cmp r1, r2
1501    bhi \slow_path
1502    // Add array data offset and alignment.
1503    add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1504.endm
1505
1506.macro COMPUTE_ARRAY_SIZE_16 slow_path
1507    // Possibly a large object, go slow.
1508    // Also does negative array size check.
1509    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
1510    cmp r1, r2
1511    bhi \slow_path
1512    lsl    r2, r1, #1
1513    // Add array data offset and alignment.
1514    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1515.endm
1516
1517.macro COMPUTE_ARRAY_SIZE_32 slow_path
1518    // Possibly a large object, go slow.
1519    // Also does negative array size check.
1520    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
1521    cmp r1, r2
1522    bhi \slow_path
1523    lsl    r2, r1, #2
1524    // Add array data offset and alignment.
1525    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1526.endm
1527
1528.macro COMPUTE_ARRAY_SIZE_64 slow_path
1529    // Possibly a large object, go slow.
1530    // Also does negative array size check.
1531    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
1532    cmp r1, r2
1533    bhi \slow_path
1534    lsl    r2, r1, #3
1535    // Add array data offset and alignment.
1536    add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1537.endm
1538
1539// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
1540// the entrypoint once all backends have been updated to use the size variants.
1541GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1542GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1543GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1544GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
1545GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
1546GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1547GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
1548GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
1549GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
1550GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1551
1552    /*
1553     * Called by managed code when the value in rSUSPEND has been decremented to 0.
1554     */
1555    .extern artTestSuspendFromCode
1556ENTRY art_quick_test_suspend
1557    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
1558    mov    r0, rSELF
1559    bl     artTestSuspendFromCode               @ (Thread*)
1560    RESTORE_SAVE_EVERYTHING_FRAME
1561    REFRESH_MARKING_REGISTER
1562    bx     lr
1563END art_quick_test_suspend
1564
1565ENTRY art_quick_implicit_suspend
1566    mov    r0, rSELF
1567    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
1568    bl     artTestSuspendFromCode             @ (Thread*)
1569    RESTORE_SAVE_REFS_ONLY_FRAME
1570    REFRESH_MARKING_REGISTER
1571    bx     lr
1572END art_quick_implicit_suspend
1573
1574    /*
1575     * Called by managed code that is attempting to call a method on a proxy class. On entry
1576     * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
1577     * frame size of the invoked proxy method agrees with a ref and args callee save frame.
1578     */
1579     .extern artQuickProxyInvokeHandler
1580ENTRY art_quick_proxy_invoke_handler
1581    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1582    mov     r2, rSELF              @ pass Thread::Current
1583    mov     r3, sp                 @ pass SP
1584    blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
1585    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1586    // Tear down the callee-save frame. Skip arg registers.
1587    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1588    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1589    RESTORE_SAVE_REFS_ONLY_FRAME
1590    REFRESH_MARKING_REGISTER
1591    cbnz    r2, 1f                 @ success if no exception is pending
1592    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1593    bx      lr                     @ return on success
15941:
1595    DELIVER_PENDING_EXCEPTION
1596END art_quick_proxy_invoke_handler
1597
1598    /*
1599     * Called to resolve an imt conflict.
1600     * r0 is the conflict ArtMethod.
1601     * r12 is a hidden argument that holds the target interface method's dex method index.
1602     *
1603     * Note that this stub writes to r0, r4, and r12.
1604     */
1605    .extern artLookupResolvedMethod
1606ENTRY art_quick_imt_conflict_trampoline
1607    push    {r1-r2}
1608    .cfi_adjust_cfa_offset (2 * 4)
1609    .cfi_rel_offset r1, 0
1610    .cfi_rel_offset r2, 4
1611    ldr     r4, [sp, #(2 * 4)]  // Load referrer.
1612    ldr     r2, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
1613    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
1614    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
1615#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
1616#error "Expecting declaring class and access flags to be consecutive for LDRD."
1617#endif
1618    ldrd    r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET]
1619    // If the method is obsolete, just go through the dex cache miss slow path.
1620    lsrs    r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1)
1621    bcs     .Limt_conflict_trampoline_dex_cache_miss
1622    ldr     r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET]  // Load the DexCache (without read barrier).
1623    UNPOISON_HEAP_REF r4
1624    ubfx    r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
1625    ldr     r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
1626    add     r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
1627
1628// FIXME: Configure the build to use the faster code when appropriate.
1629//        Currently we fall back to the slower version.
1630#if HAS_ATOMIC_LDRD
1631    ldrd    r0, r1, [r4]
1632#else
1633    push    {r3}
1634    .cfi_adjust_cfa_offset 4
1635    .cfi_rel_offset r3, 0
1636.Limt_conflict_trampoline_retry_load:
1637    ldrexd  r0, r1, [r4]
1638    strexd  r3, r0, r1, [r4]
1639    cmp     r3, #0
1640    bne     .Limt_conflict_trampoline_retry_load
1641    pop     {r3}
1642    .cfi_adjust_cfa_offset -4
1643    .cfi_restore r3
1644#endif
1645
1646    ldr     r4, [r2]  // Load first entry in ImtConflictTable.
1647    cmp     r1, r12   // Compare method index to see if we had a DexCache method hit.
1648    bne     .Limt_conflict_trampoline_dex_cache_miss
1649.Limt_table_iterate:
1650    cmp     r4, r0
1651    // Branch if found. Benchmarks have shown doing a branch here is better.
1652    beq     .Limt_table_found
1653    // If the entry is null, the interface method is not in the ImtConflictTable.
1654    cbz     r4, .Lconflict_trampoline
1655    // Iterate over the entries of the ImtConflictTable.
1656    ldr     r4, [r2, #(2 * __SIZEOF_POINTER__)]!
1657    b .Limt_table_iterate
1658.Limt_table_found:
1659    // We successfully hit an entry in the table. Load the target method
1660    // and jump to it.
1661    ldr     r0, [r2, #__SIZEOF_POINTER__]
1662    .cfi_remember_state
1663    pop     {r1-r2}
1664    .cfi_adjust_cfa_offset -(2 * 4)
1665    .cfi_restore r1
1666    .cfi_restore r2
1667    ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
1668    .cfi_restore_state
1669.Lconflict_trampoline:
1670    // Call the runtime stub to populate the ImtConflictTable and jump to the
1671    // resolved method.
1672    .cfi_remember_state
1673    pop     {r1-r2}
1674    .cfi_adjust_cfa_offset -(2 * 4)
1675    .cfi_restore r1
1676    .cfi_restore r2
1677    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1678    .cfi_restore_state
1679.Limt_conflict_trampoline_dex_cache_miss:
1680    // We're not creating a proper runtime method frame here,
1681    // artLookupResolvedMethod() is not allowed to walk the stack.
1682
1683    // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
1684    push    {r2-r4, lr}
1685    .cfi_adjust_cfa_offset (4 * 4)
1686    .cfi_rel_offset r3, 4
1687    .cfi_rel_offset lr, 12
1688    // Save FPR args.
1689    vpush   {d0-d7}
1690    .cfi_adjust_cfa_offset (8 * 8)
1691
1692    mov     r0, ip                      // Pass method index.
1693    ldr     r1, [sp, #(8 * 8 + 6 * 4)]  // Pass referrer.
1694    bl      artLookupResolvedMethod     // (uint32_t method_index, ArtMethod* referrer)
1695
1696    // Restore FPR args.
1697    vpop    {d0-d7}
1698    .cfi_adjust_cfa_offset -(8 * 8)
1699    // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
1700    pop     {r2-r4, lr}
1701    .cfi_adjust_cfa_offset -(4 * 4)
1702    .cfi_restore r3
1703    .cfi_restore lr
1704
1705    cmp     r0, #0                  // If the method wasn't resolved,
1706    beq     .Lconflict_trampoline   //   skip the lookup and go to artInvokeInterfaceTrampoline().
1707    b       .Limt_table_iterate
1708END art_quick_imt_conflict_trampoline
1709
1710    .extern artQuickResolutionTrampoline
1711ENTRY art_quick_resolution_trampoline
1712    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1713    mov     r2, rSELF              @ pass Thread::Current
1714    mov     r3, sp                 @ pass SP
1715    blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
1716    cbz     r0, 1f                 @ is code pointer null? goto exception
1717    mov     r12, r0
1718    ldr     r0, [sp, #0]           @ load resolved method in r0
1719    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1720    REFRESH_MARKING_REGISTER
1721    bx      r12                    @ tail-call into actual code
17221:
1723    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1724    DELIVER_PENDING_EXCEPTION
1725END art_quick_resolution_trampoline
1726
1727    /*
1728     * Called to do a generic JNI down-call
1729     */
1730ENTRY art_quick_generic_jni_trampoline
1731    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1732
1733    // Save rSELF
1734    mov r11, rSELF
1735    // Save SP , so we can have static CFI info. r10 is saved in ref_and_args.
1736    mov r10, sp
1737    .cfi_def_cfa_register r10
1738
1739    sub sp, sp, #5120
1740
1741    // prepare for artQuickGenericJniTrampoline call
1742    // (Thread*,  SP)
1743    //    r0      r1   <= C calling convention
1744    //  rSELF     r10  <= where they are
1745
1746    mov r0, rSELF   // Thread*
1747    mov r1, r10
1748    blx artQuickGenericJniTrampoline  // (Thread*, sp)
1749
1750    // The C call will have registered the complete save-frame on success.
1751    // The result of the call is:
1752    // r0: pointer to native code, 0 on error.
1753    // r1: pointer to the bottom of the used area of the alloca, can restore stack till there.
1754
1755    // Check for error = 0.
1756    cbz r0, .Lexception_in_native
1757
1758    // Release part of the alloca.
1759    mov sp, r1
1760
1761    // Save the code pointer
1762    mov r12, r0
1763
1764    // Load parameters from frame into registers.
1765    pop {r0-r3}
1766
1767    // Softfloat.
1768    // TODO: Change to hardfloat when supported.
1769
1770    blx r12           // native call.
1771
1772    // result sign extension is handled in C code
1773    // prepare for artQuickGenericJniEndTrampoline call
1774    // (Thread*, result, result_f)
1775    //    r0      r2,r3    stack       <= C calling convention
1776    //    r11     r0,r1    r0,r1          <= where they are
1777    sub sp, sp, #8 // Stack alignment.
1778
1779    push {r0-r1}
1780    mov r3, r1
1781    mov r2, r0
1782    mov r0, r11
1783
1784    blx artQuickGenericJniEndTrampoline
1785
1786    // Restore self pointer.
1787    mov rSELF, r11
1788
1789    // Pending exceptions possible.
1790    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1791    cbnz r2, .Lexception_in_native
1792
1793    // Tear down the alloca.
1794    mov sp, r10
1795    .cfi_def_cfa_register sp
1796
1797    // Tear down the callee-save frame. Skip arg registers.
1798    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
1799    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
1800    RESTORE_SAVE_REFS_ONLY_FRAME
1801    REFRESH_MARKING_REGISTER
1802
1803    // store into fpr, for when it's a fpr return...
1804    vmov d0, r0, r1
1805    bx lr      // ret
1806    // Undo the unwinding information from above since it doesn't apply below.
1807    .cfi_def_cfa_register r10
1808    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
1809
1810.Lexception_in_native:
1811    ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
1812    add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
1813    mov sp, ip
1814    .cfi_def_cfa_register sp
1815    # This will create a new save-all frame, required by the runtime.
1816    DELIVER_PENDING_EXCEPTION
1817END art_quick_generic_jni_trampoline
1818
1819    .extern artQuickToInterpreterBridge
1820ENTRY art_quick_to_interpreter_bridge
1821    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
1822    mov     r1, rSELF              @ pass Thread::Current
1823    mov     r2, sp                 @ pass SP
1824    blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
1825    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1826    // Tear down the callee-save frame. Skip arg registers.
1827    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1828    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1829    RESTORE_SAVE_REFS_ONLY_FRAME
1830    REFRESH_MARKING_REGISTER
1831    cbnz    r2, 1f                 @ success if no exception is pending
1832    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1833    bx      lr                     @ return on success
18341:
1835    DELIVER_PENDING_EXCEPTION
1836END art_quick_to_interpreter_bridge
1837
1838/*
1839 * Called to attempt to execute an obsolete method.
1840 */
1841ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1842
1843    /*
1844     * Routine that intercepts method calls and returns.
1845     */
1846    .extern artInstrumentationMethodEntryFromCode
1847    .extern artInstrumentationMethodExitFromCode
1848ENTRY art_quick_instrumentation_entry
1849    @ Make stack crawlable and clobber r2 and r3 (post saving)
1850    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1851    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
1852    str   r0, [sp, #4]
1853    mov   r2, rSELF      @ pass Thread::Current
1854    mov   r3, sp         @ pass SP
1855    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
1856    cbz   r0, .Ldeliver_instrumentation_entry_exception
1857                         @ Deliver exception if we got nullptr as function.
1858    mov   r12, r0        @ r12 holds reference to code
1859    ldr   r0, [sp, #4]   @ restore r0
1860    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1861    adr   lr, art_quick_instrumentation_exit + /* thumb mode */ 1
1862                         @ load art_quick_instrumentation_exit into lr in thumb mode
1863    REFRESH_MARKING_REGISTER
1864    bx    r12            @ call method with lr set to art_quick_instrumentation_exit
1865.Ldeliver_instrumentation_entry_exception:
1866    @ Deliver exception for art_quick_instrumentation_entry placed after
1867    @ art_quick_instrumentation_exit so that the fallthrough works.
1868    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1869    DELIVER_PENDING_EXCEPTION
1870END art_quick_instrumentation_entry
1871
1872ENTRY art_quick_instrumentation_exit
1873    mov   lr, #0         @ link register is to here, so clobber with 0 for later checks
1874    SETUP_SAVE_EVERYTHING_FRAME r2
1875
1876    add   r3, sp, #8     @ store fpr_res pointer, in kSaveEverything frame
1877    add   r2, sp, #136   @ store gpr_res pointer, in kSaveEverything frame
1878    mov   r1, sp         @ pass SP
1879    mov   r0, rSELF      @ pass Thread::Current
1880    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
1881
1882    cbz   r0, .Ldo_deliver_instrumentation_exception
1883                         @ Deliver exception if we got nullptr as function.
1884    cbnz  r1, .Ldeoptimize
1885    // Normal return.
1886    str   r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
1887                         @ Set return pc.
1888    RESTORE_SAVE_EVERYTHING_FRAME
1889    REFRESH_MARKING_REGISTER
1890    bx lr
1891.Ldo_deliver_instrumentation_exception:
1892    DELIVER_PENDING_EXCEPTION_FRAME_READY
1893.Ldeoptimize:
1894    str   r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
1895                         @ Set return pc.
1896    RESTORE_SAVE_EVERYTHING_FRAME
1897    // Jump to art_quick_deoptimize.
1898    b     art_quick_deoptimize
1899END art_quick_instrumentation_exit
1900
1901    /*
1902     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1903     * will long jump to the upcall with a special exception of -1.
1904     */
1905    .extern artDeoptimize
1906ENTRY art_quick_deoptimize
1907    SETUP_SAVE_EVERYTHING_FRAME r0
1908    mov    r0, rSELF      @ pass Thread::Current
1909    blx    artDeoptimize  @ (Thread*)
1910END art_quick_deoptimize
1911
1912    /*
1913     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1914     * will long jump to the interpreter bridge.
1915     */
1916    .extern artDeoptimizeFromCompiledCode
1917ENTRY art_quick_deoptimize_from_compiled_code
1918    SETUP_SAVE_EVERYTHING_FRAME r1
1919    mov    r1, rSELF                      @ pass Thread::Current
1920    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
1921END art_quick_deoptimize_from_compiled_code
1922
1923    /*
1924     * Signed 64-bit integer multiply.
1925     *
1926     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
1927     *        WX
1928     *      x YZ
1929     *  --------
1930     *     ZW ZX
1931     *  YW YX
1932     *
1933     * The low word of the result holds ZX, the high word holds
1934     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
1935     * it doesn't fit in the low 64 bits.
1936     *
1937     * Unlike most ARM math operations, multiply instructions have
1938     * restrictions on using the same register more than once (Rd and Rm
1939     * cannot be the same).
1940     */
1941    /* mul-long vAA, vBB, vCC */
1942ENTRY art_quick_mul_long
1943    push    {r9-r10}
1944    .cfi_adjust_cfa_offset 8
1945    .cfi_rel_offset r9, 0
1946    .cfi_rel_offset r10, 4
1947    mul     ip, r2, r1                  @  ip<- ZxW
1948    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
1949    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
1950    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
1951    mov     r0,r9
1952    mov     r1,r10
1953    pop     {r9-r10}
1954    .cfi_adjust_cfa_offset -8
1955    .cfi_restore r9
1956    .cfi_restore r10
1957    bx      lr
1958END art_quick_mul_long
1959
1960    /*
1961     * Long integer shift.  This is different from the generic 32/64-bit
1962     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1963     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1964     * 6 bits.
1965     * On entry:
1966     *   r0: low word
1967     *   r1: high word
1968     *   r2: shift count
1969     */
1970    /* shl-long vAA, vBB, vCC */
1971ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
1972    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1973    mov     r1, r1, asl r2              @  r1<- r1 << r2
1974    rsb     r3, r2, #32                 @  r3<- 32 - r2
1975    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
1976    subs    ip, r2, #32                 @  ip<- r2 - 32
1977    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
1978    mov     r0, r0, asl r2              @  r0<- r0 << r2
1979    bx      lr
1980END art_quick_shl_long
1981
1982    /*
1983     * Long integer shift.  This is different from the generic 32/64-bit
1984     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1985     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1986     * 6 bits.
1987     * On entry:
1988     *   r0: low word
1989     *   r1: high word
1990     *   r2: shift count
1991     */
1992    /* shr-long vAA, vBB, vCC */
1993ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
1994    and     r2, r2, #63                 @ r0<- r0 & 0x3f
1995    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
1996    rsb     r3, r2, #32                 @  r3<- 32 - r2
1997    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1998    subs    ip, r2, #32                 @  ip<- r2 - 32
1999    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
2000    mov     r1, r1, asr r2              @  r1<- r1 >> r2
2001    bx      lr
2002END art_quick_shr_long
2003
2004    /*
2005     * Long integer shift.  This is different from the generic 32/64-bit
2006     * binary operations because vAA/vBB are 64-bit but vCC (the shift
2007     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
2008     * 6 bits.
2009     * On entry:
2010     *   r0: low word
2011     *   r1: high word
2012     *   r2: shift count
2013     */
2014    /* ushr-long vAA, vBB, vCC */
2015ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
2016    and     r2, r2, #63                 @ r0<- r0 & 0x3f
2017    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
2018    rsb     r3, r2, #32                 @  r3<- 32 - r2
2019    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
2020    subs    ip, r2, #32                 @  ip<- r2 - 32
2021    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
2022    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
2023    bx      lr
2024END art_quick_ushr_long
2025
2026    /*
2027     * String's indexOf.
2028     *
2029     * On entry:
2030     *    r0:   string object (known non-null)
2031     *    r1:   char to match (known <= 0xFFFF)
2032     *    r2:   Starting offset in string data
2033     */
2034ENTRY art_quick_indexof
2035    push {r4, r10-r11, lr} @ 4 words of callee saves
2036    .cfi_adjust_cfa_offset 16
2037    .cfi_rel_offset r4, 0
2038    .cfi_rel_offset r10, 4
2039    .cfi_rel_offset r11, 8
2040    .cfi_rel_offset lr, 12
2041#if (STRING_COMPRESSION_FEATURE)
2042    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
2043#else
2044    ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
2045#endif
2046    add   r0, #MIRROR_STRING_VALUE_OFFSET
2047#if (STRING_COMPRESSION_FEATURE)
2048    /* r4 count (with flag) and r3 holds actual length */
2049    lsr   r3, r4, #1
2050#endif
2051    /* Clamp start to [0..count] */
2052    cmp   r2, #0
2053    it    lt
2054    movlt r2, #0
2055    cmp   r2, r3
2056    it    gt
2057    movgt r2, r3
2058
2059    /* Save a copy in r12 to later compute result */
2060    mov   r12, r0
2061
2062    /* Build pointer to start of data to compare and pre-bias */
2063#if (STRING_COMPRESSION_FEATURE)
2064    lsrs  r4, r4, #1
2065    bcc   .Lstring_indexof_compressed
2066#endif
2067    add   r0, r0, r2, lsl #1
2068    sub   r0, #2
2069
2070    /* Compute iteration count */
2071    sub   r2, r3, r2
2072
2073    /*
2074     * At this point we have:
2075     *   r0: start of data to test
2076     *   r1: char to compare
2077     *   r2: iteration count
2078     *   r4: compression style (used temporarily)
2079     *   r12: original start of string data
2080     *   r3, r4, r10, r11 available for loading string data
2081     */
2082
2083    subs  r2, #4
2084    blt   .Lindexof_remainder
2085
2086.Lindexof_loop4:
2087    ldrh  r3, [r0, #2]!
2088    ldrh  r4, [r0, #2]!
2089    ldrh  r10, [r0, #2]!
2090    ldrh  r11, [r0, #2]!
2091    cmp   r3, r1
2092    beq   .Lmatch_0
2093    cmp   r4, r1
2094    beq   .Lmatch_1
2095    cmp   r10, r1
2096    beq   .Lmatch_2
2097    cmp   r11, r1
2098    beq   .Lmatch_3
2099    subs  r2, #4
2100    bge   .Lindexof_loop4
2101
2102.Lindexof_remainder:
2103    adds  r2, #4
2104    beq   .Lindexof_nomatch
2105
2106.Lindexof_loop1:
2107    ldrh  r3, [r0, #2]!
2108    cmp   r3, r1
2109    beq   .Lmatch_3
2110    subs  r2, #1
2111    bne   .Lindexof_loop1
2112
2113.Lindexof_nomatch:
2114    mov   r0, #-1
2115    pop {r4, r10-r11, pc}
2116
2117.Lmatch_0:
2118    sub   r0, #6
2119    sub   r0, r12
2120    asr   r0, r0, #1
2121    pop {r4, r10-r11, pc}
2122.Lmatch_1:
2123    sub   r0, #4
2124    sub   r0, r12
2125    asr   r0, r0, #1
2126    pop {r4, r10-r11, pc}
2127.Lmatch_2:
2128    sub   r0, #2
2129    sub   r0, r12
2130    asr   r0, r0, #1
2131    pop {r4, r10-r11, pc}
2132.Lmatch_3:
2133    sub   r0, r12
2134    asr   r0, r0, #1
2135    pop {r4, r10-r11, pc}
2136#if (STRING_COMPRESSION_FEATURE)
2137.Lstring_indexof_compressed:
2138    add   r0, r0, r2
2139    sub   r0, #1
2140    sub   r2, r3, r2
2141.Lstring_indexof_compressed_loop:
2142    subs  r2, #1
2143    blt   .Lindexof_nomatch
2144    ldrb  r3, [r0, #1]!
2145    cmp   r3, r1
2146    beq   .Lstring_indexof_compressed_matched
2147    b     .Lstring_indexof_compressed_loop
2148.Lstring_indexof_compressed_matched:
2149    sub   r0, r12
2150    pop {r4, r10-r11, pc}
2151#endif
2152END art_quick_indexof
2153
2154    /* Assembly routines used to handle ABI differences. */
2155
2156    /* double fmod(double a, double b) */
2157    .extern fmod
2158ENTRY art_quick_fmod
2159    push  {lr}
2160    .cfi_adjust_cfa_offset 4
2161    .cfi_rel_offset lr, 0
2162    sub   sp, #4
2163    .cfi_adjust_cfa_offset 4
2164    vmov  r0, r1, d0
2165    vmov  r2, r3, d1
2166    bl    fmod
2167    vmov  d0, r0, r1
2168    add   sp, #4
2169    .cfi_adjust_cfa_offset -4
2170    pop   {pc}
2171END art_quick_fmod
2172
2173    /* float fmodf(float a, float b) */
2174     .extern fmodf
2175ENTRY art_quick_fmodf
2176    push  {lr}
2177    .cfi_adjust_cfa_offset 4
2178    .cfi_rel_offset lr, 0
2179    sub   sp, #4
2180    .cfi_adjust_cfa_offset 4
2181    vmov  r0, r1, d0
2182    bl    fmodf
2183    vmov  s0, r0
2184    add   sp, #4
2185    .cfi_adjust_cfa_offset -4
2186    pop   {pc}
2187END art_quick_fmodf
2188
2189    /* int64_t art_d2l(double d) */
2190    .extern art_d2l
2191ENTRY art_quick_d2l
2192    vmov  r0, r1, d0
2193    b     art_d2l
2194END art_quick_d2l
2195
2196    /* int64_t art_f2l(float f) */
2197    .extern art_f2l
2198ENTRY art_quick_f2l
2199    vmov  r0, s0
2200    b     art_f2l
2201END art_quick_f2l
2202
2203    /* float art_l2f(int64_t l) */
2204    .extern art_l2f
2205ENTRY art_quick_l2f
2206    push  {lr}
2207    .cfi_adjust_cfa_offset 4
2208    .cfi_rel_offset lr, 0
2209    sub   sp, #4
2210    .cfi_adjust_cfa_offset 4
2211    bl    art_l2f
2212    vmov  s0, r0
2213    add   sp, #4
2214    .cfi_adjust_cfa_offset -4
2215    pop   {pc}
2216END art_quick_l2f
2217
2218.macro CONDITIONAL_CBZ reg, reg_if, dest
2219.ifc \reg, \reg_if
2220    cbz \reg, \dest
2221.endif
2222.endm
2223
2224.macro CONDITIONAL_CMPBZ reg, reg_if, dest
2225.ifc \reg, \reg_if
2226    cmp \reg, #0
2227    beq \dest
2228.endif
2229.endm
2230
2231// Use CBZ if the register is in {r0, r7} otherwise compare and branch.
2232.macro SMART_CBZ reg, dest
2233    CONDITIONAL_CBZ \reg, r0, \dest
2234    CONDITIONAL_CBZ \reg, r1, \dest
2235    CONDITIONAL_CBZ \reg, r2, \dest
2236    CONDITIONAL_CBZ \reg, r3, \dest
2237    CONDITIONAL_CBZ \reg, r4, \dest
2238    CONDITIONAL_CBZ \reg, r5, \dest
2239    CONDITIONAL_CBZ \reg, r6, \dest
2240    CONDITIONAL_CBZ \reg, r7, \dest
2241    CONDITIONAL_CMPBZ \reg, r8, \dest
2242    CONDITIONAL_CMPBZ \reg, r9, \dest
2243    CONDITIONAL_CMPBZ \reg, r10, \dest
2244    CONDITIONAL_CMPBZ \reg, r11, \dest
2245    CONDITIONAL_CMPBZ \reg, r12, \dest
2246    CONDITIONAL_CMPBZ \reg, r13, \dest
2247    CONDITIONAL_CMPBZ \reg, r14, \dest
2248    CONDITIONAL_CMPBZ \reg, r15, \dest
2249.endm
2250
2251    /*
2252     * Create a function `name` calling the ReadBarrier::Mark routine,
2253     * getting its argument and returning its result through register
2254     * `reg`, saving and restoring all caller-save registers.
2255     *
2256     * IP is clobbered; `reg` must not be IP.
2257     *
2258     * If `reg` is different from `r0`, the generated function follows a
2259     * non-standard runtime calling convention:
2260     * - register `reg` is used to pass the (sole) argument of this
2261     *   function (instead of R0);
2262     * - register `reg` is used to return the result of this function
2263     *   (instead of R0);
2264     * - R0 is treated like a normal (non-argument) caller-save register;
2265     * - everything else is the same as in the standard runtime calling
2266     *   convention (e.g. standard callee-save registers are preserved).
2267     */
2268.macro READ_BARRIER_MARK_REG name, reg
2269ENTRY \name
2270    // Null check so that we can load the lock word.
2271    SMART_CBZ \reg, .Lret_rb_\name
2272    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
2273    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
2274    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2275    beq .Lnot_marked_rb_\name
2276    // Already marked, return right away.
2277.Lret_rb_\name:
2278    bx lr
2279
2280.Lnot_marked_rb_\name:
2281    // Test that both the forwarding state bits are 1.
2282#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2283    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2284    // the highest bits and the "forwarding address" state to have all bits set.
2285#error "Unexpected lock word state shift or forwarding address state value."
2286#endif
2287    cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2288    bhs .Lret_forwarding_address\name
2289
2290.Lslow_rb_\name:
2291    // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to
2292    // make a tail call here. Currently, it serves only for stack alignment but
2293    // we may reintroduce kSaveEverything calls here in the future.
2294    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
2295    .cfi_adjust_cfa_offset 32
2296    .cfi_rel_offset r0, 0
2297    .cfi_rel_offset r1, 4
2298    .cfi_rel_offset r2, 8
2299    .cfi_rel_offset r3, 12
2300    .cfi_rel_offset r4, 16
2301    .cfi_rel_offset r9, 20
2302    .cfi_rel_offset ip, 24
2303    .cfi_rel_offset lr, 28
2304
2305    .ifnc \reg, r0
2306      mov   r0, \reg                    @ pass arg1 - obj from `reg`
2307    .endif
2308
2309    vpush {s0-s15}                      @ save floating-point caller-save registers
2310    .cfi_adjust_cfa_offset 64
2311    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
2312    vpop {s0-s15}                       @ restore floating-point registers
2313    .cfi_adjust_cfa_offset -64
2314
2315    .ifc \reg, r0                       @ Save result to the stack slot or destination register.
2316      str r0, [sp, #0]
2317    .else
2318      .ifc \reg, r1
2319        str r0, [sp, #4]
2320      .else
2321        .ifc \reg, r2
2322          str r0, [sp, #8]
2323        .else
2324          .ifc \reg, r3
2325            str r0, [sp, #12]
2326          .else
2327            .ifc \reg, r4
2328              str r0, [sp, #16]
2329            .else
2330              .ifc \reg, r9
2331                str r0, [sp, #20]
2332              .else
2333                mov \reg, r0
2334              .endif
2335            .endif
2336          .endif
2337        .endif
2338      .endif
2339    .endif
2340
2341    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
2342    .cfi_adjust_cfa_offset -32
2343    .cfi_restore r0
2344    .cfi_restore r1
2345    .cfi_restore r2
2346    .cfi_restore r3
2347    .cfi_restore r4
2348    .cfi_restore r9
2349    .cfi_restore ip
2350    .cfi_restore lr
2351    bx lr
2352.Lret_forwarding_address\name:
2353    // Shift left by the forwarding address shift. This clears out the state bits since they are
2354    // in the top 2 bits of the lock word.
2355    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2356    bx lr
2357END \name
2358.endm
2359
2360READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
2361READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
2362READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
2363READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
2364READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
2365READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
2366READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
2367READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
2368READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
2369READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
2370READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
2371READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
2372
2373// Helper macros for Baker CC read barrier mark introspection (BRBMI).
2374.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
2375    \macro_for_register r0
2376    \macro_for_register r1
2377    \macro_for_register r2
2378    \macro_for_register r3
2379    \macro_for_register r4
2380    \macro_for_register r5
2381    \macro_for_register r6
2382    \macro_for_register r7
2383    \macro_for_reserved_register  // r8 (rMR) is the marking register.
2384    \macro_for_register r9
2385    \macro_for_register r10
2386    \macro_for_register r11
2387    \macro_for_reserved_register  // IP is reserved.
2388    \macro_for_reserved_register  // SP is reserved.
2389    \macro_for_reserved_register  // LR is reserved.
2390    \macro_for_reserved_register  // PC is reserved.
2391.endm
2392
2393.macro BRBMI_RETURN_SWITCH_CASE reg
2394    .balign 8
2395.Lmark_introspection_return_switch_case_\reg:
2396    mov     rMR, #1
2397    mov     \reg, ip
2398    bx      lr
2399.endm
2400
2401.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
2402    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
2403.endm
2404
2405.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2406    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
2407.endm
2408
2409#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
2410#error "Array and field introspection code sharing requires same LDR offset."
2411#endif
2412.macro BRBMI_ARRAY_LOAD index_reg
2413    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
2414    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
2415    .balign 8                                           // Add padding to 8 bytes.
2416.endm
2417
2418.macro BRBMI_BKPT_FILL_4B
2419    bkpt    0
2420    bkpt    0
2421.endm
2422
2423.macro BRBMI_BKPT_FILL_8B
2424    BRBMI_BKPT_FILL_4B
2425    BRBMI_BKPT_FILL_4B
2426.endm
2427
2428.macro BRBMI_RUNTIME_CALL
2429    // Note: This macro generates exactly 22 bytes of code. The core register
2430    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
2431
2432    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
2433    .cfi_adjust_cfa_offset 24
2434    .cfi_rel_offset r0, 0
2435    .cfi_rel_offset r1, 4
2436    .cfi_rel_offset r2, 8
2437    .cfi_rel_offset r3, 12
2438    .cfi_rel_offset r7, 16
2439    .cfi_rel_offset lr, 20
2440
2441    mov     r0, ip                    // Pass the reference.
2442    vpush {s0-s15}                    // save floating-point caller-save registers
2443    .cfi_adjust_cfa_offset 64
2444    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
2445    vpop    {s0-s15}                  // restore floating-point registers
2446    .cfi_adjust_cfa_offset -64
2447    mov     ip, r0                    // Move reference to ip in preparation for return switch.
2448
2449    pop     {r0-r3, r7, lr}           // Restore registers.
2450    .cfi_adjust_cfa_offset -24
2451    .cfi_restore r0
2452    .cfi_restore r1
2453    .cfi_restore r2
2454    .cfi_restore r3
2455    .cfi_restore r7
2456    .cfi_restore lr
2457.endm
2458
2459.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
2460    // If reference is null, just return it in the right register.
2461    cmp     ip, #0
2462    beq     .Lmark_introspection_return\label_suffix
2463    // Use rMR as temp and check the mark bit of the reference.
2464    ldr     rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2465    tst     rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2466    beq     .Lmark_introspection_unmarked\label_suffix
2467.Lmark_introspection_return\label_suffix:
2468.endm
2469
2470.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
2471.Lmark_introspection_unmarked\label_suffix:
2472    // Check if the top two bits are one, if this is the case it is a forwarding address.
2473#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2474    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2475    // the highest bits and the "forwarding address" state to have all bits set.
2476#error "Unexpected lock word state shift or forwarding address state value."
2477#endif
2478    cmp     rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2479    bhs     .Lmark_introspection_forwarding_address\label_suffix
2480.endm
2481
2482.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
2483.Lmark_introspection_forwarding_address\label_suffix:
2484    // Note: This macro generates exactly 22 bytes of code, the branch is near.
2485
2486    // Shift left by the forwarding address shift. This clears out the state bits since they are
2487    // in the top 2 bits of the lock word.
2488    lsl     ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2489    b       .Lmark_introspection_return\label_suffix
2490.endm
2491
2492.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
2493    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
2494    ldrh    rMR, [lr, #(-1 + \ldr_offset + 2)]
2495.endm
2496
2497.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
2498    // Load the 16-bit instruction. Adjust for the thumb state in LR.
2499    ldrh    rMR, [lr, #(-1 + \ldr_offset)]
2500.endm
2501
2502.macro BRBMI_EXTRACT_RETURN_REG_wide
2503    lsr     rMR, rMR, #12             // Extract `ref_reg`.
2504.endm
2505
2506.macro BRBMI_EXTRACT_RETURN_REG_narrow
2507    and     rMR, rMR, #7              // Extract `ref_reg`.
2508.endm
2509
2510.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix
2511    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset
2512    BRBMI_EXTRACT_RETURN_REG\label_suffix
2513.endm
2514
2515.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix
2516    .balign 32
2517    .thumb_func
2518    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
2519    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2520    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2521art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
2522    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix
2523.endm
2524
2525.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix
2526    .balign 16
2527    // Note: Generates exactly 16 bytes of code.
2528    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
2529    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix
2530    b .Lmark_introspection_runtime_call
2531.endm
2532
2533    /*
2534     * Use introspection to load a reference from the same address as the LDR
2535     * instruction in generated code would load (unless loaded by the thunk,
2536     * see below), call ReadBarrier::Mark() with that reference if needed
2537     * and return it in the same register as the LDR instruction would load.
2538     *
2539     * The entrypoint is called through a thunk that differs across load kinds.
2540     * For field and array loads the LDR instruction in generated code follows
2541     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
2542     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
2543     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
2544     * knows the holder and performs the gray bit check, returning to the LDR
2545     * instruction if the object is not gray, so this entrypoint no longer
2546     * needs to know anything about the holder. For GC root loads, the LDR
2547     * instruction in generated code precedes the branch to the thunk, i.e. the
2548     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
2549     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
2550     * not do the gray bit check.
2551     *
2552     * For field accesses and array loads with a constant index the thunk loads
2553     * the reference into IP using introspection and calls the main entrypoint
2554     * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or
2555     * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known
2556     * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)
2557     * from the main entrypoint and the thunk adjusts the entrypoint pointer.
2558     * With heap poisoning enabled, the passed reference is poisoned.
2559     *
2560     * For array accesses with non-constant index, the thunk inserts the bits
2561     * 0-5 of the LDR instruction to the entrypoint address, effectively
2562     * calculating a switch case label based on the index register (bits 0-3)
2563     * and adding an extra offset (bits 4-5 hold the shift which is always 2
2564     * for reference loads) to differentiate from the main entrypoint, then
2565     * moves the base register to IP and jumps to the switch case. Therefore
2566     * we need to align the main entrypoint to 512 bytes, accounting for
2567     * a 256-byte offset followed by 16 array entrypoints starting at
2568     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
2569     * (register) and a branch to the main entrypoint.
2570     *
2571     * For GC root accesses we cannot use the main entrypoint because of the
2572     * different offset where the LDR instruction in generated code is located.
2573     * (And even with heap poisoning enabled, GC roots are not poisoned.)
2574     * To re-use the same entrypoint pointer in generated code, we make sure
2575     * that the gc root entrypoint (a copy of the entrypoint with a different
2576     * offset for introspection loads) is located at a known offset (0xc0/0xe0
2577     * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/
2578     * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the
2579     * main entrypoint and the GC root thunk adjusts the entrypoint pointer,
2580     * moves the root register to IP and jumps to the customized entrypoint,
2581     * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
2582     * The thunk also performs all the fast-path checks, so we need just the
2583     * slow path.
2584     *
2585     * The UnsafeCASObject intrinsic is similar to the GC roots wide approach
2586     * but using ADD (register, T3) instead of the LDR (immediate, T3), so the
2587     * destination register is in bits 8-11 rather than 12-15. Therefore it has
2588     * its own entrypoint, art_quick_read_barrier_mark_introspection_unsafe_cas
2589     * at the offset BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET.
2590     *
2591     * The code structure is
2592     *   art_quick_read_barrier_mark_introspection:                   // @0x00
2593     *     Up to 32 bytes code for main entrypoint fast-path code for fields
2594     *     (and array elements with constant offset) with LDR encoding T3;
2595     *     jumps to the switch in the "narrow" entrypoint.
2596     *   art_quick_read_barrier_mark_introspection_narrow:            // @0x20
2597     *     Up to 48 bytes code for fast path code for fields (and array
2598     *     elements with constant offset) with LDR encoding T1, ending in the
2599     *     return switch instruction TBB and the table with switch offsets.
2600     *   .Lmark_introspection_return_switch_case_r0:                  // @0x50
2601     *     Exactly 88 bytes of code for the return switch cases (8 bytes per
2602     *     case, 11 cases; no code for reserved registers).
2603     *   .Lmark_introspection_forwarding_address_narrow:              // @0xa8
2604     *     Exactly 6 bytes to extract the forwarding address and jump to the
2605     *     "narrow" entrypoint fast path.
2606     *   .Lmark_introspection_return_switch_case_bad:                 // @0xae
2607     *     Exactly 2 bytes, bkpt for unexpected return register.
2608     *   .Lmark_introspection_unmarked_narrow:                        // @0xb0
2609     *     Exactly 16 bytes for "narrow" entrypoint slow path.
2610     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:     // @0xc0
2611     *     GC root entrypoint code for LDR encoding T3 (10 bytes); loads and
2612     *     extracts the return register and jumps to the runtime call.
2613     *   .Lmark_introspection_forwarding_address_wide:                // @0xca
2614     *     Exactly 6 bytes to extract the forwarding address and jump to the
2615     *     "wide" entrypoint fast path.
2616     *   .Lmark_introspection_unmarked_wide:                          // @0xd0
2617     *     Exactly 16 bytes for "wide" entrypoint slow path.
2618     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:   // @0xe0
2619     *     GC root entrypoint code for LDR encoding T1 (8 bytes); loads and
2620     *     extracts the return register and falls through to the runtime call.
2621     *   .Lmark_introspection_runtime_call:                           // @0xe8
2622     *     Exactly 24 bytes for the runtime call to MarkReg() and jump to the
2623     *     return switch.
2624     *   art_quick_read_barrier_mark_introspection_arrays:            // @0x100
2625     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
2626     *   art_quick_read_barrier_mark_introspection_unsafe_cas:        // @0x180
2627     *     UnsafeCASObject intrinsic entrypoint for ADD (register) encoding T3
2628     *     (6 bytes). Loads the return register and jumps to the runtime call.
2629     */
2630#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2631    .balign 512
2632ENTRY art_quick_read_barrier_mark_introspection
2633    // At this point, IP contains the reference, rMR is clobbered by the thunk
2634    // and can be freely used as it will be set back to 1 before returning.
2635    // For heap poisoning, the reference is poisoned, so unpoison it first.
2636    UNPOISON_HEAP_REF ip
2637    // Check for null or marked, lock word is loaded into rMR.
2638    BRBMI_CHECK_NULL_AND_MARKED _wide
2639    // Load and extract the return register from the instruction.
2640    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2641    b       .Lmark_introspection_return_switch
2642
2643    .balign 32
2644    .thumb_func
2645    .type art_quick_read_barrier_mark_introspection_narrow, #function
2646    .hidden art_quick_read_barrier_mark_introspection_narrow
2647    .global art_quick_read_barrier_mark_introspection_narrow
2648art_quick_read_barrier_mark_introspection_narrow:
2649    // At this point, IP contains the reference, rMR is clobbered by the thunk
2650    // and can be freely used as it will be set back to 1 before returning.
2651    // For heap poisoning, the reference is poisoned, so unpoison it first.
2652    UNPOISON_HEAP_REF ip
2653    // Check for null or marked, lock word is loaded into rMR.
2654    BRBMI_CHECK_NULL_AND_MARKED _narrow
2655    // Load and extract the return register from the instruction.
2656    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2657.Lmark_introspection_return_switch:
2658    tbb     [pc, rMR]                 // Jump to the switch case.
2659.Lmark_introspection_return_table:
2660    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2661    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */
2662
2663    .balign 8
2664    BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow  // 6 bytes
2665.Lmark_introspection_return_switch_case_bad:
2666    bkpt                              // 2 bytes
2667
2668    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2669
2670    // 8 bytes for the loading and extracting of the return register.
2671    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
2672    // 2 bytes for near branch to the runtime call.
2673    b .Lmark_introspection_runtime_call
2674
2675    BRBMI_EXTRACT_FORWARDING_ADDRESS _wide  // Not even 4-byte aligned.
2676
2677    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2678
2679    // 8 bytes for the loading and extracting of the return register.
2680    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
2681    // And the runtime call and branch to the switch taking exactly 24 bytes
2682    // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch)
2683    // shall take the rest of the 32-byte section (within a cache line).
2684.Lmark_introspection_runtime_call:
2685    BRBMI_RUNTIME_CALL
2686    b       .Lmark_introspection_return_switch
2687
2688    .balign 256
2689    .thumb_func
2690    .type art_quick_read_barrier_mark_introspection_arrays, #function
2691    .hidden art_quick_read_barrier_mark_introspection_arrays
2692    .global art_quick_read_barrier_mark_introspection_arrays
2693art_quick_read_barrier_mark_introspection_arrays:
2694    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
2695
2696    .balign 8
2697    .thumb_func
2698    .type art_quick_read_barrier_mark_introspection_unsafe_cas, #function
2699    .hidden art_quick_read_barrier_mark_introspection_unsafe_cas
2700    .global art_quick_read_barrier_mark_introspection_unsafe_cas
2701art_quick_read_barrier_mark_introspection_unsafe_cas:
2702    // Load the byte of the ADD instruction that contains Rd. Adjust for the thumb state in LR.
2703    // The ADD (register, T3) is |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm| and we're using
2704    // no shift (type=0, imm2=0, imm3=0), so the byte we read here, i.e. |(0)imm3|Rd|,
2705    // contains only the register number, the top 4 bits are 0.
2706    ldrb    rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET + 3)]
2707    b .Lmark_introspection_runtime_call
2708END art_quick_read_barrier_mark_introspection
2709#else  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2710ENTRY art_quick_read_barrier_mark_introspection
2711    bkpt                              // Unreachable.
2712END art_quick_read_barrier_mark_introspection
2713#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2714
2715.extern artInvokePolymorphic
2716ENTRY art_quick_invoke_polymorphic
2717    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
2718    mov     r0, r1                 @ r0 := receiver
2719    mov     r1, rSELF              @ r1 := Thread::Current
2720    mov     r2, sp                 @ r2 := SP
2721    bl      artInvokePolymorphic   @ artInvokePolymorphic(receiver, Thread*, SP)
2722    str     r1, [sp, 72]           @ r0:r1 := Result. Copy r1 to context.
2723    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2724    REFRESH_MARKING_REGISTER
2725    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2726    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2727END art_quick_invoke_polymorphic
2728
2729.extern artInvokeCustom
2730ENTRY art_quick_invoke_custom
2731    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
2732                                   @ r0 := call_site_idx
2733    mov     r1, rSELF              @ r1 := Thread::Current
2734    mov     r2, sp                 @ r2 := SP
2735    bl      artInvokeCustom        @ artInvokeCustom(call_site_idx, Thread*, SP)
2736    str     r1, [sp, #72]          @ Save r1 to context (r0:r1 = result)
2737    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2738    REFRESH_MARKING_REGISTER
2739    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2740    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2741END art_quick_invoke_custom
2742
2743// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
2744//  Argument 0: r0: The context pointer for ExecuteSwitchImpl.
2745//  Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call.
2746//  Argument 2: r2: The value of DEX PC (memory address of the methods bytecode).
2747ENTRY ExecuteSwitchImplAsm
2748    push {r4, lr}                                 // 2 words of callee saves.
2749    .cfi_adjust_cfa_offset 8
2750    .cfi_rel_offset r4, 0
2751    .cfi_rel_offset lr, 4
2752    mov r4, r2                                    // r4 = DEX PC
2753    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0)
2754    blx r1                                        // Call the wrapped method.
2755    pop {r4, pc}
2756END ExecuteSwitchImplAsm
2757