1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_arm.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21#include "arch/quick_field_entrypoints.S"
22
23    /* Deliver the given exception */
24    .extern artDeliverExceptionFromCode
25    /* Deliver an exception pending on a thread */
26    .extern artDeliverPendingException
27
28.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
29    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
30    LOAD_RUNTIME_INSTANCE \rTemp                  @ Load Runtime::Current into rTemp.
31    @ Load kSaveRefsAndArgs Method* into rTemp.
32    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
33    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
34    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
35.endm
36
37.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
38    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
39    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
40    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
41.endm
42
43    /*
44     * Macro that sets up the callee save frame to conform with
45     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
46     * when core registers are already saved.
47     */
48.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
49                                        @ 14 words of callee saves and args already saved.
50    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
51    .cfi_adjust_cfa_offset 128
52    sub sp, #8                          @ 2 words of space, alignment padding and Method*
53    .cfi_adjust_cfa_offset 8
54    LOAD_RUNTIME_INSTANCE \rTemp        @ Load Runtime::Current into rTemp.
55    @ Load kSaveEverything Method* into rTemp.
56    ldr \rTemp, [\rTemp, #\runtime_method_offset]
57    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
58    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
59
60    // Ugly compile-time check, but we only have the preprocessor.
61#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
62#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
63#endif
64.endm
65
66    /*
67     * Macro that sets up the callee save frame to conform with
68     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
69     */
70.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
71    push {r0-r12, lr}                   @ 14 words of callee saves and args.
72    .cfi_adjust_cfa_offset 56
73    .cfi_rel_offset r0, 0
74    .cfi_rel_offset r1, 4
75    .cfi_rel_offset r2, 8
76    .cfi_rel_offset r3, 12
77    .cfi_rel_offset r4, 16
78    .cfi_rel_offset r5, 20
79    .cfi_rel_offset r6, 24
80    .cfi_rel_offset r7, 28
81    .cfi_rel_offset r8, 32
82    .cfi_rel_offset r9, 36
83    .cfi_rel_offset r10, 40
84    .cfi_rel_offset r11, 44
85    .cfi_rel_offset ip, 48
86    .cfi_rel_offset lr, 52
87    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
88.endm
89
90.macro RESTORE_SAVE_EVERYTHING_FRAME
91    add  sp, #8                         @ rewind sp
92    .cfi_adjust_cfa_offset -8
93    vpop {d0-d15}
94    .cfi_adjust_cfa_offset -128
95    pop {r0-r12, lr}                    @ 14 words of callee saves
96    .cfi_restore r0
97    .cfi_restore r1
98    .cfi_restore r2
99    .cfi_restore r3
100    .cfi_restore r4
101    .cfi_restore r5
102    .cfi_restore r6
103    .cfi_restore r7
104    .cfi_restore r8
105    .cfi_restore r9
106    .cfi_restore r10
107    .cfi_restore r11
108    .cfi_restore r12
109    .cfi_restore lr
110    .cfi_adjust_cfa_offset -56
111.endm
112
113.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
114    add  sp, #8                         @ rewind sp
115    .cfi_adjust_cfa_offset -8
116    vpop {d0-d15}
117    .cfi_adjust_cfa_offset -128
118    add  sp, #4                         @ skip r0
119    .cfi_adjust_cfa_offset -4
120    .cfi_restore r0                     @ debugger can no longer restore caller's r0
121    pop {r1-r12, lr}                    @ 13 words of callee saves
122    .cfi_restore r1
123    .cfi_restore r2
124    .cfi_restore r3
125    .cfi_restore r4
126    .cfi_restore r5
127    .cfi_restore r6
128    .cfi_restore r7
129    .cfi_restore r8
130    .cfi_restore r9
131    .cfi_restore r10
132    .cfi_restore r11
133    .cfi_restore r12
134    .cfi_restore lr
135    .cfi_adjust_cfa_offset -52
136.endm
137
138.macro RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION is_ref = 0
139    // Use R2 to allow returning 64-bit values in R0-R1.
140    ldr r2, [rSELF, # THREAD_EXCEPTION_OFFSET]  // Get exception field.
141    cbnz r2, 1f
142    DEOPT_OR_RETURN r2, \is_ref                 // Check if deopt is required
1431:
144    DELIVER_PENDING_EXCEPTION
145.endm
146
147.macro RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
148    RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /* is_ref= */ 1
149.endm
150
151.macro DEOPT_OR_RETURN temp, is_ref = 0
152  ldr \temp, [rSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
153  cbnz \temp, 2f
154  bx     lr
1552:
156  SETUP_SAVE_EVERYTHING_FRAME \temp
157  mov r2, \is_ref                      // pass if result is a reference
158  mov r1, r0                           // pass the result
159  mov r0, rSELF                        // Thread::Current
160  bl artDeoptimizeIfNeeded
161  RESTORE_SAVE_EVERYTHING_FRAME
162  REFRESH_MARKING_REGISTER
163  bx     lr
164.endm
165
166.macro DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_R0 temp, is_ref
167  ldr \temp, [rSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
168  cbnz \temp, 2f
169  CFI_REMEMBER_STATE
170  RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
171  REFRESH_MARKING_REGISTER
172  bx     lr
173  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
1742:
175  str    r0, [sp, SAVE_EVERYTHING_FRAME_R0_OFFSET] // update result in the frame
176  mov r2, \is_ref                                  // pass if result is a reference
177  mov r1, r0                                       // pass the result
178  mov r0, rSELF                                    // Thread::Current
179  bl artDeoptimizeIfNeeded
180  CFI_REMEMBER_STATE
181  RESTORE_SAVE_EVERYTHING_FRAME
182  REFRESH_MARKING_REGISTER
183  bx     lr
184  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
185.endm
186
187.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
188    .extern \cxx_name
189ENTRY \c_name
190    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
191    mov r0, rSELF                   @ pass Thread::Current
192    bl  \cxx_name                   @ \cxx_name(Thread*)
193END \c_name
194.endm
195
196.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
197    .extern \cxx_name
198ENTRY \c_name
199    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
200    mov r0, rSELF                   @ pass Thread::Current
201    bl  \cxx_name                   @ \cxx_name(Thread*)
202END \c_name
203.endm
204
205.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
206    .extern \cxx_name
207ENTRY \c_name
208    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
209    mov r1, rSELF                   @ pass Thread::Current
210    bl  \cxx_name                   @ \cxx_name(Thread*)
211END \c_name
212.endm
213
214.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
215    .extern \cxx_name
216ENTRY \c_name
217    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
218    mov r2, rSELF                   @ pass Thread::Current
219    bl  \cxx_name                   @ \cxx_name(Thread*)
220END \c_name
221.endm
222
223.macro RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
224    cbnz   r0, 1f              @ result non-zero branch over
225    DEOPT_OR_RETURN r1
2261:
227    DELIVER_PENDING_EXCEPTION
228.endm
229
230.macro RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
231    cbz    r0, 1f              @ result zero branch over
232    DEOPT_OR_RETURN r1, /* is_ref= */ 1
2331:
234    DELIVER_PENDING_EXCEPTION
235.endm
236
237// Macros taking opportunity of code similarities for downcalls.
238// Used for field and allocation entrypoints.
239.macro N_ARG_DOWNCALL n, name, entrypoint, return
240    .extern \entrypoint
241ENTRY \name
242    SETUP_SAVE_REFS_ONLY_FRAME r\n        @ save callee saves in case of GC
243    mov    r\n, rSELF                     @ pass Thread::Current
244    bl     \entrypoint                    @ (<args>, Thread*)
245    RESTORE_SAVE_REFS_ONLY_FRAME
246    REFRESH_MARKING_REGISTER
247    \return
248END \name
249.endm
250
251.macro ONE_ARG_DOWNCALL name, entrypoint, return
252    N_ARG_DOWNCALL 1, \name, \entrypoint, \return
253.endm
254
255.macro TWO_ARG_DOWNCALL name, entrypoint, return
256    N_ARG_DOWNCALL 2, \name, \entrypoint, \return
257.endm
258
259.macro THREE_ARG_DOWNCALL name, entrypoint, return
260    N_ARG_DOWNCALL 3, \name, \entrypoint, \return
261.endm
262
263// Macro to facilitate adding new allocation entrypoints.
264.macro FOUR_ARG_DOWNCALL name, entrypoint, return
265    .extern \entrypoint
266ENTRY \name
267    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
268    str    rSELF, [sp, #-16]!             @ expand the frame and pass Thread::Current
269    .cfi_adjust_cfa_offset 16
270    bl     \entrypoint                    @ (<args>, Thread*)
271    DECREASE_FRAME 16                     @ strip the extra frame
272    RESTORE_SAVE_REFS_ONLY_FRAME
273    REFRESH_MARKING_REGISTER
274    \return
275END \name
276.endm
277
278    /*
279     * Called by managed code, saves callee saves and then calls artThrowException
280     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
281     */
282ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
283
284    /*
285     * Called by managed code to create and deliver a NullPointerException.
286     */
287NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
288
289    /*
290     * Call installed by a signal handler to create and deliver a NullPointerException.
291     */
292    .extern art_quick_throw_null_pointer_exception_from_signal
293ENTRY art_quick_throw_null_pointer_exception_from_signal
294    // The fault handler pushes the gc map address, i.e. "return address", to stack
295    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
296    .cfi_def_cfa_offset __SIZEOF_POINTER__
297    .cfi_rel_offset lr, 0
298    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
299    .cfi_adjust_cfa_offset 52
300    .cfi_rel_offset r0, 0
301    .cfi_rel_offset r1, 4
302    .cfi_rel_offset r2, 8
303    .cfi_rel_offset r3, 12
304    .cfi_rel_offset r4, 16
305    .cfi_rel_offset r5, 20
306    .cfi_rel_offset r6, 24
307    .cfi_rel_offset r7, 28
308    .cfi_rel_offset r8, 32
309    .cfi_rel_offset r9, 36
310    .cfi_rel_offset r10, 40
311    .cfi_rel_offset r11, 44
312    .cfi_rel_offset ip, 48
313
314    @ save all registers as basis for long jump context
315    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
316    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
317    mov r1, rSELF                   @ pass Thread::Current
318    bl  artThrowNullPointerExceptionFromSignal  @ (Thread*)
319END art_quick_throw_null_pointer_exception_from_signal
320
321    /*
322     * Called by managed code to create and deliver an ArithmeticException.
323     */
324NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
325
326    /*
327     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
328     * index, arg2 holds limit.
329     */
330TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
331
332    /*
333     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
334     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
335     */
336TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
337
338    /*
339     * Called by managed code to create and deliver a StackOverflowError.
340     */
341NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
342
343    /*
344     * All generated callsites for interface invokes and invocation slow paths will load arguments
345     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
346     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
347     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
348     *
349     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
350     * of the target Method* in r0 and method->code_ in r1.
351     *
352     * If unsuccessful, the helper will return null/null. There will bea pending exception in the
353     * thread and we branch to another stub to deliver it.
354     *
355     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
356     * pointing back to the original caller.
357     *
358     * Clobbers IP (R12).
359     */
360.macro INVOKE_TRAMPOLINE_BODY cxx_name
361    .extern \cxx_name
362    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
363    mov    r2, rSELF                      @ pass Thread::Current
364    mov    r3, sp
365    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
366    mov    r12, r1                        @ save Method*->code_
367    RESTORE_SAVE_REFS_AND_ARGS_FRAME
368    REFRESH_MARKING_REGISTER
369    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
370    bx     r12                            @ tail call to target
3711:
372    DELIVER_PENDING_EXCEPTION
373.endm
374.macro INVOKE_TRAMPOLINE c_name, cxx_name
375ENTRY \c_name
376    INVOKE_TRAMPOLINE_BODY \cxx_name
377END \c_name
378.endm
379
380INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
381
382INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
383INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
384INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
385INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
386
387    /*
388     * Quick invocation stub internal.
389     * On entry:
390     *   r0 = method pointer
391     *   r1 = argument array or null for no argument methods
392     *   r2 = size of argument array in bytes
393     *   r3 = (managed) thread pointer
394     *   [sp] = JValue* result
395     *   [sp + 4] = result_in_float
396     *   [sp + 8] = core register argument array
397     *   [sp + 12] = fp register argument array
398     *  +-------------------------+
399     *  | uint32_t* fp_reg_args   |
400     *  | uint32_t* core_reg_args |
401     *  |   result_in_float       | <- Caller frame
402     *  |   Jvalue* result        |
403     *  +-------------------------+
404     *  |          lr             |
405     *  |          r11            |
406     *  |          r9             |
407     *  |          r4             | <- r11
408     *  +-------------------------+
409     *  | uint32_t out[n-1]       |
410     *  |    :      :             |        Outs
411     *  | uint32_t out[0]         |
412     *  | StackRef<ArtMethod>     | <- SP  value=null
413     *  +-------------------------+
414     */
415ENTRY art_quick_invoke_stub_internal
416    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
417    mov    r11, sp                         @ save the stack pointer
418    .cfi_def_cfa_register r11
419
420    mov    r9, r3                          @ move managed thread pointer into r9
421
422    add    r4, r2, #4                      @ create space for method pointer in frame
423    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
424    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
425    mov    sp, r4                          @ 16B alignment ourselves.
426
427    mov    r4, r0                          @ save method*
428    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
429    bl     memcpy                          @ memcpy (dest, src, bytes)
430    mov    ip, #0                          @ set ip to 0
431    str    ip, [sp]                        @ store null for method* at bottom of frame
432
433    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
434    vldm   ip, {s0-s15}                    @ copy s0 - s15
435
436    ldr    ip, [r11, #44]                  @ load core register argument array pointer
437    mov    r0, r4                          @ restore method*
438    add    ip, ip, #4                      @ skip r0
439    ldm    ip, {r1-r3}                     @ copy r1 - r3
440
441    REFRESH_MARKING_REGISTER
442
443    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
444    blx    ip                              @ call the method
445
446    mov    sp, r11                         @ restore the stack pointer
447    .cfi_def_cfa_register sp
448
449    ldr    r4, [sp, #40]                   @ load result_is_float
450    ldr    r9, [sp, #36]                   @ load the result pointer
451    cmp    r4, #0
452    ite    eq
453    strdeq r0, [r9]                        @ store r0/r1 into result pointer
454    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer
455
456    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
457END art_quick_invoke_stub_internal
458
459    /*
460     * On stack replacement stub.
461     * On entry:
462     *   r0 = stack to copy
463     *   r1 = size of stack
464     *   r2 = pc to call
465     *   r3 = JValue* result
466     *   [sp] = shorty
467     *   [sp + 4] = thread
468     */
469ENTRY art_quick_osr_stub
470    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
471    vpush  {s16-s31}                       @ Spill fp-regs (16)
472    .cfi_adjust_cfa_offset 64
473    SAVE_SIZE=(9*4+16*4)
474    mov    r11, sp                         @ Save the stack pointer
475    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
476    CFI_REMEMBER_STATE
477    mov    r10, r1                         @ Save size of stack
478    ldr    r9, [r11, #(SAVE_SIZE+4)]       @ Move managed thread pointer into r9
479    REFRESH_MARKING_REGISTER
480    mov    r6, r2                          @ Save the pc to call
481    sub    r7, sp, #12                     @ Reserve space for stack pointer,
482                                           @    JValue* result, and ArtMethod* slot.
483    and    r7, #0xFFFFFFF0                 @ Align stack pointer
484    mov    sp, r7                          @ Update stack pointer
485    str    r11, [sp, #4]                   @ Save old stack pointer
486    str    r3, [sp, #8]                    @ Save JValue* result
487    mov    ip, #0
488    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
489    // r11 isn't properly spilled in the osr method, so we need use DWARF expression.
490    // NB: the CFI must be before the call since this is the address gdb will lookup.
491    // NB: gdb expects that cfa_expression returns the CFA value (not address to it).
492    .cfi_escape                            /* CFA = [sp + 4] + SAVE_SIZE */ \
493      0x0f, 6,                             /* DW_CFA_def_cfa_expression(len) */ \
494      0x92, 13, 4,                         /* DW_OP_bregx(reg,offset) */ \
495      0x06,                                /* DW_OP_deref */ \
496      0x23, SAVE_SIZE                      /* DW_OP_plus_uconst(val) */
497    bl     .Losr_entry                     @ Call the method
498    ldr    r10, [sp, #8]                   @ Restore JValue* result
499    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
500    .cfi_def_cfa sp, SAVE_SIZE             @ CFA = sp + SAVE_SIZE
501    strd r0, [r10]                         @ Store r0/r1 into result pointer
502    vpop   {s16-s31}
503    .cfi_adjust_cfa_offset -64
504    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
505.Losr_entry:
506    CFI_RESTORE_STATE_AND_DEF_CFA r11, SAVE_SIZE  @ CFA = r11 + SAVE_SIZE
507    sub sp, sp, r10                        @ Reserve space for callee stack
508    sub r10, r10, #4
509    str lr, [sp, r10]                      @ Store link register per the compiler ABI
510    mov r2, r10
511    mov r1, r0
512    mov r0, sp
513    bl  memcpy                             @ memcpy (dest r0, src r1, bytes r2)
514    bx r6
515END art_quick_osr_stub
516
517    /*
518     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
519     * Both must reside on the stack, between current SP and target SP.
520     * The r12 (IP) shall be clobbered rather than retrieved from gprs_.
521     */
522ARM_ENTRY art_quick_do_long_jump
523    vldm r1, {s0-s31}     @ Load all fprs from argument fprs_.
524    mov  sp, r0           @ Make SP point to gprs_.
525                          @ Do not access fprs_ from now, they may be below SP.
526    ldm  sp, {r0-r11}     @ load r0-r11 from gprs_.
527    ldr  r12, [sp, #60]   @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12).
528    ldr  lr, [sp, #56]    @ Load LR from gprs_, 56 = 4 * 14.
529    ldr  sp, [sp, #52]    @ Load SP from gprs_ 52 = 4 * 13.
530                          @ Do not access gprs_ from now, they are below SP.
531    REFRESH_MARKING_REGISTER
532    bx   r12              @ Do long jump.
533END art_quick_do_long_jump
534
535    /*
536     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
537     * failure.
538     */
539TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
540                 artHandleFillArrayDataFromCode, \
541                 RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
542
543    /*
544     * Entry from managed code that tries to lock the object in a fast path and
545     * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
546     * r0 holds the possibly null object to lock.
547     */
548ENTRY art_quick_lock_object
549    // Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
550    LOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Llock_object_slow, /*can_be_null*/ 1
551END art_quick_lock_object
552
553    /*
554     * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
555     * r0 holds the possibly null object to lock.
556     */
557    .extern artLockObjectFromCode
558ENTRY art_quick_lock_object_no_inline
559    // This is also the slow path for art_quick_lock_object.
560    // Note that we need a local label as the assembler emits bad instructions
561    // for CBZ/CBNZ if we try to jump to `art_quick_lock_object_no_inline`.
562.Llock_object_slow:
563    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
564    mov    r1, rSELF                  @ pass Thread::Current
565    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
566    RESTORE_SAVE_REFS_ONLY_FRAME
567    REFRESH_MARKING_REGISTER
568    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
569END art_quick_lock_object_no_inline
570
571    /*
572     * Entry from managed code that tries to unlock the object in a fast path and calls
573     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
574     * r0 holds the possibly null object to unlock.
575     */
576ENTRY art_quick_unlock_object
577    // Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
578    UNLOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Lunlock_object_slow, /*can_be_null*/ 1
579END art_quick_unlock_object
580
581    /*
582     * Entry from managed code that calls `artUnlockObjectFromCode()`
583     * and delivers exception on failure.
584     * r0 holds the possibly null object to unlock.
585     */
586    .extern artUnlockObjectFromCode
587ENTRY art_quick_unlock_object_no_inline
588    // This is also the slow path for art_quick_unlock_object.
589    // Note that we need a local label as the assembler emits bad instructions
590    // for CBZ/CBNZ if we try to jump to `art_quick_unlock_object_no_inline`.
591.Lunlock_object_slow:
592    @ save callee saves in case exception allocation triggers GC
593    SETUP_SAVE_REFS_ONLY_FRAME r1
594    mov    r1, rSELF                  @ pass Thread::Current
595    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
596    RESTORE_SAVE_REFS_ONLY_FRAME
597    REFRESH_MARKING_REGISTER
598    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
599END art_quick_unlock_object_no_inline
600
601    /*
602     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
603     * artThrowClassCastExceptionForObject.
604     */
605    .extern artInstanceOfFromCode
606    .extern artThrowClassCastExceptionForObject
607ENTRY art_quick_check_instance_of
608    // Type check using the bit string passes null as the target class. In that case just throw.
609    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check
610
611    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
612    .cfi_adjust_cfa_offset 16
613    .cfi_rel_offset r0, 0
614    .cfi_rel_offset r1, 4
615    .cfi_rel_offset r2, 8
616    .cfi_rel_offset lr, 12
617    bl artInstanceOfFromCode
618    cbz    r0, .Lthrow_class_cast_exception
619    pop {r0-r2, pc}
620
621.Lthrow_class_cast_exception:
622    pop {r0-r2, lr}
623    .cfi_adjust_cfa_offset -16
624    .cfi_restore r0
625    .cfi_restore r1
626    .cfi_restore r2
627    .cfi_restore lr
628
629.Lthrow_class_cast_exception_for_bitstring_check:
630    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
631    mov r2, rSELF                   @ pass Thread::Current
632    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
633    bkpt
634END art_quick_check_instance_of
635
636// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
637.macro POP_REG_NE rReg, offset, rExclude
638    .ifnc \rReg, \rExclude
639        ldr \rReg, [sp, #\offset]   @ restore rReg
640        .cfi_restore \rReg
641    .endif
642.endm
643
644// Save rReg's value to [sp, #offset].
645.macro PUSH_REG rReg, offset
646    str \rReg, [sp, #\offset]       @ save rReg
647    .cfi_rel_offset \rReg, \offset
648.endm
649
650    // Helper macros for `art_quick_aput_obj`.
651#ifdef USE_READ_BARRIER
652#ifdef USE_BAKER_READ_BARRIER
653.macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD rDest, rObj, offset, gray_slow_path_label
654    ldr ip, [\rObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
655    tst ip, #LOCK_WORD_READ_BARRIER_STATE_MASK_SHIFTED
656    bne \gray_slow_path_label
657    // False dependency to avoid needing load/load fence.
658    add \rObj, \rObj, ip, lsr #32
659    ldr \rDest, [\rObj, #\offset]
660    UNPOISON_HEAP_REF \rDest
661.endm
662
663.macro BAKER_RB_LOAD_AND_MARK rDest, rObj, offset, mark_function
664    ldr \rDest, [\rObj, #\offset]
665    UNPOISON_HEAP_REF \rDest
666    str lr, [sp, #-8]!             @ Save LR with correct stack alignment.
667    .cfi_rel_offset lr, 0
668    .cfi_adjust_cfa_offset 8
669    bl \mark_function
670    ldr lr, [sp], #8               @ Restore LR.
671    .cfi_restore lr
672    .cfi_adjust_cfa_offset -8
673.endm
674#else  // USE_BAKER_READ_BARRIER
675    .extern artReadBarrierSlow
676.macro READ_BARRIER_SLOW rDest, rObj, offset
677    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
678    .cfi_adjust_cfa_offset 24
679    .cfi_rel_offset r0, 0
680    .cfi_rel_offset r1, 4
681    .cfi_rel_offset r2, 8
682    .cfi_rel_offset r3, 12
683    .cfi_rel_offset ip, 16
684    .cfi_rel_offset lr, 20
685    sub sp, #8                      @ push padding
686    .cfi_adjust_cfa_offset 8
687    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
688    .ifnc \rObj, r1
689        mov r1, \rObj               @ pass rObj
690    .endif
691    mov r2, #\offset                @ pass offset
692    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
693    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
694    .ifnc \rDest, r0
695        mov \rDest, r0              @ save return value in rDest
696    .endif
697    add sp, #8                      @ pop padding
698    .cfi_adjust_cfa_offset -8
699    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
700    POP_REG_NE r1, 4, \rDest
701    POP_REG_NE r2, 8, \rDest
702    POP_REG_NE r3, 12, \rDest
703    POP_REG_NE ip, 16, \rDest
704    add sp, #20
705    .cfi_adjust_cfa_offset -20
706    pop {lr}                        @ restore lr
707    .cfi_adjust_cfa_offset -4
708    .cfi_restore lr
709.endm
710#endif // USE_BAKER_READ_BARRIER
711#endif  // USE_READ_BARRIER
712
713    .hidden art_quick_aput_obj
714ENTRY art_quick_aput_obj
715#if defined(USE_READ_BARRIER) && !defined(USE_BAKER_READ_BARRIER)
716    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from `READ_BARRIER_SLOW`.
717    tst r2, r2
718    beq .Laput_obj_null
719    READ_BARRIER_SLOW r3, r0, MIRROR_OBJECT_CLASS_OFFSET
720    READ_BARRIER_SLOW r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
721    READ_BARRIER_SLOW r4, r2, MIRROR_OBJECT_CLASS_OFFSET
722#else  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
723    cbz r2, .Laput_obj_null
724#ifdef USE_READ_BARRIER
725    cmp rMR, #0
726    bne .Laput_obj_gc_marking
727#endif  // USE_READ_BARRIER
728    ldr r3, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
729    UNPOISON_HEAP_REF r3
730    // R4 is a scratch register in managed ARM ABI.
731    ldr r4, [r2, #MIRROR_OBJECT_CLASS_OFFSET]
732    UNPOISON_HEAP_REF r4
733    ldr r3, [r3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
734    UNPOISON_HEAP_REF r3
735#endif  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
736    cmp r3, r4  @ value's type == array's component type - trivial assignability
737    bne .Laput_obj_check_assignability
738.Laput_obj_store:
739    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
740    POISON_HEAP_REF r2
741    str r2, [r3, r1, lsl #2]
742    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
743    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
744    strb r3, [r3, r0]
745    blx lr
746
747.Laput_obj_null:
748    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
749    str r2, [r3, r1, lsl #2]
750    blx lr
751
752.Laput_obj_check_assignability:
753    push {r0-r2, lr}             @ save arguments
754    .cfi_adjust_cfa_offset 16
755    .cfi_rel_offset lr, 12
756    mov r1, r4
757    mov r0, r3
758    bl artIsAssignableFromCode
759    CFI_REMEMBER_STATE
760    cbz r0, .Lthrow_array_store_exception
761    pop {r0-r2, lr}
762    .cfi_restore lr
763    .cfi_adjust_cfa_offset -16
764    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
765    POISON_HEAP_REF r2
766    str r2, [r3, r1, lsl #2]
767    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
768    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
769    strb r3, [r3, r0]
770    blx lr
771
772.Lthrow_array_store_exception:
773    CFI_RESTORE_STATE_AND_DEF_CFA sp, 16
774    pop {r0-r2, lr}
775    .cfi_restore lr
776    .cfi_adjust_cfa_offset -16
777#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
778    CFI_REMEMBER_STATE
779#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
780    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
781    mov r1, r2
782    mov r2, rSELF                  @ Pass Thread::Current.
783    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
784    bkpt                           @ Unreachable.
785
786#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
787    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
788.Laput_obj_gc_marking:
789    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
790        r3, r0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class
791.Laput_obj_mark_array_class_continue:
792    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
793        r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element
794.Laput_obj_mark_array_element_continue:
795    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
796        r4, r2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class
797.Laput_obj_mark_object_class_continue:
798
799    cmp r3, r4  @ value's type == array's component type - trivial assignability
800    // All registers are set up for correctly `.Laput_obj_check_assignability`.
801    bne .Laput_obj_check_assignability
802    b   .Laput_obj_store
803
804.Laput_obj_mark_array_class:
805    BAKER_RB_LOAD_AND_MARK r3, r0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg03
806    b .Laput_obj_mark_array_class_continue
807
808.Laput_obj_mark_array_element:
809    BAKER_RB_LOAD_AND_MARK \
810        r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg03
811    b .Laput_obj_mark_array_element_continue
812
813.Laput_obj_mark_object_class:
814    BAKER_RB_LOAD_AND_MARK r4, r2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg04
815    b .Laput_obj_mark_object_class_continue
816#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
817END art_quick_aput_obj
818
819    /*
820     * Macro for resolution and initialization of indexed DEX file
821     * constants such as classes and strings.
822     */
823.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
824    .extern \entrypoint
825ENTRY \name
826    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
827    mov    r1, rSELF                  @ pass Thread::Current
828    bl     \entrypoint                @ (uint32_t index, Thread*)
829    cbz    r0, 1f                     @ If result is null, deliver the OOME.
830    str    r0, [sp, #136]             @ store result in the frame
831    DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_R0 r1, /* is_ref= */ 1
8321:
833    DELIVER_PENDING_EXCEPTION_FRAME_READY
834END \name
835.endm
836
837.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
838    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
839.endm
840
841ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
842ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
843ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
844ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
845ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
846ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
847
848// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
849// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
850
851GENERATE_STATIC_FIELD_GETTERS
852
853GENERATE_INSTANCE_FIELD_GETTERS
854
855GENERATE_STATIC_FIELD_SETTERS /* emit64= */ 0
856
857GENERATE_INSTANCE_FIELD_SETTERS /* emit64= */ 0
858
859    /*
860     * Called by managed code to resolve an instance field and store a wide value.
861     */
862    .extern artSet64InstanceFromCompiledCode
863ENTRY art_quick_set64_instance
864    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
865                                         @ r2:r3 contain the wide argument
866    str    rSELF, [sp, #-16]!            @ expand the frame and pass Thread::Current
867    .cfi_adjust_cfa_offset 16
868    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
869    add    sp, #16                       @ release out args
870    .cfi_adjust_cfa_offset -16
871    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
872    REFRESH_MARKING_REGISTER
873    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
874END art_quick_set64_instance
875
876    .extern artSet64StaticFromCompiledCode
877ENTRY art_quick_set64_static
878    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
879                                          @ r2:r3 contain the wide argument
880    str    rSELF, [sp, #-16]!             @ expand the frame and pass Thread::Current
881    .cfi_adjust_cfa_offset 16
882    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
883    add    sp, #16                        @ release out args
884    .cfi_adjust_cfa_offset -16
885    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
886    REFRESH_MARKING_REGISTER
887    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
888END art_quick_set64_static
889
890// Generate the allocation entrypoints for each allocator.
891GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
892// Comment out allocators that have arm specific asm.
893// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
894// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
895GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
896GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
897// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
898// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
899// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
900// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
901// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
902GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
903GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
904GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
905
906// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
907// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
908GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
909GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
910// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
911// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
912// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
913// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
914// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
915GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
916GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
917GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
918
919// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
920//
921// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
922// If isInitialized=0 the compiler can only assume it's been at least resolved.
923.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
924ENTRY \c_name
925    // Fast path rosalloc allocation.
926    // r0: type/return value, rSELF (r9): Thread::Current
927    // r1, r2, r3, r12: free.
928    ldr    r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
929                                                              // allocation stack has room.
930                                                              // TODO: consider using ldrd.
931    ldr    r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
932    cmp    r3, r12
933    bhs    .Lslow_path\c_name
934
935    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
936    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
937                                                              // local allocation.
938    // If the class is not yet visibly initialized, or it is finalizable,
939    // the object size will be very large to force the branch below to be taken.
940    //
941    // See Class::SetStatus() in class.cc for more details.
942    bhs    .Lslow_path\c_name
943                                                              // Compute the rosalloc bracket index
944                                                              // from the size. Since the size is
945                                                              // already aligned we can combine the
946                                                              // two shifts together.
947    add    r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
948                                                              // Subtract pointer size since there
949                                                              // are no runs for 0 byte allocations
950                                                              // and the size is already aligned.
951                                                              // Load the rosalloc run (r12)
952    ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
953                                                              // Load the free list head (r3). This
954                                                              // will be the return val.
955    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
956    cbz    r3, .Lslow_path\c_name
957    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
958    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
959                                                              // and update the list head with the
960                                                              // next pointer.
961    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
962                                                              // Store the class pointer in the
963                                                              // header. This also overwrites the
964                                                              // next pointer. The offsets are
965                                                              // asserted to match.
966#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
967#error "Class pointer needs to overwrite next pointer."
968#endif
969    POISON_HEAP_REF r0
970    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
971                                                              // Push the new object onto the thread
972                                                              // local allocation stack and
973                                                              // increment the thread local
974                                                              // allocation stack top.
975    ldr    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
976    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
977    str    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
978                                                              // Decrement the size of the free list
979
980    // After this "STR" the object is published to the thread local allocation stack,
981    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
982    // It is not yet visible to the running (user) compiled code until after the return.
983    //
984    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
985    // the state of the allocation stack slot. It can be a pointer to one of:
986    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
987    //       (The stack initial state is "null" pointers).
988    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
989    // 2) A fully valid object, with a valid class pointer pointing to a real class.
990    // Other states are not allowed.
991    //
992    // An object that is invalid only temporarily, and will eventually become valid.
993    // The internal runtime code simply checks if the object is not null or is partial and then
994    // ignores it.
995    //
996    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
997    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
998    // "next" pointer is not-cyclic.)
999    //
1000    // See also b/28790624 for a listing of CLs dealing with this race.
1001    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1002    sub    r1, #1
1003                                                              // TODO: consider combining this store
1004                                                              // and the list head store above using
1005                                                              // strd.
1006    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1007
1008    mov    r0, r3                                             // Set the return value and return.
1009    // No barrier. The class is already observably initialized (otherwise the fast
1010    // path size check above would fail) and new-instance allocations are protected
1011    // from publishing by the compiler which inserts its own StoreStore barrier.
1012    bx     lr
1013
1014.Lslow_path\c_name:
1015    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
1016    mov    r1, rSELF                  @ pass Thread::Current
1017    bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
1018    RESTORE_SAVE_REFS_ONLY_FRAME
1019    REFRESH_MARKING_REGISTER
1020    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
1021END \c_name
1022.endm
1023
1024ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
1025ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1026
1027// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
1028// and art_quick_alloc_object_resolved/initialized_region_tlab.
1029//
1030// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free.
1031// Need to preserve r0 to the slow path.
1032//
1033// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1034// If isInitialized=0 the compiler can only assume it's been at least resolved.
1035.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
1036                                                             // Load thread_local_pos (r12) and
1037                                                             // thread_local_end (r3) with ldrd.
1038                                                             // Check constraints for ldrd.
1039#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1040#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1041#endif
1042    ldrd   r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1043    sub    r12, r3, r12                                       // Compute the remaining buf size.
1044    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
1045    cmp    r3, r12                                            // Check if it fits.
1046    // If the class is not yet visibly initialized, or it is finalizable,
1047    // the object size will be very large to force the branch below to be taken.
1048    //
1049    // See Class::SetStatus() in class.cc for more details.
1050    bhi    \slowPathLabel
1051    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1052                                                              // Reload old thread_local_pos (r0)
1053                                                              // for the return value.
1054    ldr    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1055    add    r1, r2, r3
1056    str    r1, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1057    // After this "STR" the object is published to the thread local allocation stack,
1058    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1059    // It is not yet visible to the running (user) compiled code until after the return.
1060    //
1061    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1062    // the state of the object. It can be either:
1063    // 1) A partially valid object, with a null class pointer
1064    //       (because the initial state of TLAB buffers is all 0s/nulls).
1065    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1066    // Other states are not allowed.
1067    //
1068    // An object that is invalid only temporarily, and will eventually become valid.
1069    // The internal runtime code simply checks if the object is not null or is partial and then
1070    // ignores it.
1071    //
1072    // (Note: The actual check is done by checking that the object's class pointer is non-null.
1073    // Also, unlike rosalloc, the object can never be observed as null).
1074    POISON_HEAP_REF r0
1075    str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1076    mov    r0, r2
1077    // No barrier. The class is already observably initialized (otherwise the fast
1078    // path size check above would fail) and new-instance allocations are protected
1079    // from publishing by the compiler which inserts its own StoreStore barrier.
1080    bx     lr
1081.endm
1082
1083// The common code for art_quick_alloc_object_*region_tlab
1084// Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up.
1085// Caller must execute a constructor fence after this.
1086.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
1087ENTRY \name
1088    // Fast path tlab allocation.
1089    // r0: type, rSELF (r9): Thread::Current
1090    // r1, r2, r3, r12: free.
1091    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
1092.Lslow_path\name:
1093    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
1094    mov    r1, rSELF                                          // Pass Thread::Current.
1095    bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
1096    RESTORE_SAVE_REFS_ONLY_FRAME
1097    REFRESH_MARKING_REGISTER
1098    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
1099END \name
1100.endm
1101
1102GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
1103GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
1104GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
1105GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1106
1107
1108// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
1109// and art_quick_alloc_array_resolved/initialized_region_tlab.
1110//
1111// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free.
1112// Need to preserve r0 and r1 to the slow path.
1113.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
1114    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
1115                                                              // (addr + 7) & ~7.
1116
1117                                                              // Load thread_local_pos (r3) and
1118                                                              // thread_local_end (r12) with ldrd.
1119                                                              // Check constraints for ldrd.
1120#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1121#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1122#endif
1123    ldrd   r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1124    sub    r12, r12, r3                                       // Compute the remaining buf size.
1125    cmp    r2, r12                                            // Check if the total_size fits.
1126    // The array class is always initialized here. Unlike new-instance,
1127    // this does not act as a double test.
1128    bhi    \slowPathLabel
1129    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1130    add    r2, r2, r3
1131    str    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1132    POISON_HEAP_REF r0
1133    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1134    str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
1135    mov    r0, r3
1136// new-array is special. The class is loaded and immediately goes to the Initialized state
1137// before it is published. Therefore the only fence needed is for the publication of the object.
1138// See ClassLinker::CreateArrayClass() for more details.
1139
1140// For publication of the new array, we don't need a 'dmb ishst' here.
1141// The compiler generates 'dmb ishst' for all new-array insts.
1142    bx     lr
1143.endm
1144
1145// Caller must execute a constructor fence after this.
1146.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1147ENTRY \name
1148    // Fast path array allocation for region tlab allocation.
1149    // r0: mirror::Class* type
1150    // r1: int32_t component_count
1151    // rSELF (r9): thread
1152    // r2, r3, r12: free.
1153    \size_setup .Lslow_path\name
1154    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
1155.Lslow_path\name:
1156    // r0: mirror::Class* klass
1157    // r1: int32_t component_count
1158    // r2: Thread* self
1159    SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
1160    mov    r2, rSELF               // pass Thread::Current
1161    bl     \entrypoint
1162    RESTORE_SAVE_REFS_ONLY_FRAME
1163    REFRESH_MARKING_REGISTER
1164    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
1165END \name
1166.endm
1167
1168.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
1169    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
1170    cmp r1, r2
1171    bhi \slow_path
1172                                                            // Array classes are never finalizable
1173                                                            // or uninitialized, no need to check.
1174    ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
1175    UNPOISON_HEAP_REF r3
1176    ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1177    lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
1178                                                            // bits.
1179    lsl    r2, r1, r3                                       // Calculate data size
1180                                                            // Add array data offset and alignment.
1181    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1182#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1183#error Long array data offset must be 4 greater than int array data offset.
1184#endif
1185
1186    add    r3, r3, #1                                       // Add 4 to the length only if the
1187                                                            // component size shift is 3
1188                                                            // (for 64 bit alignment).
1189    and    r3, r3, #4
1190    add    r2, r2, r3
1191.endm
1192
1193.macro COMPUTE_ARRAY_SIZE_8 slow_path
1194    // Possibly a large object, go slow.
1195    // Also does negative array size check.
1196    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
1197    cmp r1, r2
1198    bhi \slow_path
1199    // Add array data offset and alignment.
1200    add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1201.endm
1202
1203.macro COMPUTE_ARRAY_SIZE_16 slow_path
1204    // Possibly a large object, go slow.
1205    // Also does negative array size check.
1206    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
1207    cmp r1, r2
1208    bhi \slow_path
1209    lsl    r2, r1, #1
1210    // Add array data offset and alignment.
1211    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1212.endm
1213
1214.macro COMPUTE_ARRAY_SIZE_32 slow_path
1215    // Possibly a large object, go slow.
1216    // Also does negative array size check.
1217    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
1218    cmp r1, r2
1219    bhi \slow_path
1220    lsl    r2, r1, #2
1221    // Add array data offset and alignment.
1222    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1223.endm
1224
1225.macro COMPUTE_ARRAY_SIZE_64 slow_path
1226    // Possibly a large object, go slow.
1227    // Also does negative array size check.
1228    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
1229    cmp r1, r2
1230    bhi \slow_path
1231    lsl    r2, r1, #3
1232    // Add array data offset and alignment.
1233    add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1234.endm
1235
1236GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1237GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1238GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1239GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
1240GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
1241GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1242GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
1243GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
1244GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
1245GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1246
1247    /*
1248     * Called by managed code when the value in rSUSPEND has been decremented to 0.
1249     */
1250    .extern artTestSuspendFromCode
1251ENTRY art_quick_test_suspend
1252    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
1253    mov    r0, rSELF
1254    bl     artTestSuspendFromCode               @ (Thread*)
1255    RESTORE_SAVE_EVERYTHING_FRAME
1256    REFRESH_MARKING_REGISTER
1257    bx     lr
1258END art_quick_test_suspend
1259
1260    .extern artImplicitSuspendFromCode
1261ENTRY art_quick_implicit_suspend
1262    mov    r0, rSELF
1263    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
1264    bl     artImplicitSuspendFromCode         @ (Thread*)
1265    RESTORE_SAVE_REFS_ONLY_FRAME
1266    REFRESH_MARKING_REGISTER
1267    bx     lr
1268END art_quick_implicit_suspend
1269
1270    /*
1271     * Called by managed code that is attempting to call a method on a proxy class. On entry
1272     * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
1273     * frame size of the invoked proxy method agrees with a ref and args callee save frame.
1274     */
1275     .extern artQuickProxyInvokeHandler
1276ENTRY art_quick_proxy_invoke_handler
1277    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1278    mov     r2, rSELF              @ pass Thread::Current
1279    mov     r3, sp                 @ pass SP
1280    blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
1281    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1282    // Tear down the callee-save frame. Skip arg registers.
1283    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1284    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1285    RESTORE_SAVE_REFS_ONLY_FRAME
1286    REFRESH_MARKING_REGISTER
1287    cbnz    r2, 1f                 @ success if no exception is pending
1288    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1289    bx      lr                     @ return on success
12901:
1291    DELIVER_PENDING_EXCEPTION
1292END art_quick_proxy_invoke_handler
1293
1294    /*
1295     * Called to resolve an imt conflict.
1296     * r0 is the conflict ArtMethod.
1297     * r12 is a hidden argument that holds the target interface method.
1298     *
1299     * Note that this stub writes to r0, r4, and r12.
1300     */
1301ENTRY art_quick_imt_conflict_trampoline
1302    ldr     r0, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
1303    ldr     r4, [r0]  // Load first entry in ImtConflictTable.
1304.Limt_table_iterate:
1305    cmp     r4, r12
1306    // Branch if found. Benchmarks have shown doing a branch here is better.
1307    beq     .Limt_table_found
1308    // If the entry is null, the interface method is not in the ImtConflictTable.
1309    cbz     r4, .Lconflict_trampoline
1310    // Iterate over the entries of the ImtConflictTable.
1311    ldr     r4, [r0, #(2 * __SIZEOF_POINTER__)]!
1312    b .Limt_table_iterate
1313.Limt_table_found:
1314    // We successfully hit an entry in the table. Load the target method
1315    // and jump to it.
1316    ldr     r0, [r0, #__SIZEOF_POINTER__]
1317    ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
1318.Lconflict_trampoline:
1319    // Pass interface method to the trampoline.
1320    mov r0, r12
1321    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1322END art_quick_imt_conflict_trampoline
1323
1324    .extern artQuickResolutionTrampoline
1325ENTRY art_quick_resolution_trampoline
1326    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1327    mov     r2, rSELF              @ pass Thread::Current
1328    mov     r3, sp                 @ pass SP
1329    blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
1330    CFI_REMEMBER_STATE
1331    cbz     r0, 1f                 @ is code pointer null? goto exception
1332    mov     r12, r0
1333    ldr     r0, [sp, #0]           @ load resolved method in r0
1334    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1335    REFRESH_MARKING_REGISTER
1336    bx      r12                    @ tail-call into actual code
13371:
1338    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
1339    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1340    DELIVER_PENDING_EXCEPTION
1341END art_quick_resolution_trampoline
1342
1343    /*
1344     * Called to do a generic JNI down-call
1345     */
1346ENTRY art_quick_generic_jni_trampoline
1347    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1348
1349    // Save rSELF
1350    mov r11, rSELF
1351    // Save SP , so we can have static CFI info. r10 is saved in ref_and_args.
1352    mov r10, sp
1353    .cfi_def_cfa_register r10
1354
1355    sub sp, sp, #GENERIC_JNI_TRAMPOLINE_RESERVED_AREA
1356
1357    // prepare for artQuickGenericJniTrampoline call
1358    // (Thread*, managed_sp, reserved_area)
1359    //    r0         r1            r2   <= C calling convention
1360    //  rSELF       r10            sp   <= where they are
1361
1362    mov r0, rSELF   // Thread*
1363    mov r1, r10     // SP for the managed frame.
1364    mov r2, sp      // reserved area for arguments and other saved data (up to managed frame)
1365    blx artQuickGenericJniTrampoline  // (Thread*, managed_sp, reserved_area)
1366
1367    // The C call will have registered the complete save-frame on success.
1368    // The result of the call is:
1369    //     r0: pointer to native code, 0 on error.
1370    //     The bottom of the reserved area contains values for arg registers,
1371    //     hidden arg register and SP for out args for the call.
1372
1373    // Check for error (class init check or locking for synchronized native method can throw).
1374    cbz r0, .Lexception_in_native
1375
1376    // Save the code pointer
1377    mov lr, r0
1378
1379    // Load parameters from frame into registers r0-r3 (soft-float),
1380    // hidden arg (r4) for @CriticalNative and SP for out args.
1381    pop {r0-r3, r4, ip}
1382
1383    // Apply the new SP for out args, releasing unneeded reserved area.
1384    mov sp, ip
1385
1386    // Softfloat.
1387    // TODO: Change to hardfloat when supported.
1388
1389    blx lr            // native call.
1390
1391    // result sign extension is handled in C code
1392    // prepare for artQuickGenericJniEndTrampoline call
1393    // (Thread*, result, result_f)
1394    //    r0      r2,r3    stack       <= C calling convention
1395    //    r11     r0,r1    r0,r1       <= where they are
1396    sub sp, sp, #8 // Stack alignment.
1397
1398    push {r0-r1}
1399    mov r3, r1
1400    mov r2, r0
1401    mov r0, r11
1402
1403    blx artQuickGenericJniEndTrampoline
1404
1405    // Restore self pointer.
1406    mov rSELF, r11
1407
1408    // Pending exceptions possible.
1409    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1410    cbnz r2, .Lexception_in_native
1411
1412    // Tear down the alloca.
1413    mov sp, r10
1414
1415    // store into fpr, for when it's a fpr return...
1416    vmov d0, r0, r1
1417
1418    LOAD_RUNTIME_INSTANCE r2
1419    ldrb r2, [r2,  #RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE]
1420    CFI_REMEMBER_STATE
1421    cbnz r2, .Lcall_method_exit_hook
1422.Lcall_method_exit_hook_done:
1423
1424    // Tear down the callee-save frame. Skip arg registers.
1425    .cfi_def_cfa_register sp
1426    add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - 7 * 4)
1427    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - 7 * 4)
1428    pop {r5-r8, r10-r11, lr}  @ This must match the non-args registers restored by
1429    .cfi_restore r5           @ `RESTORE_SAVE_REFS_AND_ARGS_FRAME`.
1430    .cfi_restore r6
1431    .cfi_restore r7
1432    .cfi_restore r8
1433    .cfi_restore r10
1434    .cfi_restore r11
1435    .cfi_restore lr
1436    .cfi_adjust_cfa_offset -(7 * 4)
1437    REFRESH_MARKING_REGISTER
1438    bx lr      // ret
1439
1440.Lcall_method_exit_hook:
1441    CFI_RESTORE_STATE_AND_DEF_CFA r10, FRAME_SIZE_SAVE_REFS_AND_ARGS
1442    mov r2, #FRAME_SIZE_SAVE_REFS_AND_ARGS
1443    bl art_quick_method_exit_hook
1444    b .Lcall_method_exit_hook_done
1445
1446.Lexception_in_native:
1447    ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
1448    add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
1449    mov sp, ip
1450    bl art_deliver_pending_exception
1451END art_quick_generic_jni_trampoline
1452
1453ENTRY art_deliver_pending_exception
1454    # This will create a new save-all frame, required by the runtime.
1455    DELIVER_PENDING_EXCEPTION
1456END art_deliver_pending_exception
1457
1458    .extern artQuickToInterpreterBridge
1459ENTRY art_quick_to_interpreter_bridge
1460    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
1461    mov     r1, rSELF              @ pass Thread::Current
1462    mov     r2, sp                 @ pass SP
1463    blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
1464    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1465    // Tear down the callee-save frame. Skip arg registers.
1466    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1467    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1468    RESTORE_SAVE_REFS_ONLY_FRAME
1469    REFRESH_MARKING_REGISTER
1470    cbnz    r2, 1f                 @ success if no exception is pending
1471    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1472    bx      lr                     @ return on success
14731:
1474    DELIVER_PENDING_EXCEPTION
1475END art_quick_to_interpreter_bridge
1476
1477/*
1478 * Called to attempt to execute an obsolete method.
1479 */
1480ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1481
1482    /*
1483     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1484     * will long jump to the interpreter bridge.
1485     */
1486    .extern artDeoptimizeFromCompiledCode
1487ENTRY art_quick_deoptimize_from_compiled_code
1488    SETUP_SAVE_EVERYTHING_FRAME r1
1489    mov    r1, rSELF                      @ pass Thread::Current
1490    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
1491END art_quick_deoptimize_from_compiled_code
1492
1493    /*
1494     * Signed 64-bit integer multiply.
1495     *
1496     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
1497     *        WX
1498     *      x YZ
1499     *  --------
1500     *     ZW ZX
1501     *  YW YX
1502     *
1503     * The low word of the result holds ZX, the high word holds
1504     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
1505     * it doesn't fit in the low 64 bits.
1506     *
1507     * Unlike most ARM math operations, multiply instructions have
1508     * restrictions on using the same register more than once (Rd and Rm
1509     * cannot be the same).
1510     */
1511    /* mul-long vAA, vBB, vCC */
1512ENTRY art_quick_mul_long
1513    push    {r9-r10}
1514    .cfi_adjust_cfa_offset 8
1515    .cfi_rel_offset r9, 0
1516    .cfi_rel_offset r10, 4
1517    mul     ip, r2, r1                  @  ip<- ZxW
1518    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
1519    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
1520    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
1521    mov     r0,r9
1522    mov     r1,r10
1523    pop     {r9-r10}
1524    .cfi_adjust_cfa_offset -8
1525    .cfi_restore r9
1526    .cfi_restore r10
1527    bx      lr
1528END art_quick_mul_long
1529
1530    /*
1531     * Long integer shift.  This is different from the generic 32/64-bit
1532     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1533     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1534     * 6 bits.
1535     * On entry:
1536     *   r0: low word
1537     *   r1: high word
1538     *   r2: shift count
1539     */
1540    /* shl-long vAA, vBB, vCC */
1541ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
1542    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1543    mov     r1, r1, asl r2              @  r1<- r1 << r2
1544    rsb     r3, r2, #32                 @  r3<- 32 - r2
1545    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
1546    subs    ip, r2, #32                 @  ip<- r2 - 32
1547    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
1548    mov     r0, r0, asl r2              @  r0<- r0 << r2
1549    bx      lr
1550END art_quick_shl_long
1551
1552    /*
1553     * Long integer shift.  This is different from the generic 32/64-bit
1554     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1555     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1556     * 6 bits.
1557     * On entry:
1558     *   r0: low word
1559     *   r1: high word
1560     *   r2: shift count
1561     */
1562    /* shr-long vAA, vBB, vCC */
1563ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
1564    and     r2, r2, #63                 @ r0<- r0 & 0x3f
1565    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
1566    rsb     r3, r2, #32                 @  r3<- 32 - r2
1567    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1568    subs    ip, r2, #32                 @  ip<- r2 - 32
1569    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
1570    mov     r1, r1, asr r2              @  r1<- r1 >> r2
1571    bx      lr
1572END art_quick_shr_long
1573
1574    /*
1575     * Long integer shift.  This is different from the generic 32/64-bit
1576     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1577     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1578     * 6 bits.
1579     * On entry:
1580     *   r0: low word
1581     *   r1: high word
1582     *   r2: shift count
1583     */
1584    /* ushr-long vAA, vBB, vCC */
1585ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
1586    and     r2, r2, #63                 @ r0<- r0 & 0x3f
1587    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
1588    rsb     r3, r2, #32                 @  r3<- 32 - r2
1589    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1590    subs    ip, r2, #32                 @  ip<- r2 - 32
1591    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
1592    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
1593    bx      lr
1594END art_quick_ushr_long
1595
1596    /*
1597     * String's indexOf.
1598     *
1599     * On entry:
1600     *    r0:   string object (known non-null)
1601     *    r1:   char to match (known <= 0xFFFF)
1602     *    r2:   Starting offset in string data
1603     */
1604ENTRY art_quick_indexof
1605    push {r4, r10-r11, lr} @ 4 words of callee saves
1606    .cfi_adjust_cfa_offset 16
1607    .cfi_rel_offset r4, 0
1608    .cfi_rel_offset r10, 4
1609    .cfi_rel_offset r11, 8
1610    .cfi_rel_offset lr, 12
1611#if (STRING_COMPRESSION_FEATURE)
1612    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
1613#else
1614    ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
1615#endif
1616    add   r0, #MIRROR_STRING_VALUE_OFFSET
1617#if (STRING_COMPRESSION_FEATURE)
1618    /* r4 count (with flag) and r3 holds actual length */
1619    lsr   r3, r4, #1
1620#endif
1621    /* Clamp start to [0..count] */
1622    cmp   r2, #0
1623    it    lt
1624    movlt r2, #0
1625    cmp   r2, r3
1626    it    gt
1627    movgt r2, r3
1628
1629    /* Save a copy in r12 to later compute result */
1630    mov   r12, r0
1631
1632    /* Build pointer to start of data to compare and pre-bias */
1633#if (STRING_COMPRESSION_FEATURE)
1634    lsrs  r4, r4, #1
1635    bcc   .Lstring_indexof_compressed
1636#endif
1637    add   r0, r0, r2, lsl #1
1638    sub   r0, #2
1639
1640    /* Compute iteration count */
1641    sub   r2, r3, r2
1642
1643    /*
1644     * At this point we have:
1645     *   r0: start of data to test
1646     *   r1: char to compare
1647     *   r2: iteration count
1648     *   r4: compression style (used temporarily)
1649     *   r12: original start of string data
1650     *   r3, r4, r10, r11 available for loading string data
1651     */
1652
1653    subs  r2, #4
1654    blt   .Lindexof_remainder
1655
1656.Lindexof_loop4:
1657    ldrh  r3, [r0, #2]!
1658    ldrh  r4, [r0, #2]!
1659    ldrh  r10, [r0, #2]!
1660    ldrh  r11, [r0, #2]!
1661    cmp   r3, r1
1662    beq   .Lmatch_0
1663    cmp   r4, r1
1664    beq   .Lmatch_1
1665    cmp   r10, r1
1666    beq   .Lmatch_2
1667    cmp   r11, r1
1668    beq   .Lmatch_3
1669    subs  r2, #4
1670    bge   .Lindexof_loop4
1671
1672.Lindexof_remainder:
1673    adds  r2, #4
1674    beq   .Lindexof_nomatch
1675
1676.Lindexof_loop1:
1677    ldrh  r3, [r0, #2]!
1678    cmp   r3, r1
1679    beq   .Lmatch_3
1680    subs  r2, #1
1681    bne   .Lindexof_loop1
1682
1683.Lindexof_nomatch:
1684    mov   r0, #-1
1685    pop {r4, r10-r11, pc}
1686
1687.Lmatch_0:
1688    sub   r0, #6
1689    sub   r0, r12
1690    asr   r0, r0, #1
1691    pop {r4, r10-r11, pc}
1692.Lmatch_1:
1693    sub   r0, #4
1694    sub   r0, r12
1695    asr   r0, r0, #1
1696    pop {r4, r10-r11, pc}
1697.Lmatch_2:
1698    sub   r0, #2
1699    sub   r0, r12
1700    asr   r0, r0, #1
1701    pop {r4, r10-r11, pc}
1702.Lmatch_3:
1703    sub   r0, r12
1704    asr   r0, r0, #1
1705    pop {r4, r10-r11, pc}
1706#if (STRING_COMPRESSION_FEATURE)
1707.Lstring_indexof_compressed:
1708    add   r0, r0, r2
1709    sub   r0, #1
1710    sub   r2, r3, r2
1711.Lstring_indexof_compressed_loop:
1712    subs  r2, #1
1713    blt   .Lindexof_nomatch
1714    ldrb  r3, [r0, #1]!
1715    cmp   r3, r1
1716    beq   .Lstring_indexof_compressed_matched
1717    b     .Lstring_indexof_compressed_loop
1718.Lstring_indexof_compressed_matched:
1719    sub   r0, r12
1720    pop {r4, r10-r11, pc}
1721#endif
1722END art_quick_indexof
1723
1724    /* Assembly routines used to handle ABI differences. */
1725
1726    /* double fmod(double a, double b) */
1727    .extern fmod
1728ENTRY art_quick_fmod
1729    push  {lr}
1730    .cfi_adjust_cfa_offset 4
1731    .cfi_rel_offset lr, 0
1732    sub   sp, #4
1733    .cfi_adjust_cfa_offset 4
1734    vmov  r0, r1, d0
1735    vmov  r2, r3, d1
1736    bl    fmod
1737    vmov  d0, r0, r1
1738    add   sp, #4
1739    .cfi_adjust_cfa_offset -4
1740    pop   {pc}
1741END art_quick_fmod
1742
1743    /* float fmodf(float a, float b) */
1744     .extern fmodf
1745ENTRY art_quick_fmodf
1746    push  {lr}
1747    .cfi_adjust_cfa_offset 4
1748    .cfi_rel_offset lr, 0
1749    sub   sp, #4
1750    .cfi_adjust_cfa_offset 4
1751    vmov  r0, r1, d0
1752    bl    fmodf
1753    vmov  s0, r0
1754    add   sp, #4
1755    .cfi_adjust_cfa_offset -4
1756    pop   {pc}
1757END art_quick_fmodf
1758
1759    /* int64_t art_d2l(double d) */
1760    .extern art_d2l
1761ENTRY art_quick_d2l
1762    vmov  r0, r1, d0
1763    b     art_d2l
1764END art_quick_d2l
1765
1766    /* int64_t art_f2l(float f) */
1767    .extern art_f2l
1768ENTRY art_quick_f2l
1769    vmov  r0, s0
1770    b     art_f2l
1771END art_quick_f2l
1772
1773    /* float art_l2f(int64_t l) */
1774    .extern art_l2f
1775ENTRY art_quick_l2f
1776    push  {lr}
1777    .cfi_adjust_cfa_offset 4
1778    .cfi_rel_offset lr, 0
1779    sub   sp, #4
1780    .cfi_adjust_cfa_offset 4
1781    bl    art_l2f
1782    vmov  s0, r0
1783    add   sp, #4
1784    .cfi_adjust_cfa_offset -4
1785    pop   {pc}
1786END art_quick_l2f
1787
1788    .extern artStringBuilderAppend
1789ENTRY art_quick_string_builder_append
1790    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
1791    add    r1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  @ pass args
1792    mov    r2, rSELF                    @ pass Thread::Current
1793    bl     artStringBuilderAppend       @ (uint32_t, const unit32_t*, Thread*)
1794    RESTORE_SAVE_REFS_ONLY_FRAME
1795    REFRESH_MARKING_REGISTER
1796    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
1797END art_quick_string_builder_append
1798
1799    /*
1800     * Create a function `name` calling the ReadBarrier::Mark routine,
1801     * getting its argument and returning its result through register
1802     * `reg`, saving and restoring all caller-save registers.
1803     *
1804     * IP is clobbered; `reg` must not be IP.
1805     *
1806     * If `reg` is different from `r0`, the generated function follows a
1807     * non-standard runtime calling convention:
1808     * - register `reg` (which may be different from R0) is used to pass the (sole) argument,
1809     * - register `reg` (which may be different from R0) is used to return the result,
1810     * - all other registers are callee-save (the values they hold are preserved).
1811     */
1812.macro READ_BARRIER_MARK_REG name, reg
1813ENTRY \name
1814    // Null check so that we can load the lock word.
1815    SMART_CBZ \reg, .Lret_rb_\name
1816    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
1817    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
1818    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
1819    beq .Lnot_marked_rb_\name
1820    // Already marked, return right away.
1821.Lret_rb_\name:
1822    bx lr
1823
1824.Lnot_marked_rb_\name:
1825    // Test that both the forwarding state bits are 1.
1826#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
1827    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
1828    // the highest bits and the "forwarding address" state to have all bits set.
1829#error "Unexpected lock word state shift or forwarding address state value."
1830#endif
1831    cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
1832    bhs .Lret_forwarding_address\name
1833
1834.Lslow_rb_\name:
1835    // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to
1836    // make a tail call here. Currently, it serves only for stack alignment but
1837    // we may reintroduce kSaveEverything calls here in the future.
1838    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
1839    .cfi_adjust_cfa_offset 32
1840    .cfi_rel_offset r0, 0
1841    .cfi_rel_offset r1, 4
1842    .cfi_rel_offset r2, 8
1843    .cfi_rel_offset r3, 12
1844    .cfi_rel_offset r4, 16
1845    .cfi_rel_offset r9, 20
1846    .cfi_rel_offset ip, 24
1847    .cfi_rel_offset lr, 28
1848
1849    .ifnc \reg, r0
1850      mov   r0, \reg                    @ pass arg1 - obj from `reg`
1851    .endif
1852
1853    vpush {s0-s15}                      @ save floating-point caller-save registers
1854    .cfi_adjust_cfa_offset 64
1855    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
1856    vpop {s0-s15}                       @ restore floating-point registers
1857    .cfi_adjust_cfa_offset -64
1858
1859    .ifc \reg, r0                       @ Save result to the stack slot or destination register.
1860      str r0, [sp, #0]
1861    .else
1862      .ifc \reg, r1
1863        str r0, [sp, #4]
1864      .else
1865        .ifc \reg, r2
1866          str r0, [sp, #8]
1867        .else
1868          .ifc \reg, r3
1869            str r0, [sp, #12]
1870          .else
1871            .ifc \reg, r4
1872              str r0, [sp, #16]
1873            .else
1874              .ifc \reg, r9
1875                str r0, [sp, #20]
1876              .else
1877                mov \reg, r0
1878              .endif
1879            .endif
1880          .endif
1881        .endif
1882      .endif
1883    .endif
1884
1885    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
1886    .cfi_adjust_cfa_offset -32
1887    .cfi_restore r0
1888    .cfi_restore r1
1889    .cfi_restore r2
1890    .cfi_restore r3
1891    .cfi_restore r4
1892    .cfi_restore r9
1893    .cfi_restore ip
1894    .cfi_restore lr
1895    bx lr
1896.Lret_forwarding_address\name:
1897    // Shift left by the forwarding address shift. This clears out the state bits since they are
1898    // in the top 2 bits of the lock word.
1899    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
1900    bx lr
1901END \name
1902.endm
1903
1904READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
1905READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
1906READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
1907READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
1908READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
1909READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
1910READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
1911READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
1912READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
1913READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
1914READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
1915READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
1916
1917// Helper macros for Baker CC read barrier mark introspection (BRBMI).
1918.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
1919    \macro_for_register r0
1920    \macro_for_register r1
1921    \macro_for_register r2
1922    \macro_for_register r3
1923    \macro_for_register r4
1924    \macro_for_register r5
1925    \macro_for_register r6
1926    \macro_for_register r7
1927    \macro_for_reserved_register  // r8 (rMR) is the marking register.
1928    \macro_for_register r9
1929    \macro_for_register r10
1930    \macro_for_register r11
1931    \macro_for_reserved_register  // IP is reserved.
1932    \macro_for_reserved_register  // SP is reserved.
1933    \macro_for_reserved_register  // LR is reserved.
1934    \macro_for_reserved_register  // PC is reserved.
1935.endm
1936
1937.macro BRBMI_RETURN_SWITCH_CASE reg
1938    .balign 8
1939.Lmark_introspection_return_switch_case_\reg:
1940    mov     rMR, #1
1941    mov     \reg, ip
1942    bx      lr
1943.endm
1944
1945.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
1946    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
1947.endm
1948
1949.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
1950    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
1951.endm
1952
1953#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
1954#error "Array and field introspection code sharing requires same LDR offset."
1955#endif
1956.macro BRBMI_ARRAY_LOAD index_reg
1957    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
1958    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
1959    .balign 8                                           // Add padding to 8 bytes.
1960.endm
1961
1962.macro BRBMI_BKPT_FILL_4B
1963    bkpt    0
1964    bkpt    0
1965.endm
1966
1967.macro BRBMI_BKPT_FILL_8B
1968    BRBMI_BKPT_FILL_4B
1969    BRBMI_BKPT_FILL_4B
1970.endm
1971
1972.macro BRBMI_RUNTIME_CALL
1973    // Note: This macro generates exactly 22 bytes of code. The core register
1974    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
1975
1976    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
1977    .cfi_adjust_cfa_offset 24
1978    .cfi_rel_offset r0, 0
1979    .cfi_rel_offset r1, 4
1980    .cfi_rel_offset r2, 8
1981    .cfi_rel_offset r3, 12
1982    .cfi_rel_offset r7, 16
1983    .cfi_rel_offset lr, 20
1984
1985    mov     r0, ip                    // Pass the reference.
1986    vpush {s0-s15}                    // save floating-point caller-save registers
1987    .cfi_adjust_cfa_offset 64
1988    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
1989    vpop    {s0-s15}                  // restore floating-point registers
1990    .cfi_adjust_cfa_offset -64
1991    mov     ip, r0                    // Move reference to ip in preparation for return switch.
1992
1993    pop     {r0-r3, r7, lr}           // Restore registers.
1994    .cfi_adjust_cfa_offset -24
1995    .cfi_restore r0
1996    .cfi_restore r1
1997    .cfi_restore r2
1998    .cfi_restore r3
1999    .cfi_restore r7
2000    .cfi_restore lr
2001.endm
2002
2003.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
2004    // If reference is null, just return it in the right register.
2005    cmp     ip, #0
2006    beq     .Lmark_introspection_return\label_suffix
2007    // Use rMR as temp and check the mark bit of the reference.
2008    ldr     rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2009    tst     rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2010    beq     .Lmark_introspection_unmarked\label_suffix
2011.Lmark_introspection_return\label_suffix:
2012.endm
2013
2014.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
2015.Lmark_introspection_unmarked\label_suffix:
2016    // Check if the top two bits are one, if this is the case it is a forwarding address.
2017#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2018    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2019    // the highest bits and the "forwarding address" state to have all bits set.
2020#error "Unexpected lock word state shift or forwarding address state value."
2021#endif
2022    cmp     rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2023    bhs     .Lmark_introspection_forwarding_address\label_suffix
2024.endm
2025
2026.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
2027.Lmark_introspection_forwarding_address\label_suffix:
2028    // Note: This macro generates exactly 22 bytes of code, the branch is near.
2029
2030    // Shift left by the forwarding address shift. This clears out the state bits since they are
2031    // in the top 2 bits of the lock word.
2032    lsl     ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2033    b       .Lmark_introspection_return\label_suffix
2034.endm
2035
2036.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
2037    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
2038    ldrh    rMR, [lr, #(-1 + \ldr_offset + 2)]
2039.endm
2040
2041.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
2042    // Load the 16-bit instruction. Adjust for the thumb state in LR.
2043    ldrh    rMR, [lr, #(-1 + \ldr_offset)]
2044.endm
2045
2046.macro BRBMI_EXTRACT_RETURN_REG_wide
2047    lsr     rMR, rMR, #12             // Extract `ref_reg`.
2048.endm
2049
2050.macro BRBMI_EXTRACT_RETURN_REG_narrow
2051    and     rMR, rMR, #7              // Extract `ref_reg`.
2052.endm
2053
2054.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix
2055    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset
2056    BRBMI_EXTRACT_RETURN_REG\label_suffix
2057.endm
2058
2059.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix
2060    .balign 32
2061    .thumb_func
2062    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
2063    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2064    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2065art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
2066    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix
2067.endm
2068
2069.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix
2070    .balign 16
2071    // Note: Generates exactly 16 bytes of code.
2072    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
2073    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix
2074    b .Lmark_introspection_runtime_call
2075.endm
2076
2077    /*
2078     * Use introspection to load a reference from the same address as the LDR
2079     * instruction in generated code would load (unless loaded by the thunk,
2080     * see below), call ReadBarrier::Mark() with that reference if needed
2081     * and return it in the same register as the LDR instruction would load.
2082     *
2083     * The entrypoint is called through a thunk that differs across load kinds.
2084     * For field and array loads the LDR instruction in generated code follows
2085     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
2086     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
2087     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
2088     * knows the holder and performs the gray bit check, returning to the LDR
2089     * instruction if the object is not gray, so this entrypoint no longer
2090     * needs to know anything about the holder. For GC root loads, the LDR
2091     * instruction in generated code precedes the branch to the thunk, i.e. the
2092     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
2093     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
2094     * not do the gray bit check.
2095     *
2096     * For field accesses and array loads with a constant index the thunk loads
2097     * the reference into IP using introspection and calls the main entrypoint
2098     * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or
2099     * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known
2100     * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)
2101     * from the main entrypoint and the thunk adjusts the entrypoint pointer.
2102     * With heap poisoning enabled, the passed reference is poisoned.
2103     *
2104     * For array accesses with non-constant index, the thunk inserts the bits
2105     * 0-5 of the LDR instruction to the entrypoint address, effectively
2106     * calculating a switch case label based on the index register (bits 0-3)
2107     * and adding an extra offset (bits 4-5 hold the shift which is always 2
2108     * for reference loads) to differentiate from the main entrypoint, then
2109     * moves the base register to IP and jumps to the switch case. Therefore
2110     * we need to align the main entrypoint to 512 bytes, accounting for
2111     * a 256-byte offset followed by 16 array entrypoints starting at
2112     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
2113     * (register) and a branch to the main entrypoint.
2114     *
2115     * For GC root accesses we cannot use the main entrypoint because of the
2116     * different offset where the LDR instruction in generated code is located.
2117     * (And even with heap poisoning enabled, GC roots are not poisoned.)
2118     * To re-use the same entrypoint pointer in generated code, we make sure
2119     * that the gc root entrypoint (a copy of the entrypoint with a different
2120     * offset for introspection loads) is located at a known offset (0xc0/0xe0
2121     * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/
2122     * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the
2123     * main entrypoint and the GC root thunk adjusts the entrypoint pointer,
2124     * moves the root register to IP and jumps to the customized entrypoint,
2125     * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
2126     * The thunk also performs all the fast-path checks, so we need just the
2127     * slow path.
2128     *
2129     * Intrinsic CAS operations (VarHandle*CompareAnd{Set,Exchange}* and
2130     * UnsafeCASObject) use similar code to the GC roots wide load but using
2131     * MOV (register, T3) instead of the LDR (immediate, T3), with destination
2132     * register in bits 8-11 rather than 12-15. Therefore they have their own
2133     * entrypoint, art_quick_read_barrier_mark_introspection_intrinsic_cas
2134     * at the offset BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET.
2135     * This is used only for high registers, low registers reuse the GC roots
2136     * narrow load entrypoint as the low 3 bits of the destination register
2137     * for MOV (register) encoding T1 match the LDR (immediate) encoding T1.
2138     *
2139     * The code structure is
2140     *   art_quick_read_barrier_mark_introspection:                   // @0x00
2141     *     Up to 32 bytes code for main entrypoint fast-path code for fields
2142     *     (and array elements with constant offset) with LDR encoding T3;
2143     *     jumps to the switch in the "narrow" entrypoint.
2144     *   art_quick_read_barrier_mark_introspection_narrow:            // @0x20
2145     *     Up to 48 bytes code for fast path code for fields (and array
2146     *     elements with constant offset) with LDR encoding T1, ending in the
2147     *     return switch instruction TBB and the table with switch offsets.
2148     *   .Lmark_introspection_return_switch_case_r0:                  // @0x50
2149     *     Exactly 88 bytes of code for the return switch cases (8 bytes per
2150     *     case, 11 cases; no code for reserved registers).
2151     *   .Lmark_introspection_forwarding_address_narrow:              // @0xa8
2152     *     Exactly 6 bytes to extract the forwarding address and jump to the
2153     *     "narrow" entrypoint fast path.
2154     *   .Lmark_introspection_return_switch_case_bad:                 // @0xae
2155     *     Exactly 2 bytes, bkpt for unexpected return register.
2156     *   .Lmark_introspection_unmarked_narrow:                        // @0xb0
2157     *     Exactly 16 bytes for "narrow" entrypoint slow path.
2158     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:     // @0xc0
2159     *     GC root entrypoint code for LDR encoding T3 (10 bytes); loads and
2160     *     extracts the return register and jumps to the runtime call.
2161     *   .Lmark_introspection_forwarding_address_wide:                // @0xca
2162     *     Exactly 6 bytes to extract the forwarding address and jump to the
2163     *     "wide" entrypoint fast path.
2164     *   .Lmark_introspection_unmarked_wide:                          // @0xd0
2165     *     Exactly 16 bytes for "wide" entrypoint slow path.
2166     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:   // @0xe0
2167     *     GC root entrypoint code for LDR encoding T1 (8 bytes); loads and
2168     *     extracts the return register and falls through to the runtime call.
2169     *   .Lmark_introspection_runtime_call:                           // @0xe8
2170     *     Exactly 24 bytes for the runtime call to MarkReg() and jump to the
2171     *     return switch.
2172     *   art_quick_read_barrier_mark_introspection_arrays:            // @0x100
2173     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
2174     *   art_quick_read_barrier_mark_introspection_intrinsic_cas:     // @0x180
2175     *     Intrinsic CAS entrypoint for MOV (register) encoding T3 (6 bytes).
2176     *     Loads the return register and jumps to the runtime call.
2177     */
2178#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2179ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512
2180    // At this point, IP contains the reference, rMR is clobbered by the thunk
2181    // and can be freely used as it will be set back to 1 before returning.
2182    // For heap poisoning, the reference is poisoned, so unpoison it first.
2183    UNPOISON_HEAP_REF ip
2184    // Check for null or marked, lock word is loaded into rMR.
2185    BRBMI_CHECK_NULL_AND_MARKED _wide
2186    // Load and extract the return register from the instruction.
2187    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2188    b       .Lmark_introspection_return_switch
2189
2190    .balign 32
2191    .thumb_func
2192    .type art_quick_read_barrier_mark_introspection_narrow, #function
2193    .hidden art_quick_read_barrier_mark_introspection_narrow
2194    .global art_quick_read_barrier_mark_introspection_narrow
2195art_quick_read_barrier_mark_introspection_narrow:
2196    // At this point, IP contains the reference, rMR is clobbered by the thunk
2197    // and can be freely used as it will be set back to 1 before returning.
2198    // For heap poisoning, the reference is poisoned, so unpoison it first.
2199    UNPOISON_HEAP_REF ip
2200    // Check for null or marked, lock word is loaded into rMR.
2201    BRBMI_CHECK_NULL_AND_MARKED _narrow
2202    // Load and extract the return register from the instruction.
2203    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2204.Lmark_introspection_return_switch:
2205    tbb     [pc, rMR]                 // Jump to the switch case.
2206.Lmark_introspection_return_table:
2207    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2208    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */
2209
2210    .balign 8
2211    BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow  // 6 bytes
2212.Lmark_introspection_return_switch_case_bad:
2213    bkpt                              // 2 bytes
2214
2215    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2216
2217    // 8 bytes for the loading and extracting of the return register.
2218    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
2219    // 2 bytes for near branch to the runtime call.
2220    b .Lmark_introspection_runtime_call
2221
2222    BRBMI_EXTRACT_FORWARDING_ADDRESS _wide  // Not even 4-byte aligned.
2223
2224    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2225
2226    // 8 bytes for the loading and extracting of the return register.
2227    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
2228    // And the runtime call and branch to the switch taking exactly 24 bytes
2229    // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch)
2230    // shall take the rest of the 32-byte section (within a cache line).
2231.Lmark_introspection_runtime_call:
2232    BRBMI_RUNTIME_CALL
2233    b       .Lmark_introspection_return_switch
2234
2235    .balign 256
2236    .thumb_func
2237    .type art_quick_read_barrier_mark_introspection_arrays, #function
2238    .hidden art_quick_read_barrier_mark_introspection_arrays
2239    .global art_quick_read_barrier_mark_introspection_arrays
2240art_quick_read_barrier_mark_introspection_arrays:
2241    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
2242
2243    .balign 8
2244    .thumb_func
2245    .type art_quick_read_barrier_mark_introspection_intrinsic_cas, #function
2246    .hidden art_quick_read_barrier_mark_introspection_intrinsic_cas
2247    .global art_quick_read_barrier_mark_introspection_intrinsic_cas
2248art_quick_read_barrier_mark_introspection_intrinsic_cas:
2249    // Load the byte of the MOV instruction that contains Rd. Adjust for the thumb state in LR.
2250    // The MOV (register, T3) is |11101010010|S|1111|(0)000|Rd|0000|Rm|, so the byte we read
2251    // here, i.e. |(0)000|Rd|, contains only the register number, the top 4 bits are 0.
2252    ldrb    rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET + 3)]
2253    b .Lmark_introspection_runtime_call
2254END art_quick_read_barrier_mark_introspection
2255#else  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2256ENTRY art_quick_read_barrier_mark_introspection
2257    bkpt                              // Unreachable.
2258END art_quick_read_barrier_mark_introspection
2259#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2260
2261.extern artInvokePolymorphic
2262ENTRY art_quick_invoke_polymorphic
2263    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
2264    mov     r0, r1                 @ r0 := receiver
2265    mov     r1, rSELF              @ r1 := Thread::Current
2266    mov     r2, sp                 @ r2 := SP
2267    bl      artInvokePolymorphic   @ artInvokePolymorphic(receiver, Thread*, SP)
2268    str     r1, [sp, 72]           @ r0:r1 := Result. Copy r1 to context.
2269    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2270    REFRESH_MARKING_REGISTER
2271    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2272    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2273END art_quick_invoke_polymorphic
2274
2275.extern artInvokeCustom
2276ENTRY art_quick_invoke_custom
2277    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
2278                                   @ r0 := call_site_idx
2279    mov     r1, rSELF              @ r1 := Thread::Current
2280    mov     r2, sp                 @ r2 := SP
2281    bl      artInvokeCustom        @ artInvokeCustom(call_site_idx, Thread*, SP)
2282    str     r1, [sp, #72]          @ Save r1 to context (r0:r1 = result)
2283    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2284    REFRESH_MARKING_REGISTER
2285    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2286    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2287END art_quick_invoke_custom
2288
2289// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
2290//  Argument 0: r0: The context pointer for ExecuteSwitchImpl.
2291//  Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call.
2292//  Argument 2: r2: The value of DEX PC (memory address of the methods bytecode).
2293ENTRY ExecuteSwitchImplAsm
2294    push {r4, lr}                                 // 2 words of callee saves.
2295    .cfi_adjust_cfa_offset 8
2296    .cfi_rel_offset r4, 0
2297    .cfi_rel_offset lr, 4
2298    mov r4, r2                                    // r4 = DEX PC
2299    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0)
2300    blx r1                                        // Call the wrapped method.
2301    pop {r4, pc}
2302END ExecuteSwitchImplAsm
2303
2304// r0 contains the class, r4 contains the inline cache. We can use ip as temporary.
2305ENTRY art_quick_update_inline_cache
2306#if (INLINE_CACHE_SIZE != 5)
2307#error "INLINE_CACHE_SIZE not as expected."
2308#endif
2309#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2310    // Don't update the cache if we are marking.
2311    cmp rMR, #0
2312    bne .Ldone
2313#endif
2314.Lentry1:
2315    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2316    cmp ip, r0
2317    beq .Ldone
2318    cmp ip, #0
2319    bne .Lentry2
2320    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2321    cmp ip, #0
2322    bne .Lentry1
2323    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2324    cmp ip, #0
2325    bne .Ldone
2326    b .Lentry1
2327.Lentry2:
2328    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2329    cmp ip, r0
2330    beq .Ldone
2331    cmp ip, #0
2332    bne .Lentry3
2333    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2334    cmp ip, #0
2335    bne .Lentry2
2336    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2337    cmp ip, #0
2338    bne .Ldone
2339    b .Lentry2
2340.Lentry3:
2341    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2342    cmp ip, r0
2343    beq .Ldone
2344    cmp ip, #0
2345    bne .Lentry4
2346    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2347    cmp ip, #0
2348    bne .Lentry3
2349    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2350    cmp ip, #0
2351    bne .Ldone
2352    b .Lentry3
2353.Lentry4:
2354    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2355    cmp ip, r0
2356    beq .Ldone
2357    cmp ip, #0
2358    bne .Lentry5
2359    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2360    cmp ip, #0
2361    bne .Lentry4
2362    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2363    cmp ip, #0
2364    bne .Ldone
2365    b .Lentry4
2366.Lentry5:
2367    // Unconditionally store, the inline cache is megamorphic.
2368    str  r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+16]
2369.Ldone:
2370    blx lr
2371END art_quick_update_inline_cache
2372
2373// On entry, method is at the bottom of the stack.
2374ENTRY art_quick_compile_optimized
2375    SETUP_SAVE_EVERYTHING_FRAME r0
2376    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
2377    mov r1, rSELF                            @ pass Thread::Current
2378    bl     artCompileOptimized               @ (ArtMethod*, Thread*)
2379    RESTORE_SAVE_EVERYTHING_FRAME
2380    // We don't need to restore the marking register here, as
2381    // artCompileOptimized doesn't allow thread suspension.
2382    blx lr
2383END art_quick_compile_optimized
2384
2385// On entry, method is at the bottom of the stack.
2386ENTRY art_quick_method_entry_hook
2387    SETUP_SAVE_EVERYTHING_FRAME r0
2388    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
2389    mov r1, rSELF                            @ pass Thread::Current
2390    mov r2, sp                               @ pass SP
2391    bl  artMethodEntryHook                   @ (ArtMethod*, Thread*, SP)
2392    RESTORE_SAVE_EVERYTHING_FRAME
2393    REFRESH_MARKING_REGISTER
2394    blx lr
2395END art_quick_method_entry_hook
2396
2397ENTRY art_quick_method_exit_hook
2398    SETUP_SAVE_EVERYTHING_FRAME r5
2399
2400    INCREASE_FRAME 4                          @ align stack
2401    push {r2}                                 @ pass frame_size stack
2402    .cfi_adjust_cfa_offset 4
2403    add r3, sp, #(8 + 8)                      @ store fpr_res pointer, in kSaveEverything frame
2404    add r2, sp, #(136 + 8)                    @ store gpr_res pointer, in kSaveEverything frame
2405    add r1, sp, #(FRAME_SIZE_SAVE_EVERYTHING + 8)   @ pass ArtMethod**
2406    mov r0, rSELF                             @ pass Thread::Current
2407    blx artMethodExitHook                     @ (Thread*, ArtMethod**, gpr_res*, fpr_res*,
2408                                              @ frame_size)
2409
2410    DECREASE_FRAME 8                          @ pop arguments on stack
2411    RESTORE_SAVE_EVERYTHING_FRAME
2412    REFRESH_MARKING_REGISTER
2413    blx lr
2414END art_quick_method_exit_hook
2415