1// Theory of operation. These invoke-X opcodes bounce to code labels in main.S which attempt a
2// variety of fast paths; the full asm doesn't fit in the per-opcode handler's size limit.
3//
4// Calling convention. There are three argument transfer types.
5// (A) Managed ABI -> Nterp. The ExecuteNterpImpl handles this case. We set up a fresh nterp frame
6//     and move arguments from machine arg registers (and sometimes stack) into the frame.
7// (B) Nterp -> Nterp. An invoke op's fast path handles this case. If we can stay in nterp, then
8//     we set up a fresh nterp frame, and copy the register slots from caller to callee.
9// (C) Nterp -> Managed ABI. Invoke op's remaining cases. To leave nterp, we read out arguments from
10//     the caller's nterp frame and place them into machine arg registers (and sometimes stack).
11//     Doing so requires obtaining and deciphering the method's shorty for arg type, width, and
12//     order info.
13//
14// Fast path structure.
15// (0) If the next method's "quick code" is nterp, then set up a fresh nterp frame and perform a
16//     vreg->vreg transfer. Jump to handler for the next method's first opcode.
17// - The following paths leave nterp. -
18// (1) If the next method is guaranteed to be only object refs, then the managed ABI is very simple:
19//     just place all arguments in the native arg registers using LWU. Call the quick code.
20// (2) The next method might have an arg/return shape that can avoid the shorty, or at least avoid
21//     most complications of the managed ABI arg setup.
22// (2.1) If the next method has 0 args, then peek ahead in dex: if no scalar return, then call the
23//       quick code. (Even when the next opcode is move-result-object, nterp will expect the
24//       reference at a0, matching where the managed ABI leaves it after the call.)
25// (2.2) If the next method has 0 args and scalar return, or has 1 arg, then obtain the shorty.
26// (2.2.1) Post-shorty: if 0 args, call the quick code. (After the call, a returned float must be
27//         copied from fa0 into a0.)
28// (2.2.2) Post-shorty: check the arg's shorty type. If 'L', we must load it with LWU. Otherwise, we
29//         load it with LW and store a copy into FA0 (to avoid another branch). Call the quick code.
30// - The fully pessimistic case. -
31// (3) The next method has 2+ arguments with a mix of float/double/long, OR it is polymorphic OR
32//     custom. Obtain the shorty and perform the full setup for managed ABI. Polymorphic and
33//     custom invokes are specially shunted to the runtime. Otherwise we call the quick code.
34//
35// Code organization. These functions are organized in a three tier structure to aid readability.
36// (P) The "front end" is an opcode handler, such as op_invoke_virtual(). They are defined in
37//     invoke.S. Since all the invoke code cannot fit in the allotted handler region, every invoke
38//     handler has code extending into a "back end".
39// (Q) The opcode handler calls a "back end" label that is located in main.S. The code for that
40//     label is defined in invoke.S. As a convention, the label in main.S is NterpInvokeVirtual. The
41//     code in invoke.S is nterp_invoke_virtual().
42// (R) For the Nterp to Nterp fast path case, the back end calls a label located in main.S, the code
43//     for which is defined in invoke.S. As a convention, the label in main.S is
44//     NterpToNterpInstance, and the code in invoke.S is nterp_to_nterp_instance().
45// Helpers for each tier are placed just after the functions of each tier.
46
47//
48// invoke-kind {vC, vD, vE, vF, vG}, meth@BBBB
49// Format 35c: A|G|op BBBB F|E|D|C
50//
51
52// invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB
53// Format 35c: A|G|6e BBBB F|E|D|C
54//
55// Note: invoke-virtual is used to invoke a normal virtual method (a method that is not private,
56// static, or final, and is also not a constructor).
%def op_invoke_virtual(range=""):
   // Tier (P) front end. The thread cache stores the *vtable index* (not an ArtMethod*) for
   // invoke-virtual, so the target is looked up in the receiver's vtable on every execution.
   EXPORT_PC
   FETCH s7, count=2                // s7 := F|E|D|C or CCCC (range)
   FETCH_FROM_THREAD_CACHE a0, /*slow path*/2f, t0, t1
                                    // a0 := method idx of resolved virtual method
1:
%  fetch_receiver(reg="a1", vreg="s7", range=range)
                                    // a1 := fp[C] (this)
   // Note: null case handled by SEGV handler.
   lwu t0, MIRROR_OBJECT_CLASS_OFFSET(a1)
                                    // t0 := klass object (32-bit addr)
   UNPOISON_HEAP_REF t0
   // Entry address = entry's byte offset in vtable + vtable's byte offset in klass object.
   sh3add a0, a0, t0                // a0 := entry's byte offset
   ld a0, MIRROR_CLASS_VTABLE_OFFSET_64(a0)
                                    // a0 := ArtMethod*
   tail NterpInvokeVirtual${range}  // args a0, a1, s7
2:
%  resolve_method_into_a0()         // slow path: resolve via runtime, then retry at 1:
   j 1b
77
78
79// invoke-super {vC, vD, vE, vF, vG}, meth@BBBB
80// Format 35c: A|G|6f BBBB F|E|D|C
81//
82// Note: When the method_id references a method of a non-interface class, invoke-super is used to
83// invoke the closest superclass's virtual method (as opposed to the one with the same method_id in
84// the calling class).
85// Note: In Dex files version 037 or later, if the method_id refers to an interface method,
86// invoke-super is used to invoke the most specific, non-overridden version of that method defined
87// on that interface. The same method restrictions hold as for invoke-virtual. In Dex files prior to
88// version 037, having an interface method_id is illegal and undefined.
%def op_invoke_super(range=""):
   // Tier (P) front end. Unlike invoke-virtual, the superclass target is resolved statically,
   // so the cache holds the ArtMethod* directly and no vtable walk is needed. Since the method
   // pointer load does not dereference the receiver, null must be checked explicitly here.
   EXPORT_PC
   FETCH s7, count=2              // s7 := F|E|D|C or CCCC (range)
   FETCH_FROM_THREAD_CACHE a0, /*slow path*/2f, t0, t1
                                  // a0 := ArtMethod*
1:
%  fetch_receiver(reg="a1", vreg="s7", range=range)
                                  // a1 := fp[C] (this)
   beqz a1, 3f                    // throw if null
   tail NterpInvokeSuper${range}  // args a0, a1, s7
2:
%  resolve_method_into_a0()       // slow path: resolve via runtime, then retry at 1:
   j 1b
3:
   tail common_errNullObject
104
105
106// invoke-direct {vC, vD, vE, vF, vG}, meth@BBBB
107// Format 35c: A|G|70 BBBB F|E|D|C
108//
109// Note: invoke-direct is used to invoke a non-static direct method (that is, an instance method
110// that is by its nature non-overridable, namely either a private instance method or a constructor).
111//
112// For additional context on string init, see b/28555675. The object reference is replaced after
113// the string factory call, so we disable thread-caching the resolution of string init, and skip
114// fast paths out to managed ABI calls.
%def op_invoke_direct(range=""):
   // Tier (P) front end. The cached value is always a plain ArtMethod*: String.<init> results
   // are never thread-cached (see header note on b/28555675), so the tag check is only needed
   // on the resolution slow path below.
   EXPORT_PC
   FETCH s7, count=2               // s7 := F|E|D|C or CCCC (range)
   FETCH_FROM_THREAD_CACHE a0, /*slow path*/2f, t0, t1
                                   // a0 := ArtMethod*, never String.<init>
1:
%  fetch_receiver(reg="a1", vreg="s7", range=range)
                                   // a1 := fp[C] (this)
   beqz a1, 3f                     // throw if null
   tail NterpInvokeDirect${range}  // args a0, a1, s7
2:
%  resolve_method_into_a0()        #  a0 := ArtMethod* or String.<init>
   // nterp_get_method tags a String.<init> result by setting LSB #0 of the pointer.
   and t0, a0, 0x1                 // t0 := string-init bit
   beqz t0, 1b                     // not string init
   and a0, a0, ~0x1                // clear string-init bit
   tail NterpInvokeStringInit${range}  // args a0, s7
3:
   tail common_errNullObject
133
134
135// invoke-static {vC, vD, vE, vF, vG}, meth@BBBB
136// Format 35c: A|G|71 BBBB F|E|D|C
137//
138// Note: invoke-static is used to invoke a static method (which is always considered a direct
139// method).
%def op_invoke_static(range=""):
   // Tier (P) front end. No receiver to fetch or null-check, so the slow path can tail
   // directly into the back end instead of looping back like the instance invokes do.
   EXPORT_PC
   // TODO: Unnecessary if A=0, and unnecessary if nterp-to-nterp.
   FETCH s7, count=2               // s7 := F|E|D|C or CCCC (range)
   FETCH_FROM_THREAD_CACHE a0, /*slow path*/1f, t0, t1
                                   // a0 := ArtMethod*
   tail NterpInvokeStatic${range}  // arg a0, s7
1:
%  resolve_method_into_a0()
   tail NterpInvokeStatic${range}  // arg a0, s7
150
151
152// invoke-interface {vC, vD, vE, vF, vG}, meth@BBBB
153// Format 35c: A|G|72 BBBB F|E|D|C
154//
155// Note: invoke-interface is used to invoke an interface method, that is, on an object whose
156// concrete class isn't known, using a method_id that refers to an interface.
%def op_invoke_interface(range=""):
   // Tier (P) front end. Dispatches through the IMT (interface method table) of the receiver's
   // class, with special decodings for default methods and j.l.Object methods (see the tag-bit
   // contract below).
   EXPORT_PC
   FETCH s7, count=2               // s7 := F|E|D|C or CCCC (range)
   // T0 is eventually used to carry the "hidden argument" in the managed ABI.
   // This handler is tight on space, so we cache this arg in A0 and move it to T0 later.
   // Here, A0 is one of
   // (1) ArtMethod*
   // (2) ArtMethod* with LSB #1 set (default method)
   // (3) method index << 16 with LSB #0 set (j.l.Object method)
   FETCH_FROM_THREAD_CACHE a0, /*slow path*/5f, t0, t1
1:
%  fetch_receiver(reg="a1", vreg="s7", range=range)
                          // a1 := fp[C] (this)
   // Note: null case handled by SEGV handler.
   lwu t0, MIRROR_OBJECT_CLASS_OFFSET(a1)
                          // t0 := klass object (32-bit addr)
   UNPOISON_HEAP_REF t0
   // Shift LSB #1 into the sign bit and LSB #0 into bit 30 so one shift feeds both tag tests.
   slliw t1, a0, 30       // test LSB #0 and #1
   bltz t1, 3f            // LSB #1 is set; handle default method
   bgtz t1, 4f            // LSB #0 is set; handle object method
   // no signal bits; it is a clean ArtMethod*
   lhu t1, ART_METHOD_IMT_INDEX_OFFSET(a0)
                          // t1 := idx into interface method table (16-bit value)
2:
   ld t0, MIRROR_CLASS_IMT_PTR_OFFSET_64(t0)
                          // t0 := base address of imt
   sh3add t0, t1, t0      // t0 := entry's address in imt
   ld a2, (t0)            // a2 := ArtMethod*
   tail NterpInvokeInterface${range}  // a0 (hidden arg), a1 (this), a2 (ArtMethod*), s7 (vregs)
3:
   andi a0, a0, ~2        // a0 := default ArtMethod*, LSB #1 cleared
   lhu t1, ART_METHOD_METHOD_INDEX_OFFSET(a0)
                          // t1 := method_index_ (16-bit value)
   // Default methods have a contract with art::IMTable.
   andi t1, t1, ART_METHOD_IMT_MASK
                          // t1 := idx into interface method table
   j 2b
4:
   // Interface methods on j.l.Object have a contract with NterpGetMethod.
   srliw t1, a0, 16       // t1 := method index
   sh2add t0, t1, t0      // t0 := entry's byte offset, before vtable offset adjustment
   ld a0, MIRROR_CLASS_VTABLE_OFFSET_64(t0)
   tail NterpInvokeDirect${range}  // args a0, a1, s7
5:
%  resolve_method_into_a0()        // slow path: resolve via runtime, then retry at 1:
   j 1b
203
204
205//
206// invoke-kind/range {vCCCC .. vNNNN}, meth@BBBB
207// Format 3rc: AA|op BBBB CCCC
208// where NNNN = CCCC + AA - 1, that is A determines the count 0..255, and C determines the first
209// register.
210//
211
212// invoke-virtual/range {vCCCC .. vNNNN}, meth@BBBB
213// Format 3rc: AA|74 BBBB CCCC
214//
215// Note: invoke-virtual/range is used to invoke a normal virtual method (a method that is not
216// private, static, or final, and is also not a constructor).
%def op_invoke_virtual_range():
   // Range (3rc) variant: same handler body, parameterized on range="Range".
%   op_invoke_virtual(range="Range")
219
220
221// invoke-super/range {vCCCC .. vNNNN}, meth@BBBB
222// Format 3rc: AA|75 BBBB CCCC
223//
224// Note: When the method_id references a method of a non-interface class, invoke-super/range is used
225// to invoke the closest superclass's virtual method (as opposed to the one with the same method_id
226// in the calling class).
227// Note: In Dex files version 037 or later, if the method_id refers to an interface method,
228// invoke-super/range is used to invoke the most specific, non-overridden version of that method
229// defined on that interface. In Dex files prior to version 037, having an interface method_id is
230// illegal and undefined.
%def op_invoke_super_range():
   // Range (3rc) variant: same handler body, parameterized on range="Range".
%   op_invoke_super(range="Range")
233
234
235// invoke-direct/range {vCCCC .. vNNNN}, meth@BBBB
236// Format 3rc: AA|76 BBBB CCCC
237//
238// Note: invoke-direct/range is used to invoke a non-static direct method (that is, an instance
239// method that is by its nature non-overridable, namely either a private instance method or a
240// constructor).
%def op_invoke_direct_range():
   // Range (3rc) variant: same handler body, parameterized on range="Range".
%   op_invoke_direct(range="Range")
243
244
245// invoke-static/range {vCCCC .. vNNNN}, meth@BBBB
246// Format 3rc: AA|77 BBBB CCCC
247//
248// Note: invoke-static/range is used to invoke a static method (which is always considered a direct
249// method).
%def op_invoke_static_range():
   // Range (3rc) variant: same handler body, parameterized on range="Range".
%   op_invoke_static(range="Range")
252
253
254// invoke-interface/range {vCCCC .. vNNNN}, meth@BBBB
255// Format 3rc: AA|78 BBBB CCCC
256//
257// Note: invoke-interface/range is used to invoke an interface method, that is, on an object whose
258// concrete class isn't known, using a method_id that refers to an interface.
%def op_invoke_interface_range():
   // Range (3rc) variant: same handler body, parameterized on range="Range".
%   op_invoke_interface(range="Range")
261
262
263// invoke-polymorphic {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
264// Format 45cc: A|G|fa BBBB F|E|D|C HHHH
265//
266// Note: Invoke the indicated signature polymorphic method. The result (if any) may be stored with
267// an appropriate move-result* variant as the immediately subsequent instruction.
268//
269// The method reference must be to a signature polymorphic method, such as
270// java.lang.invoke.MethodHandle.invoke or java.lang.invoke.MethodHandle.invokeExact.
271//
272// The receiver must be an object supporting the signature polymorphic method being invoked.
273//
274// The prototype reference describes the argument types provided and the expected return type.
275//
276// The invoke-polymorphic bytecode may raise exceptions when it executes. The exceptions are
277// described in the API documentation for the signature polymorphic method being invoked.
278//
279// Present in Dex files from version 038 onwards.
%def op_invoke_polymorphic(range=""):
   // Tier (P) front end. Always shunted to the runtime (no fast paths); only the shorty is
   // obtained here so the back end can marshal arguments.
   EXPORT_PC
   FETCH s7, count=2  // s7 := F|E|D|C or CCCC (range)
   // No need to fetch the target method; the runtime handles it.
%  fetch_receiver(reg="s8", vreg="s7", range=range)
   beqz s8, 1f        // throw if null

   // s8 (callee-saved) keeps the receiver alive across the C call below.
   ld a0, (sp)        // a0 := caller ArtMethod*
   mv a1, xPC
   call NterpGetShortyFromInvokePolymorphic  // args a0, a1
   mv a1, s8
   tail NterpInvokePolymorphic${range}  // args a0 (shorty), a1 (this), s7 (vregs)
1:
   tail common_errNullObject
294
295
296// invoke-polymorphic/range {vCCCC .. vNNNN}, meth@BBBB, proto@HHHH
297// Format 4rcc: AA|fb BBBB CCCC HHHH
298// where NNNN = CCCC + AA - 1, that is A determines the count 0..255, and C determines the first
299// register.
300//
301// Note: Invoke the indicated method handle. See the invoke-polymorphic description above for
302// details.
303//
304// Present in Dex files from version 038 onwards.
%def op_invoke_polymorphic_range():
   // Range (4rcc) variant: same handler body, parameterized on range="Range".
%   op_invoke_polymorphic(range="Range")
307
308
309// invoke-custom {vC, vD, vE, vF, vG}, call_site@BBBB
310// Format 35c: A|G|fc BBBB F|E|D|C
311//
312// Note: Resolves and invokes the indicated call site. The result from the invocation (if any) may
313// be stored with an appropriate move-result* variant as the immediately subsequent instruction.
314//
315// This instruction executes in two phases: call site resolution and call site invocation.
316//
317// Call site resolution checks whether the indicated call site has an associated
318// java.lang.invoke.CallSite instance. If not, the bootstrap linker method for the indicated call
319// site is invoked using arguments present in the DEX file (see call_site_item). The bootstrap
320// linker method returns a java.lang.invoke.CallSite instance that will then be associated with the
321// indicated call site if no association exists. Another thread may have already made the
322// association first, and if so execution of the instruction continues with the first associated
323// java.lang.invoke.CallSite instance.
324//
325// Call site invocation is made on the java.lang.invoke.MethodHandle target of the resolved
326// java.lang.invoke.CallSite instance. The target is invoked as if executing invoke-polymorphic
327// (described above) using the method handle and arguments to the invoke-custom instruction as the
328// arguments to an exact method handle invocation.
329//
330// Exceptions raised by the bootstrap linker method are wrapped in a java.lang.BootstrapMethodError.
331// A BootstrapMethodError is also raised if:
332// - the bootstrap linker method fails to return a java.lang.invoke.CallSite instance.
333// - the returned java.lang.invoke.CallSite has a null method handle target.
334// - the method handle target is not of the requested type.
335//
336// Present in Dex files from version 038 onwards.
%def op_invoke_custom(range=""):
   // Tier (P) front end. Like invoke-polymorphic, always handled by the runtime; nterp only
   // obtains the shorty and forwards the call-site index and vreg ids.
   EXPORT_PC
   ld a0, (sp)  // a0 := caller ArtMethod*
   mv a1, xPC
   call NterpGetShortyFromInvokeCustom  // args a0, a1
   mv s7, a0    // s7 := shorty (callee-saved, survives into the back end)
   FETCH a0, 1  // a0 := BBBB (call_site index)
   FETCH s8, 2  // s8 := F|E|D|C or CCCC (range)
   tail NterpInvokeCustom${range}  // args a0 (BBBB), s7 (shorty), s8 (vregs)
346
347
348// invoke-custom/range {vCCCC .. vNNNN}, call_site@BBBB
349// Format 3rc: AA|fd BBBB CCCC
350// where NNNN = CCCC + AA - 1, that is A determines the count 0..255, and C determines the first
351// register.
352//
353// Note: Resolve and invoke a call site. See the invoke-custom description above for details.
354//
355// Present in Dex files from version 038 onwards.
%def op_invoke_custom_range():
   // Range (3rc) variant: same handler body, parameterized on range="Range".
%  op_invoke_custom(range="Range")
358
359
360// handler helpers
361
%def resolve_method_into_a0():
   // Calls nterp_get_method(self, caller ArtMethod*, dex pc); the resolved value is returned
   // in a0 (possibly tag-bit encoded — see callers). The caller ArtMethod* is reloaded from
   // the stack because a0 may already have been overwritten by the fast-path attempt.
   mv a0, xSELF
   ld a1, (sp)  // We can't always rely on a0 = ArtMethod*.
   mv a2, xPC
   call nterp_get_method
367
368
// Loads the receiver object reference into $reg.
// Input
// - vreg: (regular) packed vreg ids F|E|D|C, (range) first vreg id CCCC
// - range: "Range" selects the 3rc decoding; otherwise the low nibble C is extracted.
// Note: in the non-range case $reg doubles as a temporary for the decoded index.
%def fetch_receiver(reg="", vreg="", range=""):
%  if range == 'Range':
     GET_VREG_OBJECT $reg, $vreg           // reg := refs[CCCC]
%  else:
     andi $reg, $vreg, 0xF                 // reg := C
     GET_VREG_OBJECT $reg, $reg            // reg := refs[C]
375
376
377//
378// These asm blocks are positioned in main.S for visibility to stack walking.
379//
380
381
382// NterpInvokeVirtual
383// a0: ArtMethod*
384// a1: this
385// s7: vreg ids F|E|D|C
%def nterp_invoke_virtual():
   // Identical to the direct back end once the ArtMethod* is in a0; only the label differs.
%  nterp_invoke_direct(uniq="invoke_virtual")
388
389
390// NterpInvokeSuper
391// a0: ArtMethod*
392// a1: this
393// s7: vreg ids F|E|D|C
%def nterp_invoke_super():
   // Identical to the direct back end once the ArtMethod* is in a0; only the label differs.
%  nterp_invoke_direct(uniq="invoke_super")
396
397
398// NterpInvokeDirect
399// a0: ArtMethod*
400// a1: this
401// s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC
%def nterp_invoke_direct(uniq="invoke_direct", range=""):
   // Tier (Q) back end implementing fast-path tiers (0)-(3) from the file header.
   // s8 (callee-saved) holds the quick code pointer across all helper calls.
   ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0)
                                 // s8 := quick code
   // Tier (0): callee is nterp itself -> vreg-to-vreg transfer, stay in nterp.
%  try_nterp(quick="s8", z0="t0", skip=f".L{uniq}_simple")
   call NterpToNterpInstance${range}  // args a0, a1
   j .L${uniq}_next_op

.L${uniq}_simple:
   // Tier (1): all-reference args -> load arg registers directly, no shorty needed.
%  if range == 'Range':
%    try_simple_args_range(vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", skip=f".L{uniq}_01", uniq=uniq)
%  else:
%    try_simple_args(v_fedc="s7", z0="t0", z1="t1", skip=f".L{uniq}_01", uniq=uniq)
%#:
   jalr s8                       // (regular) args a0 - a5, (range) args a0 - a7 and stack
   j .L${uniq}_next_op

.L${uniq}_01:
   // Tier (2): 0-or-1-arg shapes that can skip most of the managed ABI setup.
   mv s9, zero                   // initialize shorty reg
%  try_01_args(vreg="s7", shorty="s9", z0="t0", z1="t1", z2="t2", y0="s10", y1="s11", y2="s0", skip=f".L{uniq}_slow", call=f".L{uniq}_01_call", uniq=uniq, range=range)
                                 // if s9 := shorty, then maybe (a2, fa0) := fp[D] or fp[CCCC + 1]
.L${uniq}_01_call:
   jalr s8                       // args a0, a1, and maybe a2, fa0
   beqz s9, .L${uniq}_next_op    // no shorty, no scalar return
%  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_0")
                                 // a0 := fa0 if float return
   j .L${uniq}_next_op

.L${uniq}_slow:
   // Tier (3): fully pessimistic managed-ABI setup driven by the shorty.
%  get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11")
%  if range == 'Range':
%    slow_setup_args_range(shorty="s9", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s10", uniq=uniq)
%  else:
%    slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", uniq=uniq)
%#:
   jalr s8                       // args in a0-a5, fa0-fa4
%  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
                                 // a0 := fa0 if float return
.L${uniq}_next_op:
   FETCH_ADVANCE_INST 3          // 35c and 3rc formats are both 3 code units wide
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
443
444
445// NterpInvokeStringInit
446// a0: ArtMethod*
447// s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC
%def nterp_invoke_string_init(uniq="invoke_string_init", range=""):
   // Tier (Q) back end for String.<init> factory calls. Per the header note (b/28555675),
   // the managed-ABI fast paths are skipped, and after the call every vreg holding the old
   // (uninitialized) receiver is rewritten to the factory's result.
   ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0)
                        // s8 := quick code
%  try_nterp(quick="s8", z0="t0", skip=f".L{uniq}_slow")
   call NterpToNterpStringInit${range}  // arg a0
   j .L${uniq}_next_op

.L${uniq}_slow:
%  get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11")
%  if range == 'Range':
%    slow_setup_args_string_init_range(shorty="s9", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", uniq=uniq)
%  else:
%    slow_setup_args_string_init(shorty="s9", v_fedc="s7", z0="t0", z1="t1", z2="t2", uniq=uniq)
%#:
   jalr s8              // args (regular) a0 - a5, (range) a0 - a5

.L${uniq}_next_op:
   // a0 now holds the newly constructed String; replace all aliases of the old receiver.
%  fetch_receiver(reg="t0", vreg="s7", range=range)
                        // t0 := fp[C] (this)
%  subst_vreg_references(old="t0", new="a0", z0="t1", z1="t2", z2="t3", uniq=uniq)
   FETCH_ADVANCE_INST 3
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
471
472
473// NterpInvokeStatic
474// a0: ArtMethod*
475// s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC
%def nterp_invoke_static(uniq="invoke_static", range=""):
   // Tier (Q) back end for static invokes: same tier structure as nterp_invoke_direct, but
   // with arg_start="0" since there is no receiver occupying the first argument slot.
   ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0)
                               // s8 := quick code
   // Tier (0): callee is nterp itself -> vreg-to-vreg transfer, stay in nterp.
%  try_nterp(quick="s8", z0="t0", skip=f".L{uniq}_simple")
   call NterpToNterpStatic${range}  // arg a0
   j .L${uniq}_next_op

.L${uniq}_simple:
   // Tier (1): all-reference args -> load arg registers directly, no shorty needed.
%  if range == 'Range':
%    try_simple_args_range(vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", arg_start="0", skip=f".L{uniq}_01", uniq=uniq)
%  else:
%    try_simple_args(v_fedc="s7", z0="t0", z1="t1", arg_start="0", skip=f".L{uniq}_01", uniq=uniq)
%#:
   jalr s8                     // args (regular) a0 - a5, (range) a0 - a7 and maybe stack
   j .L${uniq}_next_op

.L${uniq}_01:
   // Tier (2): 0-or-1-arg shapes that can skip most of the managed ABI setup.
   mv s9, zero                 // initialize shorty reg
%  try_01_args_static(vreg="s7", shorty="s9", z0="t0", z1="t1", z2="t2", y0="s10", y1="s11", skip=f".L{uniq}_slow", call=f".L{uniq}_01_call", uniq=uniq, range=range)
                               // if s9 := shorty, then maybe (a2, fa0) := fp[C] or fp[CCCC]
.L${uniq}_01_call:
   jalr s8                     // args a0, and maybe a1, fa0
   beqz s9, .L${uniq}_next_op  // no shorty, no scalar return
%  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_0")
                               // a0 := fa0 if float return
   j .L${uniq}_next_op

.L${uniq}_slow:
   // Tier (3): fully pessimistic managed-ABI setup driven by the shorty.
%  get_shorty_save_a0(shorty="s9", y0="s10")
%  if range == 'Range':
%    slow_setup_args_range(shorty="s9", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s10", arg_start="0", uniq=uniq)
%  else:
%    slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", arg_start="0", uniq=uniq)
%#:
   jalr s8                     // args (regular) a0 - a5 and fa0 - fa4, (range) a0 - a7 and fa0 - fa7 and maybe stack
%  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
                               // a0 := fa0 if float return
.L${uniq}_next_op:
   FETCH_ADVANCE_INST 3        // 35c and 3rc formats are both 3 code units wide
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
517
518
519// NterpInvokeInterface
520// a0: the target interface method
521//     - ignored in nterp-to-nterp transfer
522//     - preserved through shorty calls
523//     - side-loaded in T0 as a "hidden argument" in managed ABI transfer
524// a1: this
525// a2: ArtMethod*
526// s7: vreg ids F|E|D|C
%def nterp_invoke_interface(uniq="invoke_interface", range=""):
   // Tier (Q) back end for interface invokes. Note the temporary-register remapping: T0 is
   // reserved for the "hidden argument" until the quick call, so all helpers below use
   // t1..t5 / saved regs as scratch instead of the usual t0..t4.
   // We immediately adjust the incoming arguments to suit the rest of the invoke.
   mv t0, a0                   // t0 := hidden arg, preserve until quick call
   mv a0, a2                   // a0 := ArtMethod*

   ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0)
                               // s8 := quick code
   // Tier (0): callee is nterp itself -> vreg-to-vreg transfer; hidden arg not needed.
%  try_nterp(quick="s8", z0="t1", skip=f".L{uniq}_simple")
   call NterpToNterpInstance${range}  // args a0, a1
   j .L${uniq}_next_op

.L${uniq}_simple:
   // Tier (1): all-reference args -> load arg registers directly, no shorty needed.
%  if range == 'Range':
%    try_simple_args_range(vC="s7", z0="t1", z1="t2", z2="t3", z3="t4", z4="t5", skip=f".L{uniq}_01", uniq=uniq)
%  else:
%    try_simple_args(v_fedc="s7", z0="t1", z1="t2", skip=f".L{uniq}_01", uniq=uniq)
%#:
   jalr s8                     // args (regular) a0 - a5 and t0, (range) a0 - a7 and t0 and maybe stack
   j .L${uniq}_next_op

.L${uniq}_01:
   // Tier (2): 0-or-1-arg shapes that can skip most of the managed ABI setup.
   mv s9, zero                 // initialize shorty reg
%  try_01_args(vreg="s7", shorty="s9", z0="t1", z1="t2", z2="t3", y0="s10", y1="s11", y2="s0", interface=True, skip=f".L{uniq}_slow", call=f".L{uniq}_01_call", uniq=uniq, range=range)
                               // if s9 := shorty, then maybe (a2, fa0) := fp[D] or fp[CCCC + 1]
                               // (xINST clobbered, if taking this fast path)
.L${uniq}_01_call:
   jalr s8                     // args a0, a1, and t0, and maybe a2, fa0
   beqz s9, .L${uniq}_next_op  // no shorty, no scalar return
%  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_0")
                               // a0 := fa0 if float return
   j .L${uniq}_next_op

.L${uniq}_slow:
   // Tier (3): fully pessimistic managed-ABI setup driven by the shorty. The helper also
   // preserves the hidden arg (t0) across the shorty retrieval call.
%  get_shorty_for_interface_save_a0_a1_t0(shorty="s9", y0="s10", y1="s11", y2="s0")
%  if range == 'Range':
%    slow_setup_args_range(shorty="s9", vC="s7", z0="s10", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s11", uniq=uniq)
%  else:
%    slow_setup_args(shorty="s9", vregs="s7", z0="s10", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", uniq=uniq)
%#:
   jalr s8                     // args (regular) a0 - a5, fa0 - fa4, t0, (range) a0 - a7, fa0 - fa7, t0
%  maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1")
                               // a0 := fa0 if float return
.L${uniq}_next_op:
   FETCH_ADVANCE_INST 3        // 35c and 3rc formats are both 3 code units wide
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
573
574
575// NterpInvokePolymorphic
576// a0: shorty
577// a1: receiver this
578// s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC
%def nterp_invoke_polymorphic(uniq="invoke_polymorphic", range=""):
   // Tier (Q) back end: no fast paths; set up the full managed ABI and let the runtime stub
   // do the dispatch.
%  if range == "Range":
%    slow_setup_args_range(shorty="a0", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s8", uniq=uniq)
%  else:
%    slow_setup_args(shorty="a0", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", uniq=uniq)
%#:
   // Managed ABI argument regs get spilled to stack and consumed by artInvokePolymorphic.
   call art_quick_invoke_polymorphic  // args a1 - a7, fa0 - fa7, and maybe stack
   // Note: If float return, artInvokePolymorphic will place the value in A0, as Nterp expects.
   FETCH_ADVANCE_INST 4               // 45cc and 4rcc formats are 4 code units wide
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
591
592
593// NterpInvokeCustom
594// a0: BBBB
595// s7: shorty
596// s8: (regular) vreg ids F|E|D|C, (range) vreg id CCCC
%def nterp_invoke_custom(uniq="invoke_custom", range=""):
   // Tier (Q) back end: no fast paths; set up the full managed ABI (static-style, arg_start=0)
   // and let the runtime stub resolve and invoke the call site.
%  if range == "Range":
%    slow_setup_args_range(shorty="s7", vC="s8", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s9", arg_start="0", uniq=uniq)
%  else:
%    slow_setup_args(shorty="s7", vregs="s8", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", arg_start="0", uniq=uniq)
%#:
   // Managed ABI argument regs get spilled to stack and consumed by artInvokeCustom.
   call art_quick_invoke_custom  // args a0 - a7, fa0 - fa7, and maybe stack
   // Note: If float return, artInvokeCustom will place the value in A0, as Nterp expects.
   FETCH_ADVANCE_INST 3          // 35c and 3rc formats are both 3 code units wide
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
609
610
611// NterpInvokeVirtualRange
612// a0: ArtMethod*
613// a1: this
614// s7: vreg id CCCC
%def nterp_invoke_virtual_range():
   // Range variant of the shared direct/virtual/super back end.
%  nterp_invoke_direct(uniq="invoke_virtual_range", range="Range")
617
618
619// NterpInvokeSuperRange
620// a0: ArtMethod*
621// a1: this
622// s7: vreg id CCCC
%def nterp_invoke_super_range():
   // Range variant of the shared direct/virtual/super back end.
%  nterp_invoke_direct(uniq="invoke_super_range", range="Range")
625
626
627// NterpInvokeDirectRange
628// Hardcoded:
629// a0: ArtMethod*
630// a1: this
631// s7: vreg id CCCC
%def nterp_invoke_direct_range():
   // Range variant of the shared direct/virtual/super back end.
%  nterp_invoke_direct(uniq="invoke_direct_range", range="Range")
634
635
636// NterpInvokeStringInitRange
637// a0: ArtMethod*
638// s7: vreg id CCCC
%def nterp_invoke_string_init_range():
   // Range variant of the string-init back end.
%  nterp_invoke_string_init(uniq="invoke_string_init_range", range="Range")
641
642
643// NterpInvokeStaticRange
644// a0: ArtMethod*
645// s7: vreg id CCCC
%def nterp_invoke_static_range():
   // Range variant of the static back end.
%  nterp_invoke_static(uniq="invoke_static_range", range="Range")
648
649
650// NterpInvokeInterfaceRange
651// a0: the target interface method
652//     - ignored in nterp-to-nterp transfer
653//     - preserved through shorty calls
654//     - side-loaded in T0 as a "hidden argument" in managed ABI transfer
655// a1: this
656// a2: ArtMethod*
657// s7: vreg id CCCC
%def nterp_invoke_interface_range():
   // Range variant of the interface back end.
%  nterp_invoke_interface(uniq="invoke_interface_range", range="Range")
660
661
662// NterpInvokePolymorphicRange
%def nterp_invoke_polymorphic_range():
   // Range variant of the polymorphic back end.
%  nterp_invoke_polymorphic(uniq="invoke_polymorphic_range", range="Range")
665
666
667// NterpInvokeCustomRange
%def nterp_invoke_custom_range():
   // Range variant of the custom back end.
%  nterp_invoke_custom(uniq="invoke_custom_range", range="Range")
670
671
672// fast path and slow path helpers
673
674
675// Input
676// - quick: quick code ptr
677// Temporaries: z0
// Branches to $skip unless the callee's quick code is ExecuteNterpImpl, i.e. unless the
// nterp-to-nterp fast path (tier 0) applies. Falls through when it does.
// Input
// - quick: quick code ptr
// Temporaries: z0
%def try_nterp(quick="", z0="", skip=""):
   lla $z0, ExecuteNterpImpl
   bne $z0, $quick, $skip
681
682
683// Hardcoded
684// - a0: ArtMethod*
685// - xINST
686// Input
687// - v_fedc: vreg ids F|E|D|C
688// Temporaries: z0, z1
// Tier (1) helper: if the callee's access flags say all args are reference-typed, load the
// argument registers straight from the vregs (fall-through switch on arg count A, highest
// arg first); otherwise branch to $skip. a1 is pre-loaded with "this" for instance calls
// (arg_start="1"); arg_start="0" (static) shifts all args down one register and loads a1
// from vreg C.
// Hardcoded
// - a0: ArtMethod*
// - xINST
// Input
// - v_fedc: vreg ids F|E|D|C
// Temporaries: z0, z1
%def try_simple_args(v_fedc="", z0="", z1="", arg_start="1", skip="", uniq=""):
   lwu $z0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
   // The meaning of nterp-invoke-fast-path-flag for RISC-V diverges from other ISAs.
   BRANCH_IF_BIT_CLEAR $z0, $z0, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, $skip

   srliw $z0, xINST, 12              // z0 := A
%  if arg_start == "0":
     beqz $z0, .L${uniq}_simple_done  // A = 0: no further args.
%#:
   li $z1, 2
   blt $z0, $z1, .L${uniq}_simple_1  // A = 1
   beq $z0, $z1, .L${uniq}_simple_2  // A = 2
   li $z1, 4
   blt $z0, $z1, .L${uniq}_simple_3  // A = 3
   beq $z0, $z1, .L${uniq}_simple_4  // A = 4
   // A = 5
   srliw $z1, xINST, 8               // z1 := A|G
   andi $z1, $z1, 0xF                // z1 := G
   GET_VREG_OBJECT a5, $z1
.L${uniq}_simple_4:
   srliw $z1, $v_fedc, 12            // z1 := F
   GET_VREG_OBJECT a4, $z1
.L${uniq}_simple_3:
   srliw $z1, $v_fedc, 8             // z1 := F|E
   andi $z1, $z1, 0xF                // z1 := E
   GET_VREG_OBJECT a3, $z1
.L${uniq}_simple_2:
   srliw $z1, $v_fedc, 4             // z1 := F|E|D
   andi $z1, $z1, 0xF                // z1 := D
   GET_VREG_OBJECT a2, $z1
.L${uniq}_simple_1:
%  if arg_start == "0":
     andi $z1, $v_fedc, 0xF          // z1 := C
     GET_VREG_OBJECT a1, $z1
   // instance: a1 already set to "this"
.L${uniq}_simple_done:
725
726
// Range variant of the all-references fast path. Loads up to 7 args into
// a1..a7 with LWU from contiguous vregs FP[CCCC..]; when AA >= 8, the
// remainder FP[CCCC+7..] is copied to the out array, pairing 4-byte slots
// into 8-byte LD/SD copies (one leading 4-byte copy when the remainder count
// is odd, i.e. when AA is even: AA - 7 slots remain).
%def try_simple_args_range(vC="", z0="", z1="", z2="", z3="", z4="", skip="", arg_start="1", uniq=""):
   lwu $z0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
   // The meaning of nterp-invoke-fast-path-flag for RISC-V diverges from other ISAs.
   BRANCH_IF_BIT_CLEAR $z0, $z0, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, $skip

   srliw $z0, xINST, 8                 // z0 := AA
%  if arg_start == "0":  # static:
     beqz $z0, .L${uniq}_simple_done   // AA = 0: no further args.
     sh2add $z1, $vC, xFP              // z1 := &FP[CCCC]
     li $z2, 2
     blt $z0, $z2, .L${uniq}_simple_1  // AA = 1
%  else:  # instance:
     li $z2, 2
     blt $z0, $z2, .L${uniq}_simple_done  // AA = 1, and a1 already loaded.
     sh2add $z1, $vC, xFP               // z1 := &FP[CCCC]
%#:
   // Here: z0, z1, z2 same values for static vs instance.
   beq $z0, $z2, .L${uniq}_simple_2  // AA = 2
   li $z2, 4
   blt $z0, $z2, .L${uniq}_simple_3  // AA = 3
   beq $z0, $z2, .L${uniq}_simple_4  // AA = 4
   li $z2, 6
   blt $z0, $z2, .L${uniq}_simple_5  // AA = 5
   beq $z0, $z2, .L${uniq}_simple_6  // AA = 6
   li $z2, 7
   beq $z0, $z2, .L${uniq}_simple_7  // AA = 7

   // AA >= 8: store in stack. Load/store from FP[CCCC + 7] upwards.
   slli $z2, $z0, 63                 // z2 := negative if z0 bit #0 is set (odd)
   sh2add $z0, $z0, $z1              // z0 := loop guard at top of stack
   addi $z3, $z1, 7*4                // z3 := &FP[CCCC + 7]
   addi $z4, sp, __SIZEOF_POINTER__ + 7*4
                                     // z4 := &OUT[CCCC + 7]; out array starts
                                     // one pointer above sp (ArtMethod* slot)
   bltz $z2, .L${uniq}_simple_loop_wide
                                     // if AA odd, branch to wide-copy
   // AA even: AA - 7 remaining slots is odd; copy a single slot first so the
   // rest is a whole number of 8-byte copies.
   lwu $z2, ($z3)
   sw $z2, ($z4)
   addi $z3, $z3, 4
   addi $z4, $z4, 4

.L${uniq}_simple_loop_wide:
   // TODO: Consider ensuring 64-bit stores are aligned.
   beq $z3, $z0, .L${uniq}_simple_7
   ld $z2, ($z3)
   sd $z2, ($z4)
   addi $z3, $z3, 8
   addi $z4, $z4, 8
   j .L${uniq}_simple_loop_wide

   // Bottom 7 slots of OUT array never written; first args are passed with a1-a7.
.L${uniq}_simple_7:
   lwu a7, 6*4($z1)
.L${uniq}_simple_6:
   lwu a6, 5*4($z1)
.L${uniq}_simple_5:
   lwu a5, 4*4($z1)
.L${uniq}_simple_4:
   lwu a4, 3*4($z1)
.L${uniq}_simple_3:
   lwu a3, 2*4($z1)
.L${uniq}_simple_2:
   lwu a2, 1*4($z1)
.L${uniq}_simple_1:
%  if arg_start == "0":  # static:
     lwu a1, 0*4($z1)
%#:
.L${uniq}_simple_done:
795
796
// Check if a 0/1 arg invoke form is possible, set up a2 and fa0 if needed.
// If a return value expected, move possible float return to a0.
// Hardcoded: xINST, xPC, xFP, a0, a1, t0, fa0
// NOTE xINST clobbered if interface=True and we're taking the fast path.
// zN are temporaries, yN are callee-save
// \y0 holds A - 2 (vreg count including "this", minus 2): negative means
// "this" only (0 args), zero means "this" plus 1 arg. It is callee-saved so
// it survives the shorty helper call and the invoke itself.
%def try_01_args(vreg="", shorty="", z0="", z1="", z2="", y0="", y1="", y2="", interface=False, skip="", call="", uniq="", range=""):
%  if range == 'Range':
     srliw $y0, xINST, 8   // y0 := AA
%  else:
     srliw $y0, xINST, 12  // y0 := A
%#:
   addi $y0, $y0, -2       // y0 := A - 2 or (range) AA - 2
   bgtz $y0, $skip         // 2+ args: slow path
   beqz $y0, .L${uniq}_01_shorty  // this and 1 arg: determine arg type with shorty
   // 0 args
%  try_01_args_peek_next(z0=z0)  # z0 is zero if invoke has scalar return
   bnez $z0, $call         // Non-scalar return, 0 args: make the call.
   // Scalar return, 0 args: determine return type with shorty

.L${uniq}_01_shorty:
   // Get shorty, stash in callee-save to be available on return.
   // When getting shorty, stash this fast path's A0 and A1, then restore.
%  if interface:
     // xINST is a regular callee save. Safe: orig xINST value unused before FETCH_ADVANCE_INST.
%    get_shorty_for_interface_save_a0_a1_t0(shorty=shorty, y0=y1, y1=y2, y2="xINST")
%  else:
%    get_shorty_save_a0_a1(shorty=shorty, y0=y1, y1=y2)
%#:
   // shorty assigned
   bltz $y0, $call         // Scalar return, 0 args: make the call.
   // ins = 2: this and 1 arg. Load arg type.
   lb $z0, 1($shorty)      // z0 := first arg
   li $z1, 'L'             // ref type
%  if range == 'Range':
     sh2add $z2, $vreg, xFP  // z2 := &fp[CCCC]
     lwu a2, 4($z2)        // a2 := fp[CCCC + 1], zext; CCCC itself is "this"
%  else:
     srliw $z2, $vreg, 4   // z2 := F|E|D
     andi $z2, $z2, 0xF    // z2 := D
     sh2add $z2, $z2, xFP  // z2 := &fp[D]
     lwu a2, ($z2)         // a2 := fp[D], zext
%#:
   beq $z0, $z1, $call     // ref type: LWU into a2
   // non-'L' type
   fmv.w.x fa0, a2         // overload of managed ABI, for one arg
   sext.w a2, a2           // scalar type: LW into a2
   // immediately followed by call
844
845
// Static variant. Same as try_01_args, but there is no "this": \y0 holds
// A - 1, and a single arg (when present) goes in a1 instead of a2.
%def try_01_args_static(vreg="", shorty="", z0="", z1="", z2="", y0="", y1="", skip="", call="", uniq="", range=""):
%  if range == 'Range':
     srliw $y0, xINST, 8     // y0 := AA
%  else:
     srliw $y0, xINST, 12    // y0 := A
%#:
   addi $y0, $y0, -1         // y0 := A - 1 or (range) AA - 1
   bgtz $y0, $skip           // 2+ args: slow path
   beqz $y0, .L${uniq}_01_shorty  // 1 arg: determine arg type with shorty
   // 0 args
%  try_01_args_peek_next(z0=z0)  # z0 is zero if invoke has scalar return
   bnez $z0, $call           // Non-scalar return, 0 args: make the call.
   // Scalar return, 0 args: determine return type with shorty.

.L${uniq}_01_shorty:
   // Get shorty, stash in callee-save to be available on return.
   // When getting shorty, stash this fast path's A0 then restore.
%  get_shorty_save_a0(shorty=shorty, y0=y1)
   // shorty assigned
   bltz $y0, $call           // Scalar return, 0 args: make the call.
   // ins = 1: load arg type
   lb $z0, 1($shorty)        // z0 := first arg
   li $z1, 'L'               // ref type
%  if range == 'Range':
     sh2add $z2, $vreg, xFP  // z2 := &fp[CCCC]
%  else:
     andi $z2, $vreg, 0xF    // z2 := C
     sh2add $z2, $z2, xFP    // z2 := &fp[C]
%#:
   lwu a1, ($z2)             // a1 := fp[C] or (range) fp[CCCC], zext
   beq $z0, $z1, $call       // ref type: LWU into a1
   // non-'L' type
   fmv.w.x fa0, a1           // overload of managed ABI, for one arg
   sext.w a1, a1             // scalar type: LW into a1
   // immediately followed by call
882
883
// Peek the opcode of the next instruction, 3 code units ahead (past this
// 3-unit invoke). Sets \z0 = 0 iff the next op is move-result (0x0A) or
// move-result-wide (0x0B), i.e. a scalar return is expected;
// move-result-object (0x0C) and everything else leave z0 nonzero.
%def try_01_args_peek_next(z0=""):
   FETCH $z0, count=3, width=8, byte=0
                                // z0 := next op
   andi $z0, $z0, ~1            // clear bit #0: maps 0x0B to 0x0A
   addi $z0, $z0, -0x0A         // z0 := zero if op is 0x0A or 0x0B
889
890
// The invoked method might return in FA0, via managed ABI.
// The next opcode, MOVE-RESULT{-WIDE}, expects the value in A0.
// Inspect the shorty's return type (byte 0) and, for 'F' or 'D', copy the
// raw FP register bits into a0.
%def maybe_float_returned(shorty="", z0="", z1="", uniq=""):
   lb $z0, ($shorty)  // z0 := first byte of shorty; type of return
   li $z1, 'F'        //
   beq $z0, $z1, .L${uniq}_float_return_move
   li $z1, 'D'        //
   bne $z0, $z1, .L${uniq}_float_return_done
.L${uniq}_float_return_move:
   // If fa0 carries a 32-bit float, the hi bits of fa0 will contain all 1's (NaN boxing).
   // The use of fmv.x.d will transfer those hi bits into a0, and that's okay, because the next
   // opcode, move-result, will only read the lo 32-bits of a0 - the box bits are correctly ignored.
   // If fa0 carries a 64-bit float, then fmv.x.d works as expected.
   fmv.x.d a0, fa0
.L${uniq}_float_return_done:
906
907
// Call NterpGetShorty(a0) and put the result in \shorty, preserving the
// fast path's a0 (ArtMethod*) and a1 (this) across the call via callee-saves.
// Hardcoded:
// - a0: ArtMethod*
// - a1: this
// Callee-saves: y0, y1
%def get_shorty_save_a0_a1(shorty="", y0="", y1=""):
   mv $y1, a1
   mv $y0, a0
   call NterpGetShorty  // arg a0
   mv $shorty, a0
   mv a0, $y0
   mv a1, $y1
919
920
// Static variant: no "this" to preserve, only a0 (ArtMethod*).
// Hardcoded:
// - a0: ArtMethod*
// Callee-saves: y0
%def get_shorty_save_a0(shorty="", y0=""):
   mv $y0, a0
   call NterpGetShorty  // arg a0
   mv $shorty, a0
   mv a0, $y0
930
931
// Interface variant. The shorty is resolved from the *caller's* ArtMethod
// (read back from the nterp frame at (sp)) and the BBBB method index from
// dex, via NterpGetShortyFromMethodId. Preserves a0, a1, and the interface
// "hidden argument" t0 across the call.
// Hardcoded:
// - a0: ArtMethod*
// - a1: this
// - t0: "hidden argument"
// Callee-saves: y0, y1, y2
%def get_shorty_for_interface_save_a0_a1_t0(shorty="", y0="", y1="", y2=""):
   mv $y2, t0
   mv $y1, a1
   mv $y0, a0
   ld a0, (sp)            // a0 := caller ArtMethod*
   FETCH reg=a1, count=1  // a1 := BBBB method idx
   call NterpGetShortyFromMethodId
   mv $shorty, a0
   mv a0, $y0
   mv a1, $y1
   mv t0, $y2
949
950
// Hardcoded: xFP, xREFS
// Starting with vreg index 0, replace any old reference with new reference.
// Scans every slot of the refs array; each match is rewritten in both the
// fp array and the refs array. The loop guard relies on the refs array
// sitting directly below the fp array: the refs iterator stops on reaching xFP.
%def subst_vreg_references(old="", new="", z0="", z1="", z2="", uniq=""):
   mv $z0, xFP               // z0 := &fp[0]
   mv $z1, xREFS             // z1 := &refs[0]
.L${uniq}_subst_try:
   lwu $z2, ($z1)
   bne $z2, $old, .L${uniq}_subst_next
   sw $new, ($z0)
   sw $new, ($z1)
.L${uniq}_subst_next:
   addi $z0, $z0, 4
   addi $z1, $z1, 4
   bne $z1, xFP, .L${uniq}_subst_try
965
966
// Slow path arg setup (non-range): walk the shorty twice over the packed
// 4-bit vreg ids. Pass one fills integer/ref arg registers (skipping float
// args); pass two fills FP arg registers (skipping non-float args).
// Hardcoded
// - a0: ArtMethod*
// - a1: this
// Input
// - vregs: F|E|D|C from dex
%def slow_setup_args(shorty="", vregs="", z0="", z1="", z2="", z3="", z4="", z5="", z6="", arg_start="1", uniq=""):
   srliw $z0, xINST, 12     // z0 := A
   li $z1, 5
   blt $z0, $z1, .L${uniq}_slow_gpr
   // A = 5: need vreg G
   srliw $z1, xINST, 8      // z1 := A|G
   andi $z1, $z1, 0xF       // z1 := G
   slliw $z1, $z1, 16       // z1 := G0000
   add $vregs, $z1, $vregs  // vregs := G|F|E|D|C

.L${uniq}_slow_gpr:
   addi $z0, $shorty, 1     // z0 := first arg of shorty
   srliw $z1, $vregs, 4*$arg_start
                            // z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C
   li $z2, 'D'              // double
   li $z3, 'F'              // float
   li $z4, 'J'              // long
   li $z5, 'L'              // ref
   // linear scan through shorty: extract non-float vregs
%  if arg_start == "0":  # static can place vC into a1; instance already loaded "this" into a1.
%    load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0")
%  load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
%  load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
%  load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
%  load_vreg_in_gpr(gpr="a5", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4")

.L${uniq}_slow_fpr:
   addi $z0, $shorty, 1     // z0 := first arg of shorty (restart the scan)
   srliw $z1, $vregs, 4*$arg_start
                            // z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C
   // linear scan through shorty: extract float/double vregs
%  load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0")
%  load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1")
%  load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2")
%  load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3")
%  if arg_start == "0":  # static can place G into fa4; instance has only 4 args.
%    load_vreg_in_fpr(fpr="fa4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_4")
%#:
.L${uniq}_slow_done:
1011
1012
// String-init variant: up to 4 args, no long/double/float args.
// Ref args ('L') loaded with LW *must* apply ZEXT.W to avoid subtle address bugs.
// Args D..G go to a1..a4; vreg C ("this") is never read. Each case falls
// through to load the lower-numbered args; ZEXT.W is applied only when the
// matching shorty byte is 'L'.
%def slow_setup_args_string_init(shorty="", v_fedc="", z0="", z1="", z2="", uniq=""):
   srliw $z0, xINST, 12            // z0 := A; possible values 1-5
   li $z1, 2
   blt $z0, $z1, .L${uniq}_slow_1  // A = 1
   li $z2, 'L'                     // z2 := ref type
   beq $z0, $z1, .L${uniq}_slow_2  // A = 2
   li $z1, 4
   blt $z0, $z1, .L${uniq}_slow_3  // A = 3
   beq $z0, $z1, .L${uniq}_slow_4  // A = 4

   // A = 5
   srliw $z0, xINST, 8             // z0 := A|G
   andi $z0, $z0, 0xF              // z0 := G
%  get_vreg("a4", z0)
   lb $z1, 4($shorty)              // shorty RDEFG; byte 4 is G's type
   bne $z1, $z2, .L${uniq}_slow_4
   zext.w a4, a4
.L${uniq}_slow_4:
   srliw $z1, $v_fedc, 12          // z1 := F
%  get_vreg("a3", z1)
   lb $z1, 3($shorty)              // shorty RDEF; byte 3 is F's type
   bne $z1, $z2, .L${uniq}_slow_3
   zext.w a3, a3
.L${uniq}_slow_3:
   srliw $z1, $v_fedc, 8           // z1 := F|E
   andi $z1, $z1, 0xF              // z1 := E
%  get_vreg("a2", z1)
   lb $z1, 2($shorty)              // shorty RDE; byte 2 is E's type
   bne $z1, $z2, .L${uniq}_slow_2
   zext.w a2, a2
.L${uniq}_slow_2:
   srliw $z1, $v_fedc, 4           // z1 := F|E|D
   andi $z1, $z1, 0xF              // z1 := D
%  get_vreg("a1", z1)
   lb $z1, 1($shorty)              // shorty RD; byte 1 is D's type
   bne $z1, $z2, .L${uniq}_slow_1
   zext.w a1, a1
.L${uniq}_slow_1:
   // "this" never read in string-init
1054
1055
// Range and static-range variant. Two shorty scans fill a1..a7 and fa0..fa7;
// the is_out_stack_needed helpers record whether any args remain after the
// registers are exhausted, in which case all AA vregs are copied to the out
// array (register-passed args are copied too, keeping out slots in vreg order).
// Hardcoded
// - (caller) xPC, xINST, xFP
// - (callee) sp
// Input
// - vC: CCCC from dex
%def slow_setup_args_range(shorty="", vC="", z0="", z1="", z2="", z3="", z4="", z5="", z6="", z7="", arg_start="1", uniq=""):
   addi $z0, $shorty, 1       // z0 := first arg of shorty
   addi $z1, $vC, $arg_start  // z1 := (instance) CCCC+1, (static) CCCC
   mv $z2, zero               // z2 := is_out_stack_needed false
   li $z3, 'D'                // double
   li $z4, 'F'                // float
   li $z5, 'J'                // long
   li $z6, 'L'                // ref

   // linear scan through shorty: extract non-float vregs
%  if arg_start == "0":  # static can place vCCCC into a1; instance already loaded "this" into a1.
%    load_vreg_in_gpr_range(gpr="a1", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1")
%  load_vreg_in_gpr_range(gpr="a2", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2")
%  load_vreg_in_gpr_range(gpr="a3", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3")
%  load_vreg_in_gpr_range(gpr="a4", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4")
%  load_vreg_in_gpr_range(gpr="a5", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_5")
%  load_vreg_in_gpr_range(gpr="a6", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_6")
%  load_vreg_in_gpr_range(gpr="a7", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_7")
%  is_out_stack_needed(needed=z2, shorty=z0, D=z3, F=z4, z0=z1, uniq=uniq)

.L${uniq}_slow_fpr:
   addi $z0, $shorty, 1       // z0 := first arg of shorty (restart the scan)
   addi $z1, $vC, $arg_start  // z1 := (instance) CCCC+1, (static) CCCC
   // linear scan through shorty: extract float/double vregs
%  load_vreg_in_fpr_range(fpr="fa0", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_0")
%  load_vreg_in_fpr_range(fpr="fa1", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_1")
%  load_vreg_in_fpr_range(fpr="fa2", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_2")
%  load_vreg_in_fpr_range(fpr="fa3", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_3")
%  load_vreg_in_fpr_range(fpr="fa4", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_4")
%  load_vreg_in_fpr_range(fpr="fa5", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_5")
%  load_vreg_in_fpr_range(fpr="fa6", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_6")
%  load_vreg_in_fpr_range(fpr="fa7", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_7")
%  is_out_stack_needed_float(needed=z2, shorty=z0, D=z3, F=z4, z0=z1, uniq=uniq)

.L${uniq}_slow_stack:
   beqz $z2, .L${uniq}_slow_done  // No stack needed, skip it. Otherwise copy-paste it all with LD/SD.
   addi $z0, sp, 8            // z0 := base addr of out array
   sh2add $z1, $vC, xFP       // z1 := base addr of FP[CCCC]
   srliw $z2, xINST, 8        // z2 := AA, vreg count
   sh2add $z2, $z2, $z1       // z2 := loop guard, addr of one slot past top of xFP array
%  copy_vregs_to_out(out=z0, fp=z1, fp_top=z2, z0=z3, uniq=uniq)
.L${uniq}_slow_done:
1104
1105
// String-init variant: up to 4 args, no long/float/double args.
// Ref args ('L') loaded with LW *must* apply ZEXT.W to avoid subtle address bugs.
// Args fp[CCCC+1..CCCC+4] go to a1..a4; fp[CCCC] ("this") is never read.
%def slow_setup_args_string_init_range(shorty="", vC="", z0="", z1="", z2="", z3="", uniq=""):
   srliw $z0, xINST, 8             // z0 := AA; possible values 1-5
   li $z1, 2
   blt $z0, $z1, .L${uniq}_slow_1  // AA = 1
   sh2add $z2, $vC, xFP            // z2 := &fp[CCCC]
   li $z3, 'L'                     // z3 := ref type
   beq $z0, $z1, .L${uniq}_slow_2  // AA = 2
   li $z1, 4
   blt $z0, $z1, .L${uniq}_slow_3  // AA = 3
   beq $z0, $z1, .L${uniq}_slow_4  // AA = 4
   // AA = 5
   lw a4, 4*4($z2)
   lb $z1, 4($shorty)
   bne $z1, $z3, .L${uniq}_slow_4
   zext.w a4, a4
.L${uniq}_slow_4:
   lw a3, 3*4($z2)
   lb $z1, 3($shorty)
   bne $z1, $z3, .L${uniq}_slow_3
   zext.w a3, a3
.L${uniq}_slow_3:
   lw a2, 2*4($z2)
   lb $z1, 2($shorty)
   bne $z1, $z3, .L${uniq}_slow_2
   zext.w a2, a2
.L${uniq}_slow_2:
   lw a1, 1*4($z2)
   lb $z1, 1($shorty)
   bne $z1, $z3, .L${uniq}_slow_1
   zext.w a1, a1
.L${uniq}_slow_1:
   // "this" never read in string-init
1140
1141
// Iterate through 4-bit vreg ids in the "vregs" register, load a non-FP value
// into one argument register.
// Advances \shorty and shifts processed/skipped ids out of \vregs, so
// consecutive instantiations continue where the previous one stopped.
// Jumps to \done at end of shorty. 'L' values are zero-extended after the
// (sign-extending) 32-bit get_vreg.
%def load_vreg_in_gpr(gpr="", shorty="", vregs="", D="", F="", J="", L="", z0="", done="", uniq=""):
.L${uniq}_gpr_find:
   lb $z0, ($shorty)         // z0 := next shorty arg spec
   addi $shorty, $shorty, 1  // increment char ptr
   beqz $z0, $done           // z0 == \0
   beq $z0, $F, .L${uniq}_gpr_skip_4_bytes
   beq $z0, $D, .L${uniq}_gpr_skip_8_bytes

   andi $gpr, $vregs, 0xF    // gpr := vreg id
   beq $z0, $J, .L${uniq}_gpr_load_8_bytes
%  get_vreg(gpr, gpr)        #  gpr := 32-bit load
   bne $z0, $L, .L${uniq}_gpr_load_common
   zext.w $gpr, $gpr
.L${uniq}_gpr_load_common:
   srliw $vregs, $vregs, 4   // shift out the processed arg, one vreg
   j .L${uniq}_gpr_set       // and exit
.L${uniq}_gpr_load_8_bytes:
   GET_VREG_WIDE $gpr, $gpr  // gpr := 64-bit load
   srliw $vregs, $vregs, 8   // shift out the processed arg, a vreg pair
   j .L${uniq}_gpr_set       // and exit
.L${uniq}_gpr_skip_8_bytes:
   srliw $vregs, $vregs, 4   // shift out a skipped arg
.L${uniq}_gpr_skip_4_bytes:
   srliw $vregs, $vregs, 4   // shift out a skipped arg
   j .L${uniq}_gpr_find
.L${uniq}_gpr_set:
1171
1172
// Iterate through 4-bit vreg ids in the "vregs" register, load a float or double
// value into one floating point argument register.
// Mirror image of load_vreg_in_gpr: non-FP args are skipped (two vregs for
// 'J'), 'F' loads one vreg, 'D' loads a vreg pair. Jumps to \done at end
// of shorty.
%def load_vreg_in_fpr(fpr="", shorty="", vregs="", D="", F="", J="", z0="", done="", uniq=""):
.L${uniq}_fpr_find:
   lb $z0, ($shorty)         // z0 := next shorty arg spec
   addi $shorty, $shorty, 1  // increment char ptr
   beqz $z0, $done           // z0 == \0
   beq $z0, $F, .L${uniq}_fpr_load_4_bytes
   beq $z0, $D, .L${uniq}_fpr_load_8_bytes

   srliw $vregs, $vregs, 4   // shift out a skipped arg, one vreg
   bne $z0, $J, .L${uniq}_fpr_find
   srliw $vregs, $vregs, 4   // shift out one more skipped arg, for J
   j .L${uniq}_fpr_find

.L${uniq}_fpr_load_4_bytes:
   andi $z0, $vregs, 0xF
%  get_vreg_float(fpr, z0)
   srliw $vregs, $vregs, 4   // shift out the processed arg, one vreg
   j .L${uniq}_fpr_set
.L${uniq}_fpr_load_8_bytes:
   andi $z0, $vregs, 0xF
   GET_VREG_DOUBLE $fpr, $z0
   srliw $vregs, $vregs, 8   // shift out the processed arg, a vreg pair
.L${uniq}_fpr_set:
1198
1199
// Range variant: \idx is a running vreg index rather than a packed-id word.
// Advances \shorty and \idx (by 2 for wide args) so consecutive
// instantiations continue where the previous one stopped. Jumps to \done
// at end of shorty.
%def load_vreg_in_gpr_range(gpr="", shorty="", idx="", D="", F="", J="", L="", z0="", done="", uniq=""):
.L${uniq}_gpr_range_find:
   lb $z0, ($shorty)           // z0 := next shorty arg
   addi $shorty, $shorty, 1    // increment char ptr
   beqz $z0, $done             // z0 == \0
   beq $z0, $F, .L${uniq}_gpr_range_skip_1_vreg
   beq $z0, $D, .L${uniq}_gpr_range_skip_2_vreg

   beq $z0, $J, .L${uniq}_gpr_range_load_2_vreg
%  get_vreg(gpr, idx)
   bne $z0, $L, .L${uniq}_gpr_range_load_common
   zext.w $gpr, $gpr
.L${uniq}_gpr_range_load_common:
   addi $idx, $idx, 1
   j .L${uniq}_gpr_range_done
.L${uniq}_gpr_range_load_2_vreg:
   GET_VREG_WIDE $gpr, $idx
   addi $idx, $idx, 2
   j .L${uniq}_gpr_range_done
.L${uniq}_gpr_range_skip_2_vreg:
   addi $idx, $idx, 1          // falls through: skips a second vreg below
.L${uniq}_gpr_range_skip_1_vreg:
   addi $idx, $idx, 1
   j .L${uniq}_gpr_range_find
.L${uniq}_gpr_range_done:
1227
1228
// Range variant of load_vreg_in_fpr: \idx is a running vreg index. Non-FP
// args advance idx (by 2 for 'J'); 'F' loads one vreg, 'D' a vreg pair.
%def load_vreg_in_fpr_range(fpr="", shorty="", idx="", D="", F="", J="", z0="", done="", uniq=""):
.L${uniq}_fpr_range_find:
   lb $z0, ($shorty)         // z0 := next shorty arg
   addi $shorty, $shorty, 1  // increment char ptr
   beqz $z0, $done           // z0 == \0
   beq $z0, $F, .L${uniq}_fpr_range_load_4_bytes
   beq $z0, $D, .L${uniq}_fpr_range_load_8_bytes

   addi $idx, $idx, 1        // increment idx
   bne $z0, $J, .L${uniq}_fpr_range_find
   addi $idx, $idx, 1        // increment once more for J
   j .L${uniq}_fpr_range_find

.L${uniq}_fpr_range_load_4_bytes:
   mv $z0, $idx
%  get_vreg_float(fpr, z0)
   addi $idx, $idx, 1
   j .L${uniq}_fpr_range_set
.L${uniq}_fpr_range_load_8_bytes:
   mv $z0, $idx
   GET_VREG_DOUBLE $fpr, $z0
   addi $idx, $idx, 2
.L${uniq}_fpr_range_set:
1253
1254
// Scan the rest of the shorty (only reached when a1-a7 were all filled):
// set \needed = 1 if any non-float arg remains, i.e. a GPR arg must spill
// to the out array on the stack. Float/double args are ignored here; they
// are handled by is_out_stack_needed_float after the FPR pass.
%def is_out_stack_needed(needed="", shorty="", D="", F="", z0="", uniq=""):
.L${uniq}_scan_arg:
   lb $z0, ($shorty)
   addi $shorty, $shorty, 1
   beqz $z0, .L${uniq}_scan_done
   beq $z0, $F, .L${uniq}_scan_arg
   beq $z0, $D, .L${uniq}_scan_arg
   li $needed, 1               // non-FP arg left over: stack is needed
.L${uniq}_scan_done:
1264
1265
// FP counterpart (only reached when fa0-fa7 were all filled): set
// \needed = 1 if any float/double arg remains. Skips the scan entirely if
// \needed was already set by is_out_stack_needed.
%def is_out_stack_needed_float(needed="", shorty="", D="", F="", z0="", uniq=""):
   bnez $needed, .L${uniq}_scan_float_done
.L${uniq}_scan_float_arg:
   lb $z0, ($shorty)
   addi $shorty, $shorty, 1
   beqz $z0, .L${uniq}_scan_float_done
   beq $z0, $F, .L${uniq}_scan_float_found
   beq $z0, $D, .L${uniq}_scan_float_found
   j .L${uniq}_scan_float_arg
.L${uniq}_scan_float_found:
   li $needed, 1
.L${uniq}_scan_float_done:
1278
1279
// Copy the vreg slots [\fp, \fp_top) into the out array at \out, preferring
// 8-byte LD/SD copies. If the slot count is odd, one 4-byte copy is done
// first so the remainder is a whole number of wide copies.
%def copy_vregs_to_out(out="", fp="", fp_top="", z0="", uniq=""):
   sub $z0, $fp_top, $fp  // z0 := byte range
   BRANCH_IF_BIT_CLEAR $z0, $z0, 2, .L${uniq}_copy_wide
                          // bit 2 of the byte range is bit 0 of the slot
                          // count: branch if even count of slots
   lwu $z0, ($fp)         // odd count: copy a single 4-byte slot first
   sw $z0, ($out)
   addi $fp, $fp, 4
   addi $out, $out, 4
.L${uniq}_copy_wide:
   beq $fp, $fp_top, .L${uniq}_copy_done
   ld $z0, ($fp)
   sd $z0, ($out)
   addi $fp, $fp, 8
   addi $out, $out, 8
   j .L${uniq}_copy_wide
.L${uniq}_copy_done:
1296
1297
// NterpToNterpInstance
// Calling convention (B): caller nterp frame -> callee nterp frame, where
// vC is "this", already in a1.
// a0: ArtMethod*
// a1: this
%def nterp_to_nterp_instance():
%  nterp_to_nterp(how_vC="in_a1", uniq="n2n_instance")
1303
1304
// NterpToNterpStringInit
// String-init never reads "this", so vC is skipped in the vreg transfer.
// a0: ArtMethod*
%def nterp_to_nterp_string_init():
%  nterp_to_nterp(how_vC="skip", uniq="n2n_string_init")
1309
1310
// NterpToNterpStatic
// Static: no "this" in a1; vC is an ordinary arg loaded like the others.
// a0: ArtMethod*
%def nterp_to_nterp_static():
%  nterp_to_nterp(a1_instance=False, how_vC="load", uniq="n2n_static")
1315
1316
// NterpToNterpInstanceRange
// Range variant of NterpToNterpInstance (contiguous vregs from CCCC).
%def nterp_to_nterp_instance_range():
%  nterp_to_nterp(how_vC="in_a1", uniq="n2n_instance_range", range="Range")
1320
1321
// NterpToNterpStringInitRange
// Range variant of NterpToNterpStringInit.
%def nterp_to_nterp_string_init_range():
%  nterp_to_nterp(how_vC="skip", uniq="n2n_string_init_range", range="Range")
1325
1326
// NterpToNterpStaticRange
// Range variant of NterpToNterpStatic.
%def nterp_to_nterp_static_range():
%  nterp_to_nterp(a1_instance=False, how_vC="load", uniq="n2n_static_range", range="Range")
1330
1331
1332// helpers
1333
1334
// Calling convention (B), nterp -> nterp: build the callee's nterp frame,
// copy caller vreg slots into it, switch xREFS/xFP/xPC to the callee frame,
// and start executing the callee's first opcode.
%def nterp_to_nterp(a1_instance=True, how_vC="", uniq="", range=""):
   .cfi_startproc
%  setup_nterp_frame(cfi_refs="23", refs="s8", fp="s9", pc="s10", regs="s11", spills_sp="t0", z0="t1", z1="t2", z2="t3", z3="t4", uniq=uniq)
       // s8  := callee xREFS
       // s9  := callee xFP
       // s10 := callee xPC
       // s11 := fp/refs vreg count
       // t0  := post-spills pre-frame sp (unused here)
       // sp  := post-frame callee sp
%  if range == 'Range':
%    n2n_arg_move_range(refs="s8", fp="s9", regs="s11", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", a1_instance=a1_instance, how_vC=how_vC, uniq=uniq)
%  else:
%    n2n_arg_move(refs="s8", fp="s9", pc="s10", regs="s11", v_fedc="s7", z0="t0", z1="t1", z2="t2", z3="t3", a1_instance=a1_instance, how_vC=how_vC, uniq=uniq)
%#:
   // Switch the interpreter's frame registers to the callee frame.
   mv xREFS, s8
   mv xFP, s9
   mv xPC, s10
   CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0)

   START_EXECUTING_INSTRUCTIONS
   .cfi_endproc
1356
1357
// See runtime/nterp_helpers.cc for a diagram of the setup.
// Hardcoded
// - a0 - ArtMethod*
// Input
// - \cfi_refs: dwarf register number of \refs, for CFI
// - \uniq: string to ensure unique symbolic labels between instantiations
// Output
// - sp: adjusted downward for callee-saves and nterp frame
// - \refs: callee xREFS
// - \fp: callee xFP
// - \pc: callee xPC
// - \regs: register count in \refs
// - \ins: in count
// - \spills_sp: stack pointer after reg spills
%def setup_nterp_frame(cfi_refs="", refs="", fp="", pc="", regs="", ins="zero", spills_sp="", z0="", z1="", z2="", z3="", uniq=""):
   // Check guard page for stack overflow.
   li $z0, -STACK_OVERFLOW_RESERVED_BYTES
   add $z0, $z0, sp
   ld zero, ($z0)       // probe load: faults here if the frame would overflow

   INCREASE_FRAME NTERP_SIZE_SAVE_CALLEE_SAVES
                        // sp := sp + callee-saves
   SETUP_NTERP_SAVE_CALLEE_SAVES

   ld $pc, ART_METHOD_DATA_OFFSET_64(a0)
   FETCH_CODE_ITEM_INFO code_item=$pc, regs=$regs, outs=$z0, ins=$ins
                        // pc   := callee dex array
                        // regs := vreg count for fp array and refs array
                        // z0   := vreg count for outs array
                        // ins  := vreg count for ins array

   // Compute required frame size: ((2 * \regs) + \z0) * 4 + 24
   // - The register array and reference array each have \regs number of slots.
   // - The out array has \z0 slots.
   // - Each register slot is 4 bytes.
   // - Additional 24 bytes for 3 fields: saved frame pointer, dex pc, and ArtMethod*.
   sh1add $z1, $regs, $z0
   slli $z1, $z1, 2
   addi $z1, $z1, 24    // z1 := frame size, without alignment padding

   // compute new stack pointer
   sub $z1, sp, $z1
   // 16-byte alignment.
   andi $z1, $z1, ~0xF  // z1 := new sp

   // Set \refs to base of reference array. Align to pointer size for the frame pointer and dex pc
   // pointer, below the reference array.
   sh2add $z0, $z0, $z1  // z0 := out array size in bytes
   addi $z0, $z0, 28     //     + 24 bytes for 3 fields, plus 4 for alignment slack.
   andi $refs, $z0, -__SIZEOF_POINTER__
                         // refs := refs array base
   // Set \fp to base of register array, above the reference array. This region is already aligned.
   sh2add $fp, $regs, $refs
                         // fp := fp array base

   // Set up the stack pointer.
   mv $spills_sp, sp     // spills_sp := old sp
   .cfi_def_cfa_register $spills_sp
   mv sp, $z1            // sp := new sp
   sd $spills_sp, -8($refs)
                         // save caller sp just below the refs array
   // The CFA rule is now a dwarf expression, because the nterp frame offset for SP is a dynamic
   // value, and thus SP cannot help compute CFA. For the duration of the nterp frame, CFI
   // directives cannot adjust this CFA rule, but may still capture CFI for register spills as
   // "register + offset" with a dwarf expression.
   CFI_DEF_CFA_BREG_PLUS_UCONST $cfi_refs, -8, NTERP_SIZE_SAVE_CALLEE_SAVES

   // Put nulls in reference array.
   beqz $regs, .L${uniq}_ref_zero_done
   mv $z0, $refs         // z0 := address iterator
.L${uniq}_ref_zero:
   // Write in 8-byte increments, so fp[0] gets zero'ed too, if \regs is odd.
   sd zero, ($z0)
   addi $z0, $z0, 8
   bltu $z0, $fp, .L${uniq}_ref_zero
.L${uniq}_ref_zero_done:
   // Save the ArtMethod*.
   sd a0, (sp)
1436
1437
// Copy up to 5 args from the caller's frame into the callee's nterp frame,
// into the top \regs slots (index regs - A onward). Each arg is written to
// both the callee fp array (raw value via get_vreg) and the callee refs
// array (reference slot via GET_VREG_OBJECT); the branch tree cascades from
// the highest-numbered arg down.
// Hardcoded
// - (caller) xINST, xFP, xREFS, xPC
// - a0: ArtMethod*
// - a1: this, for instance invoke
%def n2n_arg_move(refs="", fp="", regs="", pc="", v_fedc="", z0="", z1="", z2="", z3="", a1_instance=True, how_vC="", uniq=""):
   srliw $z0, xINST, 12       // z0 := A (arg count)

%  if not a1_instance:
     beqz $z0, .L${uniq}_arg_done
%#:
   // A >= 1, decide and branch
   li $z1, 2
   sub $z2, $regs, $z0        // z2 := regs - A; vC's index in fp
   sh2add $z3, $z2, $fp       // z3 := addr of fp[C]
   sh2add $z2, $z2, $refs     // z2 := addr of refs[C]
   blt $z0, $z1, .L${uniq}_arg_1
   beq $z0, $z1, .L${uniq}_arg_2
   li $z1, 4
   blt $z0, $z1, .L${uniq}_arg_3
   beq $z0, $z1, .L${uniq}_arg_4

   // A = 5
   srliw $z0, xINST, 8
   andi $z0, $z0, 0xF         // z0 := G
%  get_vreg(z1, z0)           #  z1 := xFP[G]
   sw $z1, (4*4)($z3)         // fp[G] := z1
   GET_VREG_OBJECT $z0, $z0   // z0 := xREFS[G]
   sw $z0, (4*4)($z2)         // refs[G] := z0
.L${uniq}_arg_4:
   srliw $z0, $v_fedc, 12     // z0 := F
%  get_vreg(z1, z0)           #  z1 := xFP[F]
   sw $z1, (3*4)($z3)         // fp[F] := z1
   GET_VREG_OBJECT $z0, $z0   // z0 := xREFS[F]
   sw $z0, (3*4)($z2)         // refs[F] := z0
.L${uniq}_arg_3:
   srliw $z0, $v_fedc, 8      // z0 := F|E
   andi $z0, $z0, 0xF         // z0 := E
%  get_vreg(z1, z0)           #  z1 := xFP[E]
   sw $z1, (2*4)($z3)         // fp[E] := z1
   GET_VREG_OBJECT $z0, $z0   // z0 := xREFS[E]
   sw $z0, (2*4)($z2)         // refs[E] := z0
.L${uniq}_arg_2:
   srliw $z0, $v_fedc, 4      // z0 := F|E|D
   andi $z0, $z0, 0xF         // z0 := D
%  get_vreg(z1, z0)           #  z1 := xFP[D]
   sw $z1, (1*4)($z3)         // fp[D] := z1
   GET_VREG_OBJECT $z0, $z0   // z0 := xREFS[D]
   sw $z0, (1*4)($z2)         // refs[D] := z0
.L${uniq}_arg_1:
%  if how_vC == "in_a1":
     // a1 = xFP[C] from earlier stage of instance invoke
     sw a1, (0*4)($z3)        // fp[C] := a1
     sw a1, (0*4)($z2)        // refs[C] := a1
%  elif how_vC == "skip":
     // string init doesn't read "this"
%  elif how_vC == "load":
     // static method loads vC just like other vregs
     andi $z0, $v_fedc, 0xF   // z0 := C
%    get_vreg(z1, z0)         #  z1 := xFP[C]
     sw $z1, (0*4)($z3)       // fp[C] := z1
     GET_VREG_OBJECT $z0, $z0  // z0 := xREFS[C]
     sw $z0, (0*4)($z2)       // refs[C] := z0
%#:
.L${uniq}_arg_done:
1502
1503
// Nterp-to-nterp (transfer type B) argument copy for the range invoke formats
// (invoke-kind/range {vCCCC .. vNNNN}, AA args). Copies AA contiguous vreg slots
// starting at vCCCC from the caller's xFP/xREFS into the last AA slots of the
// callee's \fp/\refs. If AA is odd, one 4-byte slot is transferred first so the
// remainder can be copied in 8-byte (two-slot) chunks.
// NOTE(review): the odd head slot is vC itself, which is why \how_vC optimizations
// apply only there.
%def n2n_arg_move_range(refs="", fp="", regs="", vC="", z0="", z1="", z2="", z3="", z4="", z5="", a1_instance=True, how_vC="", uniq=""):
   srliw $z0, xINST, 8     // z0 := AA (arg count), high byte of 16-bit xINST

%  if not a1_instance:
     beqz $z0, .L${uniq}_arg_range_done
%#:
   // AA >= 1, iterator setup: two source iterators (caller) and two destination
   // iterators (callee), advanced in lockstep.
   sub $z4, $regs, $z0     // z4 := regs - AA; starting idx in fp and refs
   sh2add $z1, $vC, xREFS  // z1 := addr of xREFS[CCCC]   (base + index*4, Zba)
   sh2add $z2, $vC, xFP    // z2 := addr of xFP[CCCC]
   sh2add $z3, $z4, $refs  // z3 := addr of refs[z4]
   sh2add $z4, $z4, $fp    // z4 := addr of fp[z4]

   BRANCH_IF_BIT_CLEAR $z0, $z0, 0, .L${uniq}_arg_range_copy_wide
                           // branch if AA is even
   // AA is odd, transfer one slot. Apply some optimizations.
%  if how_vC == "in_a1":
     // a1 = xFP[CCCC] from earlier stage of instance invoke; avoid re-reading it.
     sw a1, ($z3)
     sw a1, ($z4)
%  elif how_vC == "skip":
     // string init doesn't read "this"; slot left untouched but iterators still advance.
%  elif how_vC == "load":
     lw $z0, ($z1)
     lw $z5, ($z2)
     sw $z0, ($z3)
     sw $z5, ($z4)
%#:
   addi $z1, $z1, 4
   addi $z2, $z2, 4
   addi $z3, $z3, 4
   addi $z4, $z4, 4
.L${uniq}_arg_range_copy_wide:
   // Even count of vreg slots, apply LD/SD.
   // The callee refs array is laid out immediately below the fp array, so the refs
   // iterator reaching \fp means all AA slots have been copied.
   beq $z3, $fp, .L${uniq}_arg_range_done  // terminate loop if refs[regs] == fp[0]
   ld $z0, ($z1)
   ld $z5, ($z2)
   sd $z0, ($z3)
   sd $z5, ($z4)
   addi $z1, $z1, 8
   addi $z2, $z2, 8
   addi $z3, $z3, 8
   addi $z4, $z4, 8
   j .L${uniq}_arg_range_copy_wide
.L${uniq}_arg_range_done:
1548
1549
1550//
1551// Nterp entry point helpers
1552//
1553
1554
// Managed-ABI-to-nterp (transfer type A) fast path for a method whose arguments are
// all object references: drain GPR args a1..a7 into the new frame's fp/refs arrays,
// then continue from the caller's stacked outs. Each store_ref_to_vreg expansion
// exits early to \done (via its beq) once \refs reaches \refs_end, so the stack loop
// below is only entered when more than 7 reference args exist.
// Hardcoded:
// - a0: ArtMethod*
%def setup_ref_args_and_go(fp="", refs="", refs_end="", spills_sp="", z0="", z1="", done=""):
   // Store managed-ABI register args into fp/refs arrays.
%  store_ref_to_vreg(gpr="a1", fp=fp, refs=refs, refs_end=refs_end, done=done)
%  store_ref_to_vreg(gpr="a2", fp=fp, refs=refs, refs_end=refs_end, done=done)
%  store_ref_to_vreg(gpr="a3", fp=fp, refs=refs, refs_end=refs_end, done=done)
%  store_ref_to_vreg(gpr="a4", fp=fp, refs=refs, refs_end=refs_end, done=done)
%  store_ref_to_vreg(gpr="a5", fp=fp, refs=refs, refs_end=refs_end, done=done)
%  store_ref_to_vreg(gpr="a6", fp=fp, refs=refs, refs_end=refs_end, done=done)
%  store_ref_to_vreg(gpr="a7", fp=fp, refs=refs, refs_end=refs_end, done=done)
   // We drained arg registers, so continue from caller's stack.
   // A ref arg is 4 bytes, so the continuation offset is well known.
   // Skip the callee-saves area, the ArtMethod* slot (8), and the 7 out slots
   // already consumed via a1..a7 (7*4).
   addi $z0, $spills_sp, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8 + 7*4)
       // z0 := out array base addr + 7 vreg slots
.Lentry_ref_stack:
   // Copy remaining 4-byte ref slots until the refs array is full.
   lwu $z1, ($z0)
   sw $z1, ($fp)
   sw $z1, ($refs)
   addi $z0, $z0, 4
   addi $fp, $fp, 4
   addi $refs, $refs, 4
   bne $refs, $refs_end, .Lentry_ref_stack

   j $done
1580
1581
// Store one reference arg register into the next fp/refs slot pair, advance both
// iterators by one 4-byte slot, and branch to \done when the refs array is full.
// Falls through to the next expansion otherwise.
%def store_ref_to_vreg(gpr="", fp="", refs="", refs_end="", done=""):
   sw $gpr, ($fp)
   sw $gpr, ($refs)
   addi $fp, $fp, 4
   addi $refs, $refs, 4
   beq $refs, $refs_end, $done
1588
1589
// Consume shorty characters until a GPR-carried arg type is found, then store \gpr
// into the frame at \offset. Float ('F') and double ('D') args are skipped (they are
// carried in FPU registers and handled by the FPR pass) but still advance \offset by
// one or two slots to keep slot positions in sync. 'J' (long) stores 8 bytes and
// advances \offset by two slots; 'L' (reference) is additionally mirrored into \refs.
// Falls through with \offset pointing past the stored slot, ready for the next
// chained expansion. Branches to \next when the shorty's NUL terminator is reached.
// \fp and \refs are used as array base addrs, unmodified.
%def store_gpr_to_vreg(gpr="", offset="", shorty="", fp="", refs="", z0="", z1="", D="", F="", J="", L="", next=""):
.Lentry_arg_${gpr}:
   lb $z0, ($shorty)         // z0 := shorty type
   addi $shorty, $shorty, 1  // Increment char ptr.
   beqz $z0, $next           // z0 = \0: finished shorty pass
   beq $z0, $D, .Lentry_arg_skip_double_${gpr}
   beq $z0, $F, .Lentry_arg_skip_float_${gpr}

   add $z1, $offset, $fp     // z1 := addr of fp slot for this arg
   beq $z0, $J, .Lentry_arg_long_${gpr}
   sw $gpr, ($z1)            // 32-bit scalar or reference: store low word
   bne $z0, $L, .Lentry_arg_finish_${gpr}
   add $z1, $offset, $refs   // reference: mirror into refs array
   sw $gpr, ($z1)
   j .Lentry_arg_finish_${gpr}
.Lentry_arg_skip_double_${gpr}:
   addi $offset, $offset, 4  // double occupies an extra slot...
.Lentry_arg_skip_float_${gpr}:
   addi $offset, $offset, 4  // ...and both F and D skip one slot, then retry.
   j .Lentry_arg_${gpr}
.Lentry_arg_long_${gpr}:
   sd $gpr, ($z1)            // long: store full 64 bits across two slots
   addi $offset, $offset, 4  // extra slot for the wide value
.Lentry_arg_finish_${gpr}:
   addi $offset, $offset, 4  // advance past the (first) stored slot
1616
1617
// FP-register counterpart of store_gpr_to_vreg: consume shorty characters until a
// float ('F') or double ('D') arg is found, then store \fpr into the frame at
// \offset. Non-FP args are skipped (they were handled by the GPR pass) but still
// advance \offset — one slot normally, two for 'J' (long). Floats are not mirrored
// into refs (never references). Falls through with \offset past the stored slot;
// branches to \next at the shorty's NUL terminator.
// \fp is used as array base addr, unmodified.
%def store_fpr_to_vreg(fpr="", offset="", shorty="", fp="", z0="", z1="", D="", F="", J="", next=""):
.Lentry_farg_${fpr}:
   lb $z0, ($shorty)         // z0 := shorty type
   addi $shorty, $shorty, 1  // Increment char ptr.
   beqz $z0, $next           // z0 = \0: finished shorty pass
   beq $z0, $D, .Lentry_farg_double_${fpr}
   beq $z0, $F, .Lentry_farg_float_${fpr}
   addi $offset, $offset, 4  // non-FP arg: skip one slot...
   bne $z0, $J, .Lentry_farg_${fpr}
   addi $offset, $offset, 4  // ...or two for a long, then retry.
   j .Lentry_farg_${fpr}

.Lentry_farg_float_${fpr}:
   add $z1, $offset, $fp
   fsw $fpr, ($z1)           // float: store 32 bits
   j .Lentry_farg_finish_${fpr}
.Lentry_farg_double_${fpr}:
   add $z1, $offset, $fp
   fsd $fpr, ($z1)           // double: store 64 bits across two slots
   addi $offset, $offset, 4  // extra slot for the wide value
.Lentry_farg_finish_${fpr}:
   addi $offset, $offset, 4  // advance past the (first) stored slot
1641
1642
// Copy the remaining non-FP stack-passed args from the caller's outs area into the
// callee's fp (and, for references, refs) arrays, driven by the rest of the shorty.
// Float ('F') and double ('D') entries only advance the iterators (their values come
// from FP registers / the FP stack pass); 'J' (long) copies 8 bytes; everything else
// copies 4, with 'L' (reference) mirrored into refs. All three iterators advance in
// lockstep so slot indices stay aligned across the arrays. Branches to \next at the
// shorty's NUL terminator.
// \outs, \fp, \refs are used as iterators, modified.
%def store_outs_to_vregs(outs="", shorty="", fp="", refs="", z0="", z1="", D="", F="", J="", L="", next=""):
.Lentry_stack:
   lb $z0, ($shorty)         // z0 := next shorty arg spec
   addi $shorty, $shorty, 1  // Increment char ptr.
   beqz $z0, $next           // z0 == \0
   beq $z0, $F, .Lentry_stack_next_4
   beq $z0, $D, .Lentry_stack_next_8
   beq $z0, $J, .Lentry_stack_long
   // 32-bit arg
   lwu $z1, ($outs)
   sw $z1, ($fp)
   bne $z0, $L, .Lentry_stack_next_4
   // and also a ref
   sw $z1, ($refs)
.Lentry_stack_next_4:
   addi $outs, $outs, 4
   addi $fp, $fp, 4
   addi $refs, $refs, 4      // refs advances even for non-refs to stay in lockstep
   j .Lentry_stack
.Lentry_stack_long:
   ld $z1, ($outs)
   sd $z1, ($fp)
.Lentry_stack_next_8:
   addi $outs, $outs, 8
   addi $fp, $fp, 8
   addi $refs, $refs, 8
   j .Lentry_stack
1671
1672
// FP counterpart of store_outs_to_vregs: copy the remaining stack-passed float ('F',
// 4 bytes) and double ('D', 8 bytes) args from the caller's outs area into the
// callee's fp array. Non-FP entries only advance the iterators — one slot normally,
// two for 'J' (long) — since their values were handled by the GPR/stack pass. No
// refs mirroring (floats are never references). Branches to \next at the shorty's
// NUL terminator.
// \outs, \fp are used as iterators, modified.
%def store_float_outs_to_vregs(outs="", shorty="", fp="", z0="", D="", F="", J="", next=""):
.Lentry_fstack:
   lb $z0, ($shorty)         // z0 := next shorty arg spec
   addi $shorty, $shorty, 1  // Increment char ptr.
   beqz $z0, $next           // z0 == \0
   beq $z0, $F, .Lentry_fstack_float
   beq $z0, $D, .Lentry_fstack_double
   beq $z0, $J, .Lentry_fstack_next_8
   // 32-bit arg
   addi $outs, $outs, 4
   addi $fp, $fp, 4
   j .Lentry_fstack
.Lentry_fstack_float:
   lwu $z0, ($outs)
   sw $z0, ($fp)
   addi $outs, $outs, 4
   addi $fp, $fp, 4
   j .Lentry_fstack
.Lentry_fstack_double:
   ld $z0, ($outs)
   sd $z0, ($fp)
.Lentry_fstack_next_8:
   addi $outs, $outs, 8
   addi $fp, $fp, 8
   j .Lentry_fstack
1699
1700