• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 %def header():
2 /*
3  * Copyright (C) 2023 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * This is a #include, not a %include, because we want the C pre-processor
20  * to expand the macros into assembler assignment statements.
21  */
22 #include "asm_support.h"
23 #include "arch/riscv64/asm_support_riscv64.S"
24 
25 /**
26  * RISC-V 64 ABI general notes
27  *
28  * References
29  * - https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
30  * - runtime/arch/riscv64/registers_riscv64.h
31  *
32  * 32 general purposes registers
33  * - fixed purpose: zero, ra, sp, gp, tp, s1
34  *     gp/scs: shadow call stack - do not clobber!
35  *     s1/tr: ART thread register - do not clobber!
36  * - temporaries: t0-t6
37  * - arguments: a0-a7
38  * - callee saved: ra, s0/fp, s2-s11
39  *     s0 is flexible, available to use as a frame pointer if needed.
40  *
41  * 32 floating point registers
42  * - temporaries: ft0-ft11
43  * - arguments: fa0-fa7
44  * - callee saved: fs0-fs11
45  */
46 
47 // Android references
48 //   Bytecodes: https://source.android.com/docs/core/runtime/dalvik-bytecode
49 //   Instruction formats: https://source.android.com/docs/core/runtime/instruction-formats
50 //   Shorty: https://source.android.com/docs/core/runtime/dex-format#shortydescriptor
51 
52 // Fixed register usages in Nterp. All nicknames alias RISC-V callee-saved s-registers,
53 //    nickname  ABI    reg   purpose       (so they survive any ABI-conforming call)
54 #define xSELF    s1  // x9,   Thread* self pointer
55 #define xFP      s2  // x18,  interpreted frame pointer: to access locals and args
56 #define xPC      s3  // x19,  interpreted program counter: to fetch instructions
57 #define xINST    s4  // x20,  first 16-bit code unit of current instruction
58 #define xIBASE   s5  // x21,  interpreted instruction base pointer: for computed goto
59 #define xREFS    s6  // x22,  base of object references of dex registers
60 
61 // DWARF registers reference
62 // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-dwarf.adoc
63 #define CFI_TMP  10  // DWARF register number for       a0/x10
64 #define CFI_DEX  19  // DWARF register number for xPC  /s3/x19
65 #define CFI_REFS 22  // DWARF register number for xREFS/s6/x22
66 
67 // Synchronization
68 // This code follows the RISC-V atomics ABI specification [1].
69 //
70 // Object publication.
71 // new-instance and new-array operations must first perform a `fence w,w` "constructor fence" to
72 // ensure their new object references are correctly published with a subsequent SET_VREG_OBJECT.
73 //
74 // Volatile load/store.
75 // A volatile load is implemented as: fence rw,rw ; load ; fence r,rw.
76 // A 32-bit or 64-bit volatile store is implemented as: amoswap.{w,d}.rl
77 // A volatile store for a narrower type is implemented as: fence rw,w ; store ; fence rw,rw
78 //
79 // [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-atomic.adoc
80 
81 // An assembly entry for nterp: defines \name as a hidden, 16-byte-aligned global function symbol.
82 .macro OAT_ENTRY name
83     .type \name, @function
84     .hidden \name  // hide from dynamic linking, so calls do not go through the PLT
85     .global \name
86     .balign 16  // align the entry point for the instruction fetch/cache
87 \name:
88 .endm
89 
90 .macro SIZE name  // Set the ELF symbol size of \name to span from its label to here.
91     .size \name, .-\name
92 .endm
93 
94 // Similar to ENTRY but without the CFI directives. Pair with NAME_END to record symbol size.
95 .macro NAME_START name
96     .type \name, @function
97     .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
98     .global \name
99     /* Cache alignment for function entry */
100     .balign 16
101 \name:
102 .endm
103 
104 .macro NAME_END name  // Close a NAME_START region: record the ELF size of \name.
105   SIZE \name
106 .endm
107 
108 // Macro for defining entrypoints into runtime. We don't need to save registers (we're not holding
109 // references there), but there is no kDontSave runtime method. So just use the kSaveRefsOnly
110 // runtime method.
111 .macro NTERP_TRAMPOLINE name, helper
112 ENTRY \name
113     SETUP_SAVE_REFS_ONLY_FRAME
114     call \helper
115     RESTORE_SAVE_REFS_ONLY_FRAME
116     ld t0, THREAD_EXCEPTION_OFFSET(xSELF)  // t0 := self->exception
117     bnez t0, nterp_deliver_pending_exception  // \helper raised: deliver instead of returning
118     ret
119 END \name
120 .endm
121 
122 // Unpack code items from dex format.
123 // Input: \code_item
124 // Output:
125 //   - \regs: register count
126 //   - \outs: out count
127 //   - \ins: in count. If set to register "zero" (x0), load is skipped.
128 //   - \code_item: holds instruction array on exit
129 .macro FETCH_CODE_ITEM_INFO code_item, regs, outs, ins
130     // Check LSB of \code_item. If 1, it's a compact dex file.
131     BRANCH_IF_BIT_CLEAR \regs, \code_item, 0, 1f  // Regular dex. (\regs used as scratch here.)
132     unimp  // Compact dex: unimplemented.
133 1:
134     // Unpack values from regular dex format.
135     lhu \regs, CODE_ITEM_REGISTERS_SIZE_OFFSET(\code_item)
136     lhu \outs, CODE_ITEM_OUTS_SIZE_OFFSET(\code_item)
137     .ifnc \ins, zero  // .ifnc: assemble only if \ins is NOT textually "zero"
138       lhu \ins, CODE_ITEM_INS_SIZE_OFFSET(\code_item)
139     .endif
140     addi \code_item, \code_item, CODE_ITEM_INSNS_OFFSET  // \code_item now points at the insns array
141 .endm
142 
143 .macro EXPORT_PC  // Publish the current dex PC into the frame so the runtime can see it.
144     sd xPC, -16(xREFS)  // NOTE(review): dex-PC slot sits 16 bytes below xREFS per frame setup -- confirm against setup_nterp_frame
145 .endm
146 
147 .macro TEST_IF_MARKING reg, label  // Branch to \label if the GC is concurrently marking. Clobbers: \reg.
148     lb \reg, THREAD_IS_GC_MARKING_OFFSET(xSELF)  // \reg := self->is_gc_marking (byte)
149     bnez \reg, \label
150 .endm
151 
152 .macro DO_SUSPEND_CHECK continue  // Jump to \continue if no suspend/checkpoint request is pending. Clobbers: t0.
153     lwu t0, THREAD_FLAGS_OFFSET(xSELF)  // t0 := self->flags
154     andi t0, t0, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
155     beqz t0, \continue
156     EXPORT_PC  // the runtime may walk this frame while we are suspended
157     call art_quick_test_suspend  // may suspend the current thread
158 .endm
159 
160 // Fetch one or more half-word units from an offset past the current PC.
161 // The offset is specified in 16-bit code units.
162 //
163 // A \width flag allows reading 32 bits (2 units) or 64 bits (4 units) from the offset.
164 // The RISC-V ISA supports unaligned accesses for these wider loads.
165 //
166 // If \width=8, \byte={0,1} indexes into the code unit at the offset.
167 //
168 // Default behavior loads one code unit with unsigned zext.
169 // The \signed flag is for signed sext, for shorter loads.
170 //
171 // Does not advance xPC. Writes only \reg.
172 .macro FETCH reg, count, signed=0, width=16, byte=0
173     .if \width == 8
174       .if \signed
175         lb  \reg, (\count*2 + \byte)(xPC)
176       .else
177         lbu \reg, (\count*2 + \byte)(xPC)
178       .endif
179     .elseif \width == 16
180       .if \signed
181         lh  \reg, (\count*2)(xPC)
182       .else
183         lhu \reg, (\count*2)(xPC)
184       .endif
185     .elseif \width == 32
186       .if \signed
187         lw  \reg, (\count*2)(xPC)
188       .else
189         lwu \reg, (\count*2)(xPC)
190       .endif
191     .elseif \width == 64
192       ld  \reg, (\count*2)(xPC)  // full register width: no sign/zero-extension distinction
193     .else
194       unimp  // impossible
195     .endif
196 .endm
197 
198 // Fetch the next instruction, from xPC into xINST.
199 // Does not advance xPC. Writes only xINST.
200 .macro FETCH_INST
201     lhu xINST, (xPC)  // zero in upper 48 bits
202 .endm
203 
204 // Fetch the next instruction, from xPC into xINST. Advance xPC by \count units, each 2 bytes.
205 //
206 // Immediates have a 12-bit offset range from xPC. Thus, \count can range from -1024 to 1023.
207 //
208 // Note: Must be placed AFTER anything that can throw an exception, or the exception catch may miss.
209 // Thus, this macro must be placed after EXPORT_PC.
210 .macro FETCH_ADVANCE_INST count
211     lhu xINST, (\count*2)(xPC)  // zero in upper 48 bits
212     addi xPC, xPC, (\count*2)  // xPC += \count code units
213 .endm
214 
215 // Extract the opcode (low byte of xINST) into \reg. Clobbers: \reg.
216 .macro GET_INST_OPCODE reg
217     andi \reg, xINST, 0xFF  // explicit immediate form; `and` with an immediate is the same alias
218 .endm
219 
220 // Computed goto: jump to the handler for opcode \reg. Clobbers: \reg.
221 .macro GOTO_OPCODE reg
222     slliw \reg, \reg, ${handler_size_bits}  // \reg := opcode * handler stride (template constant)
223     add \reg, xIBASE, \reg  // handler address = handler table base + offset
224     jr \reg
225 .endm
226 
227 .macro FETCH_FROM_THREAD_CACHE reg, miss_label, z0, z1  // \reg := cached value keyed by xPC, else branch to \miss_label. Clobbers: \z0, \z1.
228     // See art::InterpreterCache::IndexOf() for computing index of key within cache array.
229     // Entry address:
230     //   xSELF + OFFSET + ((xPC>>2 & xFF) << 4)
231     // = xSELF + OFFSET + ((xPC & xFF<<2) << 2)
232     // = xSELF + ((OFFSET>>2 + (xPC & xFF<<2)) << 2)
233     // => ANDI, ADD, SH2ADD
#if (THREAD_INTERPRETER_CACHE_SIZE_LOG2 != 8)
235 #error Expected interpreter cache array size = 256 elements
236 #endif
237 #if (THREAD_INTERPRETER_CACHE_SIZE_SHIFT != 2)
238 #error Expected interpreter cache entry size = 16 bytes
239 #endif
240 #if ((THREAD_INTERPRETER_CACHE_OFFSET & 0x3) != 0)
241 #error Expected interpreter cache offset to be 4-byte aligned
242 #endif
243     andi \z0, xPC, 0xFF << 2
244     addi \z0, \z0, THREAD_INTERPRETER_CACHE_OFFSET >> 2
245     sh2add \z0, \z0, xSELF  // z0 := entry's address
246     ld \z1, (\z0)           // z1 := dex PC
247     bne xPC, \z1, \miss_label  // key mismatch: cache miss
248     ld \reg, 8(\z0)         // value: depends on context; see call site
249 .endm
250 
251 // Inputs:
252 //   - a0: ArtMethod*
253 //   - xSELF
254 // Clobbers: t0
255 .macro CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot, if_not_hot
256     lwu t0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
257     BRANCH_IF_BIT_CLEAR t0, t0, ART_METHOD_IS_MEMORY_SHARED_FLAG_BIT, \if_hot
258 
259     lwu t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)  // t0 := hotness
260     beqz t0, \if_hot
261 
262     addi t0, t0, -1  // increase hotness (counter counts down; 0 means hot)
263     sw t0,  THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)
264     j \if_not_hot
265 .endm
266 
267 // Update xPC by \units code units. On back edges, perform hotness and suspend.
268 .macro BRANCH units
269     sh1add xPC, \units, xPC  // xPC += \units * 2 (code units are 2 bytes)
270     blez \units, 2f  // If branch is <= 0, increase hotness and do a suspend check.
271 1:
272     FETCH_INST
273     GET_INST_OPCODE t0
274     GOTO_OPCODE t0
275 2:
276     ld a0, (sp)  // a0 := ArtMethod* (kept at the bottom of the frame)
277     lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)  // t0 := hotness
278 #if (NTERP_HOTNESS_VALUE != 0)
279 #error Expected 0 for hotness value
280 #endif
281     // If the counter is at zero (hot), handle it in the runtime.
282     beqz t0, 3f
283     addi t0, t0, -1  // increase hotness (counter counts down)
284     sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
285     DO_SUSPEND_CHECK continue=1b
286     j 1b
287 3:
288     tail NterpHandleHotnessOverflow  // arg a0 (ArtMethod*)
289 .endm
290 
291 // Increase method hotness before starting the method.
292 // Hardcoded:
293 // - a0: ArtMethod*
294 // Clobbers: t0
295 .macro START_EXECUTING_INSTRUCTIONS
296     ld a0, (sp)  // a0 := ArtMethod* (kept at the bottom of the frame)
297     lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)  // t0 := hotness
298 #if (NTERP_HOTNESS_VALUE != 0)
299 #error Expected 0 for hotness value
300 #endif
301     // If the counter is at zero (hot), handle it in the runtime.
302     beqz t0, 3f
303     addi t0, t0, -1  // increase hotness (counter counts down)
304     sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
305 1:
306     DO_SUSPEND_CHECK continue=2f
307 2:
308     FETCH_INST
309     GET_INST_OPCODE t0
310     GOTO_OPCODE t0
311 3:
312     CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=4f, if_not_hot=1b
313 4:
314     mv a1, zero  // dex_pc_ptr=nullptr
315     mv a2, zero  // vregs=nullptr
316     call nterp_hot_method
317     j 2b
318 .endm
319 
320 // 64 bit read: \reg := 64-bit value spanning fp[vreg] and fp[vreg+1].
321 // Clobbers: \reg
322 // Safe if \reg == \vreg.
323 .macro GET_VREG_WIDE reg, vreg
324     sh2add \reg, \vreg, xFP  // vreg addr in register array
325     ld \reg, (\reg)          // reg := fp[vreg](lo) | fp[vreg+1](hi)
326 .endm
327 
328 // 64 bit write: store a 64-bit primitive into fp[vreg..vreg+1] and null both ref slots.
329 // Clobbers: z0
330 .macro SET_VREG_WIDE reg, vreg, z0
331     sh2add \z0, \vreg, xFP    // vreg addr in register array
332     sd \reg, (\z0)            // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
333     sh2add \z0, \vreg, xREFS  // vreg addr in reference array
334     sd zero, (\z0)            // refs[vreg] := null ; refs[vreg+1] := null (wide value is not a ref)
335 .endm
336 
337 // Object read
338 // Clobbers: \reg
339 // Safe if \reg == \vreg.
340 .macro GET_VREG_OBJECT reg, vreg
341     sh2add \reg, \vreg, xREFS  // vreg addr in reference array
342     lwu \reg, (\reg)           // reg := refs[vreg]; refs are 32-bit slots, zero-extended
343 .endm
344 
345 // Object write: the reference is mirrored into both the register and reference arrays.
346 // Clobbers: z0
347 .macro SET_VREG_OBJECT reg, vreg, z0
348     sh2add \z0, \vreg, xFP    // vreg addr in register array
349     sw \reg, (\z0)            // fp[vreg] := reg
350     sh2add \z0, \vreg, xREFS  // vreg addr in reference array
351     sw \reg, (\z0)            // refs[vreg] := reg
352 .endm
353 
354 // Floating-point 64 bit read
355 // Clobbers: \reg, \vreg (\vreg is reused to hold the slot address)
356 .macro GET_VREG_DOUBLE reg, vreg
357     sh2add \vreg, \vreg, xFP  // vreg addr in register array
358     fld \reg, (\vreg)         // reg := fp[vreg](lo) | fp[vreg+1](hi)
359 .endm
360 
361 // Floating-point 64 bit write: store the double and null both ref slots.
362 // Clobbers: z0 (\reg is only read, despite older comments listing it)
363 .macro SET_VREG_DOUBLE reg, vreg, z0
364     sh2add \z0, \vreg, xFP    // vreg addr in register array
365     fsd \reg, (\z0)           // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
366     sh2add \z0, \vreg, xREFS  // vreg addr in reference array
367     sd zero, (\z0)            // refs[vreg] := null ; refs[vreg+1] := null
368 .endm
369 
370 // Put "%def" definitions after ".macro" definitions for proper expansion. %def is greedy.
371 
372 // Typed read, defaults to 32-bit read.
373 // Note: An object ref requires LWU, or LW;ZEXT.W.
374 // Clobbers: \reg
375 // Safe if \reg == \vreg.
376 %def get_vreg(reg, vreg, width=32, is_wide=False, is_unsigned=False):
377 %  if is_wide or width == 64:
378      GET_VREG_WIDE $reg, $vreg
379 %  elif is_unsigned:
380      sh2add $reg, $vreg, xFP  // vreg addr in register array
381      lwu $reg, ($reg)         // reg := fp[vreg], zext
382 %  else:
383      sh2add $reg, $vreg, xFP  // vreg addr in register array
384      lw $reg, ($reg)          // reg := fp[vreg], sext
385 %#:
386 
387 // Typed write, defaults to 32-bit write. Nulls the ref slot (value is a primitive).
388 // Note: Incorrect for an object ref; it requires 2nd SW into xREFS.
389 // Clobbers: z0
390 %def set_vreg(reg, vreg, z0, width=32, is_wide=False):
391 %  if is_wide or width == 64:
392      SET_VREG_WIDE $reg, $vreg, $z0
393 %  else:
394      sh2add $z0, $vreg, xFP    // vreg addr in register array
395      sw $reg, ($z0)            // fp[vreg] := reg
396      sh2add $z0, $vreg, xREFS  // vreg addr in reference array
397      sw zero, ($z0)            // refs[vreg] := null
398 %#:
399 
400 // Floating-point read, defaults to 32-bit read.
401 // Clobbers: reg, vreg (vreg is reused to hold the slot address)
402 %def get_vreg_float(reg, vreg, is_double=False):
403 %  if is_double:
404      GET_VREG_DOUBLE $reg, $vreg
405 %  else:
406      sh2add $vreg, $vreg, xFP  // vreg addr in register array
407      flw $reg, ($vreg)         // reg := fp[vreg]
408 %#:
409 
410 // Floating-point write, defaults to 32-bit write. Nulls the ref slot (value is a primitive).
411 // Clobbers: z0 (reg is only read)
412 %def set_vreg_float(reg, vreg, z0, is_double=False):
413 %  if is_double:
414      SET_VREG_DOUBLE $reg, $vreg, $z0
415 %  else:
416      sh2add $z0, $vreg, xFP    // vreg addr in register array
417      fsw $reg, ($z0)           // fp[vreg] := reg
418      sh2add $z0, $vreg, xREFS  // vreg addr in reference array
419      sw zero, ($z0)            // refs[vreg] := null
420 %#:
421 
422 %def entry():
423 /*
424  * ArtMethod entry point.
425  *
426  * On entry:
427  *  a0     ArtMethod* callee
428  *  a1-a7  method parameters
429  */
430 OAT_ENTRY ExecuteNterpWithClinitImpl
431 #if MIRROR_CLASS_STATUS_SHIFT < 12
432 #error mirror class status bits cannot use LUI load technique
433 #endif
434     .cfi_startproc
435     // For simplicity, we don't do a read barrier here, but instead rely
436     // on art_quick_resolution_trampoline to always have a suspend point before
437     // calling back here.
438     lwu t0, ART_METHOD_DECLARING_CLASS_OFFSET(a0)
439     lw t1, MIRROR_CLASS_STATUS_OFFSET(t0)  // t1 := status word, sext
440     lui t2, MIRROR_CLASS_STATUS_VISIBLY_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
441     // The unsigned comparison works in tandem with the 64-bit sign-extension of
442     // the status bits at the top of the 32-bit word. The order of the status
443     // constants (sign extended from LUI) is unchanged with unsigned comparison.
444     bgeu t1, t2, ExecuteNterpImpl
445     lui t2, MIRROR_CLASS_STATUS_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
446     bltu t1, t2, .Linitializing_check
447     fence w, w  // initialized but not yet visibly: order the class's init stores before running
448     j ExecuteNterpImpl
449 .Linitializing_check:
450     lui t2, MIRROR_CLASS_STATUS_INITIALIZING << (MIRROR_CLASS_STATUS_SHIFT - 12)
451     bltu t1, t2, .Lresolution_trampoline
452     lwu t1, MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET(t0)
453     lwu t0, THREAD_TID_OFFSET(xSELF)
454     beq t0, t1, ExecuteNterpImpl  // self is the thread running <clinit>: OK to enter
455 .Lresolution_trampoline:
456     tail art_quick_resolution_trampoline
457     .cfi_endproc
458     .type EndExecuteNterpWithClinitImpl, @function
459     .hidden EndExecuteNterpWithClinitImpl
460     .global EndExecuteNterpWithClinitImpl
461 EndExecuteNterpWithClinitImpl:
462 
463 OAT_ENTRY ExecuteNterpImpl
464    .cfi_startproc
465 %  setup_nterp_frame(cfi_refs="CFI_REFS", refs="xREFS", fp="xFP", pc="xPC", regs="s7", ins="s8", spills_sp="s9", z0="t0", z1="t1", z2="t2", z3="t3", uniq="entry")
466                             // xREFS := callee refs array
467                             // xFP   := callee fp array
468                             // xPC   := callee dex array
469                             // s7    := refs/fp vreg count
470                             // s8    := ins count
471                             // s9    := post-spills pre-frame sp
472                             // sp    := post-frame sp
473    CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0)
474 
475    // Fast path: zero args.
476    beqz s8, .Lentry_go
477 
478    sub s7, s7, s8           // s7 := a1 index in fp/refs
479    lwu s10, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
480                             // s10 := method flags
481 
482    // Fast path: all reference args.
483    sh2add t0, s7, xFP       // t0 := &xFP[a1]
484    sh2add t1, s7, xREFS     // t1 := &xREFS[a1]
485    BRANCH_IF_BIT_CLEAR t2, s10, ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT, .Lentry_a1
486 %  setup_ref_args_and_go(fp="t0", refs="t1", refs_end="xFP", spills_sp="s9", z0="t2", z1="t3", done=".Lentry_go")
487 
488    // Fast path: instance with zero args.
489 .Lentry_a1:
490    bexti s10, s10, ART_METHOD_IS_STATIC_FLAG_BIT
491                             // s10 := 1 if static, 0 if instance
492    bnez s10, .Lentry_shorty
493    sw a1, (t0)              // fp[a1] := receiver
494    sw a1, (t1)              // refs[a1] := receiver ('this' is a reference)
495    li t2, 1
496    beq s8, t2, .Lentry_go   // single 'in' (just the receiver): done
497 
498    // Slow path: runtime call to obtain shorty, full setup from managed ABI.
499 .Lentry_shorty:
500    SPILL_ALL_ARGUMENTS
501    // TODO: Better way to get shorty
502    call NterpGetShorty      // arg a0
503    mv s11, a0               // s11 := shorty
504    RESTORE_ALL_ARGUMENTS
505 
506    // temporaries are trashed, recompute some values
507    sh2add t0, s7, xFP       // t0 := &xFP[a1]
508    sh2add t1, s7, xREFS     // t1 := &xREFS[a1]
509    addi t2, s11, 1          // t2 := shorty arg (skip return type)
510    xori s10, s10, 1         // s10 := 0 if static, 1 if instance
511    slliw t3, s10, 2         // t3 := (static) 0, (instance) 4: fp/refs/outs byte offset
512    // constant setup for gpr/fpr shorty comparisons
513    li s0, 'D'               // s0 := double char (unused fp)
514    li s4, 'F'               // s4 := float char (unused xINST)
515    li s5, 'J'               // s5 := long char (unused xIBASE)
516    li s8, 'L'               // s8 := ref char (unused ins count)
517    bnez s10, .Lentry_args   // instance a1 already stored into callee's xFP and xREFS
518 
519 %  store_gpr_to_vreg(gpr="a1", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
520 
521 .Lentry_args:
522    // linear scan through shorty: extract non-float args
523 %  store_gpr_to_vreg(gpr="a2", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
524 %  store_gpr_to_vreg(gpr="a3", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
525 %  store_gpr_to_vreg(gpr="a4", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
526 %  store_gpr_to_vreg(gpr="a5", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
527 %  store_gpr_to_vreg(gpr="a6", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
528 %  store_gpr_to_vreg(gpr="a7", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
529    // We drained arg registers, so continue from caller stack's out array. Unlike the reference-only
530    // fast-path, the continuation offset in the out array can vary, depending on the presence of
531    // 64-bit values in the arg registers. \offset tracks this value as a byte offset.
532    addi t5, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
533                             // t5 := (caller) outs array base address
534    add t4, t3, t0           // t4 := (callee) &FP[next]
535    add t1, t3, t1           // t1 := (callee) &REFS[next]
536    add t3, t3, t5           // t3 := (caller) &OUTS[next]
537 %  store_outs_to_vregs(outs="t3", shorty="t2", fp="t4", refs="t1", z0="t5", z1="t6", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
538                             // t0 = &xFP[a1], unclobbered
539 .Lentry_fargs:
540    addi t1, s11, 1          // t1 := shorty arg (skip return type)
541    slliw t2, s10, 2         // t2 := starting byte offset for fp/outs, static and instance
542    // linear scan through shorty: extract float args
543 %  store_fpr_to_vreg(fpr="fa0", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
544 %  store_fpr_to_vreg(fpr="fa1", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
545 %  store_fpr_to_vreg(fpr="fa2", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
546 %  store_fpr_to_vreg(fpr="fa3", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
547 %  store_fpr_to_vreg(fpr="fa4", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
548 %  store_fpr_to_vreg(fpr="fa5", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
549 %  store_fpr_to_vreg(fpr="fa6", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
550 %  store_fpr_to_vreg(fpr="fa7", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
551    addi t3, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
552                             // t3 := (caller) outs array base address
553    add t0, t2, t0           // t0 := (callee) &FP[next]
554    add t2, t2, t3           // t2 := (caller) &OUTS[next]
555 %  store_float_outs_to_vregs(outs="t2", shorty="t1", fp="t0", z0="t3", D="s0", F="s4", J="s5", next=".Lentry_go")
556 
557 .Lentry_go:
558     la xIBASE, artNterpAsmInstructionStart
559     START_EXECUTING_INSTRUCTIONS
560     // NOTE: no fallthrough
561     // cfi info continues, and covers the whole nterp implementation.
562     SIZE ExecuteNterpImpl
563 
564 %def footer():
565 /*
566  * ===========================================================================
567  *  Common subroutines and data
568  * ===========================================================================
569  */
570 
571     .text
572     .align  2
573 
574 
575 // Enclose all code below in a symbol (which gets printed in backtraces).
576 NAME_START nterp_helper
577 
578 common_errArrayIndex:
579     EXPORT_PC
580     // CALL preserves RA for stack walking.
581     call art_quick_throw_array_bounds  // args a0 (index), a1 (length); does not return
582 
583 common_errDivideByZero:
584     EXPORT_PC
585     // CALL preserves RA for stack walking.
586     call art_quick_throw_div_zero  // does not return
587 
588 common_errNullObject:
589     EXPORT_PC
590     // CALL preserves RA for stack walking.
591     call art_quick_throw_null_pointer_exception  // does not return
592 
593 NterpInvokeVirtual:
594 %  nterp_invoke_virtual()
595 NterpInvokeSuper:
596 %  nterp_invoke_super()
597 NterpInvokeDirect:
598 %  nterp_invoke_direct()
599 NterpInvokeStringInit:
600 %  nterp_invoke_string_init()
601 NterpInvokeStatic:
602 %  nterp_invoke_static()
603 NterpInvokeInterface:
604 %  nterp_invoke_interface()
605 NterpInvokePolymorphic:
606 %  nterp_invoke_polymorphic()
607 NterpInvokeCustom:
608 %  nterp_invoke_custom()
609 NterpInvokeVirtualRange:
610 %  nterp_invoke_virtual_range()
611 NterpInvokeSuperRange:
612 %  nterp_invoke_super_range()
613 NterpInvokeDirectRange:
614 %  nterp_invoke_direct_range()
615 NterpInvokeStringInitRange:
616 %  nterp_invoke_string_init_range()
617 NterpInvokeStaticRange:
618 %  nterp_invoke_static_range()
619 NterpInvokeInterfaceRange:
620 %  nterp_invoke_interface_range()
621 NterpInvokePolymorphicRange:
622 %  nterp_invoke_polymorphic_range()
623 NterpInvokeCustomRange:
624 %  nterp_invoke_custom_range()
625 
626 // Arg a0: ArtMethod*
627 NterpHandleHotnessOverflow:
628    CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=.Lhotspill_hot, if_not_hot=.Lhotspill_suspend
629 .Lhotspill_hot:
630    mv a1, xPC
631    mv a2, xFP
632    call nterp_hot_method  // args a0, a1, a2; a0 := OsrData* or null
633    bnez a0, .Lhotspill_osr
634 .Lhotspill_advance:
635    FETCH_INST
636    GET_INST_OPCODE t0
637    GOTO_OPCODE t0
638 .Lhotspill_osr:
639    // a0 = OsrData*
640    // Drop most of the current nterp frame, but keep the callee-saves.
641    // The nterp callee-saves (count and layout) match the OSR frame's callee-saves.
642    ld sp, -8(xREFS)  // caller's interpreted frame pointer
643    .cfi_def_cfa sp, NTERP_SIZE_SAVE_CALLEE_SAVES
644    lwu t0, OSR_DATA_FRAME_SIZE(a0)
645    addi t0, t0, -NTERP_SIZE_SAVE_CALLEE_SAVES  // t0 := osr frame - callee saves, in bytes
646    mv s7, sp         // Remember CFA in a callee-save register.
647    .cfi_def_cfa_register s7
648    sub sp, sp, t0    // OSR size guaranteed to be stack aligned (16 bytes).
649 
650    addi t1, a0, OSR_DATA_MEMORY  // t1 := read start
651    add t1, t1, t0                // t1 := read end (exclusive)
652    mv t2, s7                     // t2 := write end (exclusive)
653    // t0 >= 8 (OSR places ArtMethod* at bottom of frame), so loop will terminate.
654 .Lhotspill_osr_copy_loop:
655    addi t1, t1, -8   // copy 8 bytes per iteration, top-down
656    ld t3, (t1)
657    addi t2, t2, -8
658    sd t3, (t2)
659    bne t2, sp, .Lhotspill_osr_copy_loop
660 
661    ld s8, OSR_DATA_NATIVE_PC(a0)  // s8 := native PC; jump after free
662    call free  // arg a0; release OsrData*
663    jr s8      // Jump to the compiled code.
664 .Lhotspill_suspend:
665    DO_SUSPEND_CHECK continue=.Lhotspill_advance
666    j .Lhotspill_advance
667 
668 // This is the logical end of ExecuteNterpImpl, where the frame info applies.
669 .cfi_endproc
670 
671 NterpToNterpInstance:
672 %  nterp_to_nterp_instance()
673 NterpToNterpStringInit:
674 %  nterp_to_nterp_string_init()
675 NterpToNterpStatic:
676 %  nterp_to_nterp_static()
677 NterpToNterpInstanceRange:
678 %  nterp_to_nterp_instance_range()
679 NterpToNterpStringInitRange:
680 %  nterp_to_nterp_string_init_range()
681 NterpToNterpStaticRange:
682 %  nterp_to_nterp_static_range()
683 
684 NAME_END nterp_helper
685 
686 // EndExecuteNterpImpl includes the methods after .cfi_endproc, as we want the runtime to see them
687 // as part of the Nterp PCs. This label marks the end of PCs contained by the OatQuickMethodHeader
688 // created for the interpreter entry point.
689     .type EndExecuteNterpImpl, @function
690     .hidden EndExecuteNterpImpl
691     .global EndExecuteNterpImpl
692 EndExecuteNterpImpl:
693 
694 // Entrypoints into runtime.
695 NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
696 NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
697 NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
698 NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
699 NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
700 NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
701 NTERP_TRAMPOLINE nterp_get_static_field, NterpGetStaticField
702 NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
703 NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
704 
705 ENTRY nterp_deliver_pending_exception
706     DELIVER_PENDING_EXCEPTION
707 END nterp_deliver_pending_exception
708 
709 // gen_mterp.py will inline the following definitions
710 // within [ExecuteNterpImpl, EndExecuteNterpImpl).
711 %def instruction_start():
712     .type artNterpAsmInstructionStart, @function
713     .hidden artNterpAsmInstructionStart
714     .global artNterpAsmInstructionStart
715 artNterpAsmInstructionStart = .L_op_nop  // alias: table start is the first (nop) handler
716     .text
718 %def instruction_end():
719     .type artNterpAsmInstructionEnd, @function
720     .hidden artNterpAsmInstructionEnd
721     .global artNterpAsmInstructionEnd
722 artNterpAsmInstructionEnd:
723     // artNterpAsmInstructionEnd is used as landing pad for exception handling.
724     // xPC (S3) for the exception handler was set just prior to the long jump coming here.
725     FETCH_INST  // resume interpreting at the handler's dex PC
726     GET_INST_OPCODE t0
727     GOTO_OPCODE t0
728 
729 %def opcode_pre():
730 %   pass  # default hook: nothing emitted before an opcode handler
731 %def opcode_name_prefix():
732 %   return "nterp_"  # generated handler symbols are named nterp_<opcode>
733 %def opcode_start():
734     NAME_START nterp_${opcode}  // begin handler symbol for ${opcode}
735 %def opcode_end():
736     NAME_END nterp_${opcode}  // close handler symbol; records its size
737 %def opcode_slow_path_start(name):
738     NAME_START ${name}  // begin slow-path symbol
739 %def opcode_slow_path_end(name):
740     NAME_END ${name}  // close slow-path symbol; records its size
741