1/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
2Permission is hereby granted, free of charge, to any person obtaining
3a copy of this software and associated documentation files (the
4``Software''), to deal in the Software without restriction, including
5without limitation the rights to use, copy, modify, merge, publish,
6distribute, sublicense, and/or sell copies of the Software, and to
7permit persons to whom the Software is furnished to do so, subject to
8the following conditions:
9The above copyright notice and this permission notice shall be
10included in all copies or substantial portions of the Software.
11THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
12EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
13MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
14IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
15CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
18
19#define LIBFFI_ASM
20#include <fficonfig.h>
21#include <ffi.h>
22#include <ffi_cfi.h>
23#include "internal.h"
24
25	OPT	2 /*disable listing */
26/* For some macros to add unwind information */
27#include "ksarm64.h"
28	OPT	1 /*re-enable listing */
29
30#define BE(X)	0
31#define PTR_REG(n)      x##n
32#define PTR_SIZE	8
33
34	IMPORT ffi_closure_SYSV_inner
35	EXPORT	ffi_call_SYSV
36	EXPORT	ffi_closure_SYSV_V
37	EXPORT	ffi_closure_SYSV
38	EXPORT	extend_hfa_type
39	EXPORT	compress_hfa_type
40#ifdef FFI_GO_CLOSURES
41	EXPORT	ffi_go_closure_SYSV_V
42	EXPORT	ffi_go_closure_SYSV
43#endif
44
45	TEXTAREA, ALLIGN=8
46
47/* ffi_call_SYSV
48   extern void ffi_call_SYSV (void *stack, void *frame,
49			      void (*fn)(void), void *rvalue,
50			      int flags, void *closure);
51   Therefore on entry we have:
52   x0 stack
53   x1 frame
54   x2 fn
55   x3 rvalue
56   x4 flags
57   x5 closure
58*/
59
	NESTED_ENTRY ffi_call_SYSV_fake

	/* ffi_call_SYSV_fake only exists to carry unwind information; the
	   exported entry point is the ALTERNATE_ENTRY ffi_call_SYSV below.
	   The 32 bytes described here mirror the caller-allocated frame that
	   ffi_call_SYSV fills in: x29/x30 at [frame], rvalue/flags at
	   [frame, #16].  */

	/* For unwind information, Windows has to store fp and lr  */
	PROLOG_SAVE_REG_PAIR	x29, x30, #-32!

	ALTERNATE_ENTRY ffi_call_SYSV
	/* Use a stack frame allocated by our caller. */
	stp	x29, x30, [x1]		/* frame[0..15] = caller's fp, lr */
	mov	x29, x1			/* fp = frame */
	mov	sp, x0			/* sp = stack (call context + stacked args) */

	mov	x9, x2			/* save fn */
	mov	x8, x3			/* install structure return */
#ifdef FFI_GO_CLOSURES
	/*mov	x18, x5			install static chain */
#endif
	stp	x3, x4, [x29, #16]	/* save rvalue and flags */

	/* Load the vector argument passing registers, if necessary.  */
	/* Skipped when the ARG_V bit of flags is clear.  */
	tbz	x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1
	ldp	q0, q1, [sp, #0]
	ldp	q2, q3, [sp, #32]
	ldp	q4, q5, [sp, #64]
	ldp	q6, q7, [sp, #96]

ffi_call_SYSV_L1
	/* Load the core argument passing registers, including
	   the structure return pointer.  */
	/* The core register slots follow the 16*N_V_ARG_REG bytes of
	   vector register slots in the context.  */
	ldp     x0, x1, [sp, #16*N_V_ARG_REG + 0]
	ldp     x2, x3, [sp, #16*N_V_ARG_REG + 16]
	ldp     x4, x5, [sp, #16*N_V_ARG_REG + 32]
	ldp     x6, x7, [sp, #16*N_V_ARG_REG + 48]

	/* Deallocate the context, leaving the stacked arguments.  */
	add	sp, sp, #CALL_CONTEXT_SIZE

	blr     x9			/* call fn */

	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */

	/* Partially deconstruct the stack frame. */
	/* sp, fp and lr are restored here, before the dispatch, so every
	   table entry below can finish with a plain ret.  */
	mov     sp, x29
	ldp     x29, x30, [x29]

	/* Save the return value as directed.  */
	/* x5 = table base + (flags & AARCH64_RET_MASK) * 8.  */
	adr	x5, ffi_call_SYSV_return
	and	w4, w4, #AARCH64_RET_MASK
	add	x5, x5, x4, lsl #3
	br	x5

	/* Note that each table entry is 2 insns, and thus 8 bytes.
	   For integer data, note that we're storing into ffi_arg
	   and therefore we want to extend to 64 bits; these types
	   have two consecutive entries allocated for them.  */
	/* The entry order must match the numbering of the AARCH64_RET_*
	   codes; do not insert or remove instructions in the table.
	   x3 = rvalue throughout.  */
	ALIGN 4
ffi_call_SYSV_return
	ret				/* VOID */
	nop
	str	x0, [x3]		/* INT64 */
	ret
	stp	x0, x1, [x3]		/* INT128 */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
	ret
	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */
	ret
	stp	s0, s1, [x3]		/* S2 */
	ret
	str	s0, [x3]		/* S1 */
	ret
	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
	ret
	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */
	ret
	stp	d0, d1, [x3]		/* D2 */
	ret
	str	d0, [x3]		/* D1 */
	ret
	/* Q4 and Q3 have no ret: Q4 falls through Q3 into Q2, which
	   stores q0/q1 and returns.  */
	str	q3, [x3, #48]		/* Q4 */
	nop
	str	q2, [x3, #32]		/* Q3 */
	nop
	stp	q0, q1, [x3]		/* Q2 */
	ret
	str	q0, [x3]		/* Q1 */
	ret
	/* Each sub-64-bit integer type uses two slots: the first extends
	   and stores, then execution falls into the "reserved" slot, which
	   holds the ret.  */
	uxtb	w0, w0			/* UINT8 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	uxth	w0, w0			/* UINT16 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	mov	w0, w0			/* UINT32 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	sxtb	x0, w0			/* SINT8 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	sxth	x0, w0			/* SINT16 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	sxtw	x0, w0			/* SINT32 */
	str	x0, [x3]
	ret				/* reserved */
	nop


	NESTED_END ffi_call_SYSV_fake
183
184
/* ffi_closure_SYSV
   Closure invocation glue.  This is the low-level code invoked directly by
   the closure trampoline to set up and call a closure.
   On entry x17 points to a struct ffi_closure and x16 has been clobbered;
   all other registers are preserved.
   We allocate a call context and save the argument passing registers,
   then invoke the generic C ffi_closure_SYSV_inner() function to do all
   the real work.  On return we load the result passing registers back from
   the call context.
*/
195
196#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
197
198	NESTED_ENTRY	ffi_closure_SYSV_V
199	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!
200
201	/* Save the argument passing vector registers.  */
202	stp	q0, q1, [sp, #16 + 0]
203	stp	q2, q3, [sp, #16 + 32]
204	stp	q4, q5, [sp, #16 + 64]
205	stp	q6, q7, [sp, #16 + 96]
206
207	b	ffi_closure_SYSV_save_argument
208	NESTED_END	ffi_closure_SYSV_V
209
	NESTED_ENTRY	ffi_closure_SYSV
	/* Frame of ffi_closure_SYSV_FS bytes:
	     [sp, #0]				saved x29, x30
	     [sp, #16]				call context (CALL_CONTEXT_SIZE bytes)
	     [sp, #16+CALL_CONTEXT_SIZE]	64-byte return value buffer  */
	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!

	/* Also the join point for ffi_closure_SYSV_V, which has already
	   pushed an identical frame and saved q0-q7.  */
ffi_closure_SYSV_save_argument
	/* Save the argument passing core registers.  */
	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]

	/* Load ffi_closure_inner arguments.  */
	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */
	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */

	/* Shared tail: the go-closure entry points branch here with
	   x0-x2 (cif, fn, user_data) already loaded.  */
do_closure
	add	x3, sp, #16							/* load context */
	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */
	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue */
	mov	x6, x8					/* load struct_rval */

	bl	ffi_closure_SYSV_inner

	/* Load the return value as directed.  */
	/* w0 is the value returned by ffi_closure_SYSV_inner; masked with
	   AARCH64_RET_MASK it indexes the table below (8 bytes per entry).
	   x3 = address of the return value buffer.  */
	adr	x1, ffi_closure_SYSV_return_base
	and	w0, w0, #AARCH64_RET_MASK
	add	x1, x1, x0, lsl #3
	add	x3, sp, #16+CALL_CONTEXT_SIZE
	br	x1

	/* Note that each table entry is 2 insns, and thus 8 bytes.  */
	/* Do not insert or remove instructions inside the table: the
	   dispatch above relies on the fixed 8-byte stride.  */
	ALIGN	8
ffi_closure_SYSV_return_base
	b	ffi_closure_SYSV_epilog			/* VOID */
	nop
	ldr	x0, [x3]		/* INT64 */
	b	ffi_closure_SYSV_epilog
	ldp	x0, x1, [x3]		/* INT128 */
	b	ffi_closure_SYSV_epilog
	brk	#1000			/* UNUSED */
	nop
	brk	#1000			/* UNUSED */
	nop
	brk	#1000			/* UNUSED */
	nop
	brk	#1000			/* UNUSED */
	nop
	brk	#1000			/* UNUSED */
	nop
	/* S4 falls through S3 into S2, which loads s0/s1 and branches.  */
	ldr	s3, [x3, #12]		/* S4 */
	nop
	ldr	s2, [x3, #8]		/* S3 */
	nop
	ldp	s0, s1, [x3]		/* S2 */
	b	ffi_closure_SYSV_epilog
	ldr	s0, [x3]		/* S1 */
	b	ffi_closure_SYSV_epilog
	/* D4 falls through D3 into D2.  */
	ldr	d3, [x3, #24]		/* D4 */
	nop
	ldr	d2, [x3, #16]		/* D3 */
	nop
	ldp	d0, d1, [x3]		/* D2 */
	b	ffi_closure_SYSV_epilog
	ldr	d0, [x3]		/* D1 */
	b	ffi_closure_SYSV_epilog
	/* Q4 falls through Q3 into Q2.  */
	ldr	q3, [x3, #48]		/* Q4 */
	nop
	ldr	q2, [x3, #32]		/* Q3 */
	nop
	ldp	q0, q1, [x3]		/* Q2 */
	b	ffi_closure_SYSV_epilog
	ldr	q0, [x3]		/* Q1 */
	b	ffi_closure_SYSV_epilog
	/* BE(X) is defined as 0 in this file, so the sub-word loads
	   below all read from offset 0 of the buffer.  */
	ldrb	w0, [x3, #BE(7)]	/* UINT8 */
	b	ffi_closure_SYSV_epilog
	brk	#1000			/* reserved */
	nop
	ldrh	w0, [x3, #BE(6)]	/* UINT16 */
	b	ffi_closure_SYSV_epilog
	brk	#1000			/* reserved */
	nop
	ldr	w0, [x3, #BE(4)]	/* UINT32 */
	b	ffi_closure_SYSV_epilog
	brk	#1000			/* reserved */
	nop
	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */
	b	ffi_closure_SYSV_epilog
	brk	#1000			/* reserved */
	nop
	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */
	b	ffi_closure_SYSV_epilog
	brk	#1000			/* reserved */
	nop
	/* Last entry: no branch, it falls straight into the epilog.  */
	ldrsw	x0, [x3, #BE(4)]	/* SINT32 */
	nop
					/* reserved */

ffi_closure_SYSV_epilog
	/* Pop the whole frame, restoring x29/x30, and return.  */
	EPILOG_RESTORE_REG_PAIR	x29, x30, #ffi_closure_SYSV_FS!
	EPILOG_RETURN
	NESTED_END	ffi_closure_SYSV
310
311
312#ifdef FFI_GO_CLOSURES
313	NESTED_ENTRY	ffi_go_closure_SYSV_V
314	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!
315
316	/* Save the argument passing vector registers.  */
317	stp	q0, q1, [sp, #16 + 0]
318	stp	q2, q3, [sp, #16 + 32]
319	stp	q4, q5, [sp, #16 + 64]
320	stp	q6, q7, [sp, #16 + 96]
321	b	ffi_go_closure_SYSV_save_argument
322	NESTED_END	ffi_go_closure_SYSV_V
323
	NESTED_ENTRY	ffi_go_closure_SYSV
	/* Same frame size as ffi_closure_SYSV, because the tail of that
	   function (do_closure and its epilog) is reused below.  */
	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!

	/* Join point for ffi_go_closure_SYSV_V.  */
ffi_go_closure_SYSV_save_argument
	/* Save the argument passing core registers.  */
	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]

	/* Load ffi_closure_inner arguments.  */
	/* NOTE(review): this reads the closure pointer from x18, but the
	   matching "install static chain" mov into x18 in ffi_call_SYSV is
	   commented out, and x18 is normally platform-reserved on Windows.
	   Confirm FFI_GO_CLOSURES is never enabled for this target.  */
	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
	mov	x2, x18					/* load user_data */
	b	do_closure
	NESTED_END	ffi_go_closure_SYSV
339
340#endif /* FFI_GO_CLOSURES */
341
342
343/* void extend_hfa_type (void *dest, void *src, int h) */
344
345	LEAF_ENTRY	extend_hfa_type
346
347	adr	x3, extend_hfa_type_jump_base
348	and	w2, w2, #AARCH64_RET_MASK
349	sub	x2, x2, #AARCH64_RET_S4
350	add	x3, x3, x2, lsl #4
351	br	x3
352
353	ALIGN	4
354extend_hfa_type_jump_base
355	ldp	s16, s17, [x1]		/* S4 */
356	ldp	s18, s19, [x1, #8]
357	b	extend_hfa_type_store_4
358	nop
359
360	ldp	s16, s17, [x1]		/* S3 */
361	ldr	s18, [x1, #8]
362	b	extend_hfa_type_store_3
363	nop
364
365	ldp	s16, s17, [x1]		/* S2 */
366	b	extend_hfa_type_store_2
367	nop
368	nop
369
370	ldr	s16, [x1]		/* S1 */
371	b	extend_hfa_type_store_1
372	nop
373	nop
374
375	ldp	d16, d17, [x1]		/* D4 */
376	ldp	d18, d19, [x1, #16]
377	b       extend_hfa_type_store_4
378	nop
379
380	ldp     d16, d17, [x1]		/* D3 */
381	ldr     d18, [x1, #16]
382	b	extend_hfa_type_store_3
383	nop
384
385	ldp	d16, d17, [x1]		/* D2 */
386	b	extend_hfa_type_store_2
387	nop
388	nop
389
390	ldr	d16, [x1]		/* D1 */
391	b	extend_hfa_type_store_1
392	nop
393	nop
394
395	ldp	q16, q17, [x1]		/* Q4 */
396	ldp	q18, q19, [x1, #16]
397	b	extend_hfa_type_store_4
398	nop
399
400	ldp	q16, q17, [x1]		/* Q3 */
401	ldr	q18, [x1, #16]
402	b	extend_hfa_type_store_3
403	nop
404
405	ldp	q16, q17, [x1]		/* Q2 */
406	b	extend_hfa_type_store_2
407	nop
408	nop
409
410	ldr	q16, [x1]		/* Q1 */
411	b	extend_hfa_type_store_1
412
413extend_hfa_type_store_4
414	str	q19, [x0, #48]
415extend_hfa_type_store_3
416	str	q18, [x0, #32]
417extend_hfa_type_store_2
418	str	q17, [x0, #16]
419extend_hfa_type_store_1
420	str	q16, [x0]
421	ret
422
423	LEAF_END	extend_hfa_type
424
425
426/* void compress_hfa_type (void *dest, void *reg, int h) */
427
	LEAF_ENTRY	compress_hfa_type

	/* Pack a homogeneous floating-point aggregate held as one 16-byte
	   slot per element back into contiguous elements.
	   In:   x0 = dest (packed output)
	         x1 = reg  (16-byte slots, element i at reg + 16*i)
	         w2 = h    (AARCH64_RET_S4 .. Q1 after masking)
	   Uses: x2, x3, q16-q19.
	   Dispatch: x3 = table base + ((h & AARCH64_RET_MASK)
	   - AARCH64_RET_S4) * 16; every table entry is exactly four
	   instructions (16 bytes), padded with nops.  */
	adr	x3, compress_hfa_type_jump_base
	and	w2, w2, #AARCH64_RET_MASK
	sub	x2, x2, #AARCH64_RET_S4
	add	x3, x3, x2, lsl #4
	br	x3

	ALIGN	4
compress_hfa_type_jump_base
	/* S entries: st1-st4 write lane 0 of each listed .s register
	   to consecutive locations at dest.  */
	ldp	q16, q17, [x1]		/* S4 */
	ldp	q18, q19, [x1, #32]
	st4	{ v16.s, v17.s, v18.s, v19.s }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* S3 */
	ldr	q18, [x1, #32]
	st3	{ v16.s, v17.s, v18.s }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* S2 */
	st2	{ v16.s, v17.s }[0], [x0]
	ret
	nop

	ldr	q16, [x1]		/* S1 */
	st1	{ v16.s }[0], [x0]
	ret
	nop

	/* D entries: same scheme, using lane 0 of the .d view.  */
	ldp	q16, q17, [x1]		/* D4 */
	ldp	q18, q19, [x1, #32]
	st4	{ v16.d, v17.d, v18.d, v19.d }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* D3 */
	ldr	q18, [x1, #32]
	st3	{ v16.d, v17.d, v18.d }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* D2 */
	st2	{ v16.d, v17.d }[0], [x0]
	ret
	nop

	ldr	q16, [x1]		/* D1 */
	st1	{ v16.d }[0], [x0]
	ret
	nop

	/* Q entries: elements are already 16 bytes, so this is a plain
	   copy.  Q4/Q3 do not fit in four instructions and branch to the
	   shared store tail below.  */
	ldp	q16, q17, [x1]		/* Q4 */
	ldp	q18, q19, [x1, #32]
	b	compress_hfa_type_store_q4
	nop

	ldp	q16, q17, [x1]		/* Q3 */
	ldr	q18, [x1, #32]
	b	compress_hfa_type_store_q3
	nop

	ldp	q16, q17, [x1]		/* Q2 */
	stp	q16, q17, [x0]
	ret
	nop

	/* Last entry: needs no padding, nothing is indexed past it.  */
	ldr	q16, [x1]		/* Q1 */
	str	q16, [x0]
	ret

	/* Store tail for Q4 (all four elements) and Q3 (first three).  */
compress_hfa_type_store_q4
	str	q19, [x0, #48]
compress_hfa_type_store_q3
	str	q18, [x0, #32]
	stp	q16, q17, [x0]
	ret

	LEAF_END	compress_hfa_type
505
506	END