/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include <stdio.h>

#include <ffi.h>
#include <ffi_common.h>

#include <stdlib.h>

/* Stack alignment requirement in bytes */
#if defined (__APPLE__)
#define AARCH64_STACK_ALIGN 1
#else
#define AARCH64_STACK_ALIGN 16
#endif

#define N_X_ARG_REG 8
#define N_V_ARG_REG 8

#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)

union _d
{
  UINT64 d;
  UINT32 s[2];
};

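/* The call context is the register image that ffi_call_SYSV loads
   argument values from before branching to the callee, and that the
   closure entry stub ffi_closure_SYSV saves the incoming registers
   into.  x[] covers at least the argument registers x0-x7 and x8,
   the indirect-result location register (x[8] is read and written
   below for large struct returns); each v[] element is the 16-byte
   memory image of one vector register.  */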
struct call_context
{
  UINT64 x [AARCH64_N_XREG];
  struct
  {
    union _d d[2];
  } v [AARCH64_N_VREG];
};

#if defined (__clang__) && defined (__APPLE__)
extern void
sys_icache_invalidate (void *start, size_t len);
#endif

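/* Trampoline code is written through the data side of the cache;
   this helper brings the instruction side back in sync so a freshly
   built trampoline can be executed safely.  */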
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

static void *
get_x_addr (struct call_context *context, unsigned n)
{
  return &context->x[n];
}

static void *
get_s_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1].s[1];
#else
  return &context->v[n].d[0].s[0];
#endif
}

static void *
get_d_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1];
#else
  return &context->v[n].d[0];
#endif
}
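
/* Sn and Dn are the low 32 and 64 bits of Vn, but the context holds
   each Vn as a 16-byte memory image.  On little-endian targets the
   low bits are the first bytes of that image, so the scalar sits at
   offset 0; on big-endian AArch64 the stored image is laid out the
   other way around and the scalar lands in the final doubleword
   (for a float, the final word), which is what the d[1]/s[1]
   indices above select.  */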

static void *
get_v_addr (struct call_context *context, unsigned n)
{
  return &context->v[n];
}

/* Return the memory location at which a basic type would reside
   were it to have been stored in register n.  */

static void *
get_basic_type_addr (unsigned short type, struct call_context *context,
                     unsigned n)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return get_s_addr (context, n);
    case FFI_TYPE_DOUBLE:
      return get_d_addr (context, n);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return get_v_addr (context, n);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return get_x_addr (context, n);
    case FFI_TYPE_VOID:
      return NULL;
    default:
      FFI_ASSERT (0);
      return NULL;
    }
}

/* Return the alignment width for each of the basic types.  */

static size_t
get_basic_type_alignment (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
#if defined (__APPLE__)
      return sizeof (UINT8);
#endif
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
#if defined (__APPLE__)
      return sizeof (UINT16);
#endif
    case FFI_TYPE_UINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return sizeof (UINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}
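
/* The fall-throughs above are deliberate.  Darwin's variant of the
   AAPCS64 packs stack arguments to their natural alignment, so the
   __APPLE__ cases return the type's own size; on the standard
   AAPCS64 path every case except long double falls through to the
   common 8-byte stack slot alignment.  */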

/* Return the size in bytes for each of the basic types.  */

static size_t
get_basic_type_size (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return sizeof (UINT32);
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
      return sizeof (UINT8);
    case FFI_TYPE_SINT8:
      return sizeof (SINT8);
    case FFI_TYPE_UINT16:
      return sizeof (UINT16);
    case FFI_TYPE_SINT16:
      return sizeof (SINT16);
    case FFI_TYPE_UINT32:
      return sizeof (UINT32);
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return sizeof (SINT32);
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      return sizeof (UINT64);
    case FFI_TYPE_SINT64:
      return sizeof (SINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}

extern void
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
                            extended_cif *),
               struct call_context *context,
               extended_cif *,
               size_t,
               void (*fn)(void));

extern void
ffi_closure_SYSV (ffi_closure *);

/* Test for an FFI floating point representation.  */

static unsigned
is_floating_type (unsigned short type)
{
  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
          || type == FFI_TYPE_LONGDOUBLE);
}

/* Test for a homogeneous structure.  */

static unsigned short
get_homogeneous_type (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned i;
      unsigned short candidate_type
        = get_homogeneous_type (ty->elements[0]);
      for (i = 1; ty->elements[i]; i++)
        {
          unsigned short iteration_type = 0;
          /* If we have a nested struct, we must find its homogeneous type.
             If that fits with our candidate type, we are still
             homogeneous.  */
          if (ty->elements[i]->type == FFI_TYPE_STRUCT
              && ty->elements[i]->elements)
            {
              iteration_type = get_homogeneous_type (ty->elements[i]);
            }
          else
            {
              iteration_type = ty->elements[i]->type;
            }

          /* If we are not homogeneous, return FFI_TYPE_STRUCT.  */
          if (candidate_type != iteration_type)
            return FFI_TYPE_STRUCT;
        }
      return candidate_type;
    }

  /* Base case, we have no more levels of nesting, so we
     are a basic type, and so, trivially homogeneous in that type.  */
  return ty->type;
}

/* Determine the number of elements within a STRUCT.

   Note, we must handle nested structs.

   If ty is not a STRUCT this function will return 0.  */

static unsigned
element_count (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned n;
      unsigned elems = 0;
      for (n = 0; ty->elements[n]; n++)
        {
          if (ty->elements[n]->type == FFI_TYPE_STRUCT
              && ty->elements[n]->elements)
            elems += element_count (ty->elements[n]);
          else
            elems++;
        }
      return elems;
    }
  return 0;
}
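
/* For instance, given
     struct point { float x; float y; };
     struct segment { struct point a; struct point b; };
   element_count on segment recurses through the nested structs and
   yields 4, and get_homogeneous_type yields FFI_TYPE_FLOAT.  */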

/* Test for a homogeneous floating point aggregate.

   A homogeneous floating point aggregate is a homogeneous aggregate
   of a half-, single- or double-precision floating point type with
   one to four elements.  Note that this includes nested structs of
   the basic type.  */

static int
is_hfa (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT
      && ty->elements[0]
      && is_floating_type (get_homogeneous_type (ty)))
    {
      unsigned n = element_count (ty);
      return n >= 1 && n <= 4;
    }
  return 0;
}
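
/* The segment struct above is therefore an HFA: four floats, so it
   travels in s0-s3 as an argument and comes back in s0-s3 as a
   result.  Adding a double member would make the element types
   differ, get_homogeneous_type would return FFI_TYPE_STRUCT, and
   the aggregate would be handled by the ordinary composite rules
   instead.  */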

/* Test if an ffi_type is a candidate for passing in a register.

   This test does not check that sufficient registers of the
   appropriate class are actually available, merely that IFF
   sufficient registers are available then the argument will be passed
   in register(s).

   Note that an ffi_type that is deemed to be a register candidate
   will always be returned in registers.

   Returns 1 if a register candidate else 0.  */

static int
is_register_candidate (ffi_type *ty)
{
  switch (ty->type)
    {
    case FFI_TYPE_VOID:
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT64:
      return 1;

    case FFI_TYPE_STRUCT:
      if (is_hfa (ty))
        {
          return 1;
        }
      else if (ty->size > 16)
        {
          /* Too large.  Will be replaced with a pointer to memory.  The
             pointer MAY be passed in a register, but the value will
             not.  This test specifically fails since the argument will
             never be passed by value in registers.  */
          return 0;
        }
      else
        {
          /* Might be passed in registers depending on the number of
             registers required.  */
          return (ty->size + 7) / 8 < N_X_ARG_REG;
        }
      break;

    default:
      FFI_ASSERT (0);
      break;
    }

  return 0;
}

/* Test if an ffi_type argument or result is a candidate for a vector
   register.  */

static int
is_v_register_candidate (ffi_type *ty)
{
  return is_floating_type (ty->type)
           || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
}

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number.  */
  unsigned nsrn;                /* Next vector register number.  */
  size_t nsaa;                  /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state, size_t call_frame_size)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;

#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Return the number of available consecutive core argument
   registers.  */

static unsigned
available_x (struct arg_state *state)
{
  return N_X_ARG_REG - state->ngrn;
}

/* Return the number of available consecutive vector argument
   registers.  */

static unsigned
available_v (struct arg_state *state)
{
  return N_V_ARG_REG - state->nsrn;
}

static void *
allocate_to_x (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->ngrn < N_X_ARG_REG);
  return get_x_addr (context, (state->ngrn)++);
}

static void *
allocate_to_s (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_s_addr (context, (state->nsrn)++);
}

static void *
allocate_to_d (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_d_addr (context, (state->nsrn)++);
}

static void *
allocate_to_v (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_v_addr (context, (state->nsrn)++);
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack, size_t alignment,
                   size_t size)
{
  void *allocation;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
  state->nsaa = ALIGN (state->nsaa, alignment);
#if defined (__APPLE__)
  if (state->allocating_variadic)
    state->nsaa = ALIGN (state->nsaa, 8);
#else
  state->nsaa = ALIGN (state->nsaa, 8);
#endif

  allocation = stack + state->nsaa;

  state->nsaa += size;
  return allocation;
}
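
/* Worked example: once registers are exhausted, two floats passed on
   the stack land at NSAA offsets 0 and 8 under standard AAPCS64
   (each 4-byte value gets an 8-byte slot because of the second
   ALIGN), but at offsets 0 and 4 under Darwin's ABI, where
   non-variadic stack arguments pack to natural alignment and only
   variadic ones are widened to 8 bytes.  */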

static void
copy_basic_type (void *dest, void *source, unsigned short type)
{
  /* This is necessary to ensure that basic types are copied
     sign-extended (or zero-extended, for the unsigned types) to
     64 bits as libffi expects.  */
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      *(float *) dest = *(float *) source;
      break;
    case FFI_TYPE_DOUBLE:
      *(double *) dest = *(double *) source;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      *(long double *) dest = *(long double *) source;
      break;
#endif
    case FFI_TYPE_UINT8:
      *(ffi_arg *) dest = *(UINT8 *) source;
      break;
    case FFI_TYPE_SINT8:
      *(ffi_sarg *) dest = *(SINT8 *) source;
      break;
    case FFI_TYPE_UINT16:
      *(ffi_arg *) dest = *(UINT16 *) source;
      break;
    case FFI_TYPE_SINT16:
      *(ffi_sarg *) dest = *(SINT16 *) source;
      break;
    case FFI_TYPE_UINT32:
      *(ffi_arg *) dest = *(UINT32 *) source;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      *(ffi_sarg *) dest = *(SINT32 *) source;
      break;
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      *(ffi_arg *) dest = *(UINT64 *) source;
      break;
    case FFI_TYPE_SINT64:
      *(ffi_sarg *) dest = *(SINT64 *) source;
      break;
    case FFI_TYPE_VOID:
      break;

    default:
      FFI_ASSERT (0);
    }
}
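
/* For example, a SINT8 holding -1 is written to the destination as
   the full-width ffi_sarg value 0xffffffffffffffff, while a UINT8
   holding 0xff becomes 0x00000000000000ff; callers can then read
   the 64-bit slot without knowing the argument's original width.  */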

static void
copy_hfa_to_reg_or_stack (void *memory,
                          ffi_type *ty,
                          struct call_context *context,
                          unsigned char *stack,
                          struct arg_state *state)
{
  unsigned elems = element_count (ty);
  if (available_v (state) < elems)
    {
      /* There are insufficient V registers.  Further V register
         allocations are prevented, the NSAA is adjusted (by
         allocate_to_stack ()) and the argument is copied to memory at
         the adjusted NSAA.  */
      state->nsrn = N_V_ARG_REG;
      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
              memory,
              ty->size);
    }
  else
    {
      int i;
      unsigned short type = get_homogeneous_type (ty);
      for (i = 0; i < elems; i++)
        {
          void *reg = allocate_to_v (context, state);
          copy_basic_type (reg, memory, type);
          memory += get_basic_type_size (type);
        }
    }
}

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_to_register_or_stack (struct call_context *context,
                               unsigned char *stack,
                               struct arg_state *state,
                               unsigned short type)
{
  size_t alignment = get_basic_type_alignment (type);
  size_t size = alignment;
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      /* This is the only case for which the allocated stack size
         should not match the alignment of the type.  */
      size = sizeof (UINT32);
      /* Fall through.  */
    case FFI_TYPE_DOUBLE:
      if (state->nsrn < N_V_ARG_REG)
        return allocate_to_d (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      if (state->nsrn < N_V_ARG_REG)
        return allocate_to_v (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      if (state->ngrn < N_X_ARG_REG)
        return allocate_to_x (context, state);
      state->ngrn = N_X_ARG_REG;
      break;
    default:
      FFI_ASSERT (0);
    }

  return allocate_to_stack (state, stack, alignment, size);
}

/* Copy a value to an appropriate register, or if none are
   available, to the stack.  */

static void
copy_to_register_or_stack (struct call_context *context,
                           unsigned char *stack,
                           struct arg_state *state,
                           void *value,
                           unsigned short type)
{
  copy_basic_type (
    allocate_to_register_or_stack (context, stack, state, type),
    value,
    type);
}

/* Marshal the arguments from FFI representation to procedure call
   context and stack.  */

static unsigned
aarch64_prep_args (struct call_context *context, unsigned char *stack,
                   extended_cif *ecif)
{
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(ecif->cif->bytes, 16));

  for (i = 0; i < ecif->cif->nargs; i++)
    {
      ffi_type *ty = ecif->cif->arg_types[i];
      switch (ty->type)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        /* If the argument is a basic type the argument is allocated to an
           appropriate register, or if none are available, to the stack.  */
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_INT:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
          copy_to_register_or_stack (context, stack, &state,
                                     ecif->avalue[i], ty->type);
          break;

        case FFI_TYPE_STRUCT:
          if (is_hfa (ty))
            {
              copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
                                        stack, &state);
            }
          else if (ty->size > 16)
            {
              /* If the argument is a composite type that is larger than 16
                 bytes, then the argument has been copied to memory, and
                 the argument is replaced by a pointer to the copy.  */

              copy_to_register_or_stack (context, stack, &state,
                                         &(ecif->avalue[i]), FFI_TYPE_POINTER);
            }
          else if (available_x (&state) >= (ty->size + 7) / 8)
            {
              /* If the argument is a composite type and the size in
                 double-words is not more than the number of available
                 X registers, then the argument is copied into consecutive
                 X registers.  */
              int j;
              for (j = 0; j < (ty->size + 7) / 8; j++)
                {
                  memcpy (allocate_to_x (context, &state),
                          &(((UINT64 *) ecif->avalue[i])[j]),
                          sizeof (UINT64));
                }
            }
          else
            {
              /* Otherwise, there are insufficient X registers.  Further X
                 register allocations are prevented, the NSAA is adjusted
                 (by allocate_to_stack ()) and the argument is copied to
                 memory at the adjusted NSAA.  */
              state.ngrn = N_X_ARG_REG;

              memcpy (allocate_to_stack (&state, stack, ty->alignment,
                                         ty->size),
                      ecif->avalue[i], ty->size);
            }
          break;

        default:
          FFI_ASSERT (0);
          break;
        }

#if defined (__APPLE__)
      if (i + 1 == ecif->cif->aarch64_nfixedargs)
        {
          state.ngrn = N_X_ARG_REG;
          state.nsrn = N_V_ARG_REG;

          state.allocating_variadic = 1;
        }
#endif
    }

  return ecif->cif->aarch64_flags;
}

ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  /* Round the stack up to a multiple of the stack alignment requirement.  */
  cif->bytes =
    (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);

  /* Initialize our flags.  We are interested in whether this CIF will
     touch a vector register; if so we will enable context save and
     load for those registers, otherwise not.  This is intended to be
     friendly to lazy float context switching in the kernel.  */
  cif->aarch64_flags = 0;

  if (is_v_register_candidate (cif->rtype))
    {
      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
    }
  else
    {
      int i;
      for (i = 0; i < cif->nargs; i++)
        if (is_v_register_candidate (cif->arg_types[i]))
          {
            cif->aarch64_flags |= AARCH64_FFI_WITH_V;
            break;
          }
    }

#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}

#if defined (__APPLE__)

/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status
ffi_prep_cif_machdep_var (ffi_cif *cif,
                          unsigned int nfixedargs,
                          unsigned int ntotalargs)
{
  ffi_status status;

  status = ffi_prep_cif_machdep (cif);

  cif->aarch64_nfixedargs = nfixedargs;

  return status;
}

#endif

/* Call a function with the provided arguments and capture the return
   value.  */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
  extended_cif ecif;

  ecif.cif = cif;
  ecif.avalue = avalue;
  ecif.rvalue = rvalue;

  switch (cif->abi)
    {
    case FFI_SYSV:
      {
        struct call_context context;
        size_t stack_bytes;

        /* Figure out the total amount of stack space we need.  The
           call frame space needs to be 16-byte aligned to ensure
           correct alignment of the first object inserted in that
           space, hence the ALIGN applied to cif->bytes.  */
        stack_bytes = ALIGN(cif->bytes, 16);

        memset (&context, 0, sizeof (context));
        if (is_register_candidate (cif->rtype))
          {
            ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
            switch (cif->rtype->type)
              {
              case FFI_TYPE_VOID:
              case FFI_TYPE_FLOAT:
              case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
              case FFI_TYPE_LONGDOUBLE:
#endif
              case FFI_TYPE_UINT8:
              case FFI_TYPE_SINT8:
              case FFI_TYPE_UINT16:
              case FFI_TYPE_SINT16:
              case FFI_TYPE_UINT32:
              case FFI_TYPE_SINT32:
              case FFI_TYPE_POINTER:
              case FFI_TYPE_UINT64:
              case FFI_TYPE_INT:
              case FFI_TYPE_SINT64:
                {
                  void *addr = get_basic_type_addr (cif->rtype->type,
                                                    &context, 0);
                  copy_basic_type (rvalue, addr, cif->rtype->type);
                  break;
                }

              case FFI_TYPE_STRUCT:
                if (is_hfa (cif->rtype))
                  {
                    int j;
                    unsigned short type = get_homogeneous_type (cif->rtype);
                    unsigned elems = element_count (cif->rtype);
                    for (j = 0; j < elems; j++)
                      {
                        void *reg = get_basic_type_addr (type, &context, j);
                        copy_basic_type (rvalue, reg, type);
                        rvalue += get_basic_type_size (type);
                      }
                  }
                else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
                  {
                    size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
                    memcpy (rvalue, get_x_addr (&context, 0), size);
                  }
                else
                  {
                    FFI_ASSERT (0);
                  }
                break;

              default:
                FFI_ASSERT (0);
                break;
              }
          }
        else
          {
            memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
            ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
                           stack_bytes, fn);
          }
        break;
      }

    default:
      FFI_ASSERT (0);
      break;
    }
}
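
/* Illustrative use (error handling elided): calling
   double fabs (double) through this port.

     ffi_cif cif;
     ffi_type *argtypes[] = { &ffi_type_double };
     double arg = -2.0, result;
     void *argvalues[] = { &arg };

     if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
                       &ffi_type_double, argtypes) == FFI_OK)
       ffi_call (&cif, FFI_FN (fabs), &result, argvalues);

   aarch64_prep_args marshals the argument into d0 of the context,
   ffi_call_SYSV loads the registers and branches to fabs, and the
   FFI_TYPE_DOUBLE case above copies d0 back into result.  */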

static unsigned char trampoline [] =
{ 0x70, 0x00, 0x00, 0x58,       /* ldr  x16, 1f */
  0x91, 0x00, 0x00, 0x10,       /* adr  x17, 2f */
  0x00, 0x02, 0x1f, 0xd6        /* br   x16     */
};

/* Build a trampoline.  */

#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)                        \
  ({unsigned char *__tramp = (unsigned char*)(TRAMP);                   \
    UINT64  __fun = (UINT64)(FUN);                                      \
    UINT64  __ctx = (UINT64)(CTX);                                      \
    UINT64  __flags = (UINT64)(FLAGS);                                  \
    memcpy (__tramp, trampoline, sizeof (trampoline));                  \
    memcpy (__tramp + 12, &__fun, sizeof (__fun));                      \
    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));                      \
    memcpy (__tramp + 28, &__flags, sizeof (__flags));                  \
    ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);            \
  })
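
/* The initialized trampoline therefore looks like this in memory
   (offsets from the start of the trampoline):

      0: ldr x16, #12   ; x16 = the 64-bit word at +12
      4: adr x17, #16   ; x17 = address of the word at +20
      8: br  x16
     12: FUN            ; &ffi_closure_SYSV
     20: CTX            ; the closure's executable address (codeloc)
     28: FLAGS          ; cif->aarch64_flags

   Control arrives at ffi_closure_SYSV with x17 pointing at the CTX
   and FLAGS words, from which the assembler stub can recover the
   closure and decide whether the vector registers need saving.  */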

ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
                       cif->aarch64_flags);

  closure->cif = cif;
  closure->user_data = user_data;
  closure->fun = fun;

  return FFI_OK;
}
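
/* A sketch of typical use, assuming the standard libffi closure API
   (ffi_closure_alloc/ffi_closure_free); error handling elided:

     static void
     add_one (ffi_cif *cif, void *ret, void **args, void *user_data)
     {
       *(ffi_arg *) ret = *(int *) args[0] + 1;
     }

     void *code;
     ffi_closure *cl = ffi_closure_alloc (sizeof (ffi_closure), &code);
     ffi_type *argtypes[] = { &ffi_type_sint };
     ffi_cif cif;

     ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, argtypes);
     ffi_prep_closure_loc (cl, &cif, add_one, NULL, code);

   after which ((int (*)(int)) code) (41) returns 42 by way of the
   trampoline and ffi_closure_SYSV_inner below.  */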

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshals the call context into ffi value
   descriptors, invokes the wrapped function, then marshals the return
   value back into the call context.  */

void FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
                        void *stack)
{
  ffi_cif *cif = closure->cif;
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  void *rvalue = NULL;
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(cif->bytes, 16));

  for (i = 0; i < cif->nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];

      switch (ty->type)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_INT:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
          avalue[i] = allocate_to_register_or_stack (context, stack,
                                                     &state, ty->type);
          break;

        case FFI_TYPE_STRUCT:
          if (is_hfa (ty))
            {
              unsigned n = element_count (ty);
              if (available_v (&state) < n)
                {
                  state.nsrn = N_V_ARG_REG;
                  avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
                                                 ty->size);
                }
              else
                {
                  switch (get_homogeneous_type (ty))
                    {
                    case FFI_TYPE_FLOAT:
                      {
                        /* Eeek! We need a pointer to the structure,
                           however the homogeneous float elements are
                           being passed in individual S registers,
                           therefore the structure is not represented as
                           a contiguous sequence of bytes in our saved
                           register context.  We need to fake up a copy
                           of the structure laid out in memory
                           correctly.  The fake can be tossed once the
                           closure function has returned hence alloca()
                           is sufficient.  */
                        int j;
                        UINT32 *p = avalue[i] = alloca (ty->size);
                        for (j = 0; j < element_count (ty); j++)
                          memcpy (&p[j],
                                  allocate_to_s (context, &state),
                                  sizeof (*p));
                        break;
                      }

                    case FFI_TYPE_DOUBLE:
                      {
                        /* Likewise, the homogeneous double elements are
                           being passed in individual D registers, so the
                           structure is not contiguous in the saved
                           register context and must be reassembled into
                           an alloca()d copy that can be tossed once the
                           closure function has returned.  */
                        int j;
                        UINT64 *p = avalue[i] = alloca (ty->size);
                        for (j = 0; j < element_count (ty); j++)
                          memcpy (&p[j],
                                  allocate_to_d (context, &state),
                                  sizeof (*p));
                        break;
                      }

#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
                    case FFI_TYPE_LONGDOUBLE:
                      /* Each 16-byte long double element occupies a
                         full V register, and the V registers are saved
                         contiguously in the context, so the argument
                         can be used in place.  */
                      avalue[i] = get_v_addr (context, state.nsrn);
                      state.nsrn += n;
                      break;
#endif

                    default:
                      FFI_ASSERT (0);
                      break;
                    }
                }
            }
          else if (ty->size > 16)
            {
              /* Replace Composite type of size greater than 16 with a
                 pointer.  */
              memcpy (&avalue[i],
                      allocate_to_register_or_stack (context, stack,
                                                     &state, FFI_TYPE_POINTER),
                      sizeof (avalue[i]));
            }
          else if (available_x (&state) >= (ty->size + 7) / 8)
            {
              avalue[i] = get_x_addr (context, state.ngrn);
              state.ngrn += (ty->size + 7) / 8;
            }
          else
            {
              state.ngrn = N_X_ARG_REG;

              avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
                                             ty->size);
            }
          break;

        default:
          FFI_ASSERT (0);
          break;
        }
    }

  /* Figure out where the return value will be passed, either in
     registers or in a memory block allocated by the caller and passed
     in x8.  */

  if (is_register_candidate (cif->rtype))
    {
      /* Register candidates are *always* returned in registers.  */

      /* Allocate a scratchpad for the return value; we will let the
         callee scribble the result into the scratchpad then move the
         contents into the appropriate return value location for the
         call convention.  */
      rvalue = alloca (cif->rtype->size);
      (closure->fun) (cif, rvalue, avalue, closure->user_data);

      /* Copy the return value into the call context so that it is returned
         as expected to our caller.  */
      switch (cif->rtype->type)
        {
        case FFI_TYPE_VOID:
          break;

        case FFI_TYPE_UINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_INT:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
          {
            void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
            copy_basic_type (addr, rvalue, cif->rtype->type);
            break;
          }
        case FFI_TYPE_STRUCT:
          if (is_hfa (cif->rtype))
            {
              int j;
              unsigned short type = get_homogeneous_type (cif->rtype);
              unsigned elems = element_count (cif->rtype);
              for (j = 0; j < elems; j++)
                {
                  void *reg = get_basic_type_addr (type, context, j);
                  copy_basic_type (reg, rvalue, type);
                  rvalue += get_basic_type_size (type);
                }
            }
          else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
            {
              size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
              memcpy (get_x_addr (context, 0), rvalue, size);
            }
          else
            {
              FFI_ASSERT (0);
            }
          break;
        default:
          FFI_ASSERT (0);
          break;
        }
    }
  else
    {
      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
      (closure->fun) (cif, rvalue, avalue, closure->user_data);
    }
}