/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include <stdio.h>

#include <ffi.h>
#include <ffi_common.h>

#include <stdlib.h>

/* Stack alignment requirement in bytes.  */
#if defined (__APPLE__)
#define AARCH64_STACK_ALIGN 1
#else
#define AARCH64_STACK_ALIGN 16
#endif

#define N_X_ARG_REG 8
#define N_V_ARG_REG 8
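
/* Under AAPCS64 the first eight integer or pointer arguments are
   passed in registers x0-x7 and the first eight floating-point or HFA
   element arguments in registers v0-v7; everything beyond that goes
   on the stack.  */
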
#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)

union _d
{
  UINT64 d;
  UINT32 s[2];
};

struct call_context
{
  UINT64 x [AARCH64_N_XREG];
  struct
  {
    union _d d[2];
  } v [AARCH64_N_VREG];
};

#if defined (__clang__) && defined (__APPLE__)
extern void
sys_icache_invalidate (void *start, size_t len);
#endif

static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
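
/* AArch64 keeps separate instruction and data caches, so after a
   trampoline has been written through the data side the affected
   range must be flushed before it can safely be executed; that is all
   ffi_clear_cache () above does.  */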

static void *
get_x_addr (struct call_context *context, unsigned n)
{
  return &context->x[n];
}

static void *
get_s_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1].s[1];
#else
  return &context->v[n].d[0].s[0];
#endif
}

static void *
get_d_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1];
#else
  return &context->v[n].d[0];
#endif
}

static void *
get_v_addr (struct call_context *context, unsigned n)
{
  return &context->v[n];
}
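
/* Note on the endian-dependent offsets above: each V register is held
   as a full 16-byte slot in the call context (see struct call_context),
   so on big-endian AArch64 a saved double or float sits at the
   high-addressed end of the slot, while on little-endian it sits at
   offset 0.  For example, the double 1.0 (0x3ff0000000000000) stored
   in a little-endian slot begins at d[0] with the bytes
   00 00 00 00 00 00 f0 3f.  */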

/* Return the memory location at which a basic type would reside
   were it to have been stored in register n.  */

static void *
get_basic_type_addr (unsigned short type, struct call_context *context,
		     unsigned n)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return get_s_addr (context, n);
    case FFI_TYPE_DOUBLE:
      return get_d_addr (context, n);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return get_v_addr (context, n);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return get_x_addr (context, n);
    case FFI_TYPE_VOID:
      return NULL;
    default:
      FFI_ASSERT (0);
      return NULL;
    }
}

/* Return the alignment width for each of the basic types.

   On Apple platforms stack arguments take their natural alignment, so
   each case returns the type's own size; everywhere else the cases
   fall through to an 8-byte alignment.  */

static size_t
get_basic_type_alignment (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
      /* Fall through (non-Apple).  */
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
#if defined (__APPLE__)
      return sizeof (UINT8);
#endif
      /* Fall through (non-Apple).  */
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
#if defined (__APPLE__)
      return sizeof (UINT16);
#endif
      /* Fall through (non-Apple).  */
    case FFI_TYPE_UINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
      /* Fall through (non-Apple).  */
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return sizeof (UINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}
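
/* For example, a UINT16 argument spilled to the stack is 2-byte
   aligned under the Apple ABI (stack arguments are packed to their
   natural alignment) but occupies an 8-byte aligned slot under the
   standard AAPCS64; the #if fall-throughs above encode exactly that
   difference.  */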

/* Return the size in bytes for each of the basic types.  */

static size_t
get_basic_type_size (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return sizeof (UINT32);
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
      return sizeof (UINT8);
    case FFI_TYPE_SINT8:
      return sizeof (SINT8);
    case FFI_TYPE_UINT16:
      return sizeof (UINT16);
    case FFI_TYPE_SINT16:
      return sizeof (SINT16);
    case FFI_TYPE_UINT32:
      return sizeof (UINT32);
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return sizeof (SINT32);
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      return sizeof (UINT64);
    case FFI_TYPE_SINT64:
      return sizeof (SINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}

extern void
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
			    extended_cif *),
               struct call_context *context,
               extended_cif *,
               size_t,
               void (*fn)(void));

extern void
ffi_closure_SYSV (ffi_closure *);

/* Test for an FFI floating point representation.  */

static unsigned
is_floating_type (unsigned short type)
{
  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
	  || type == FFI_TYPE_LONGDOUBLE);
}

/* Test for a homogeneous structure.  */

static unsigned short
get_homogeneous_type (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned i;
      unsigned short candidate_type
	= get_homogeneous_type (ty->elements[0]);
      for (i = 1; ty->elements[i]; i++)
	{
	  unsigned short iteration_type = 0;
	  /* If we have a nested struct, we must find its homogeneous type.
	     If that fits with our candidate type, we are still
	     homogeneous.  */
	  if (ty->elements[i]->type == FFI_TYPE_STRUCT
	      && ty->elements[i]->elements)
	    {
	      iteration_type = get_homogeneous_type (ty->elements[i]);
	    }
	  else
	    {
	      iteration_type = ty->elements[i]->type;
	    }

	  /* If we are not homogeneous, return FFI_TYPE_STRUCT.  */
	  if (candidate_type != iteration_type)
	    return FFI_TYPE_STRUCT;
	}
      return candidate_type;
    }

  /* Base case: no more levels of nesting, so we are a basic type,
     and trivially homogeneous in that type.  */
  return ty->type;
}
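
/* For example, with

     struct inner { double x; };
     struct outer { struct inner i; double y; };

   get_homogeneous_type () recurses into inner and reports outer as
   homogeneous in FFI_TYPE_DOUBLE, whereas a struct mixing a float
   with a double is reported as FFI_TYPE_STRUCT.  */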

/* Determine the number of elements within a STRUCT.

   Note, we must handle nested structs.

   If ty is not a STRUCT this function will return 0.  */

static unsigned
element_count (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned n;
      unsigned elems = 0;
      for (n = 0; ty->elements[n]; n++)
	{
	  if (ty->elements[n]->type == FFI_TYPE_STRUCT
	      && ty->elements[n]->elements)
	    elems += element_count (ty->elements[n]);
	  else
	    elems++;
	}
      return elems;
    }
  return 0;
}
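
/* Continuing the example above, element_count () applied to struct
   outer returns 2: the double inside the nested struct inner plus the
   trailing double.  */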

/* Test for a homogeneous floating point aggregate.

   A homogeneous floating point aggregate is a homogeneous aggregate
   of a half-, single-, or double-precision floating point type with
   one to four elements.  Note that this includes nested structs of
   the basic type.  */

static int
is_hfa (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT
      && ty->elements[0]
      && is_floating_type (get_homogeneous_type (ty)))
    {
      unsigned n = element_count (ty);
      return n >= 1 && n <= 4;
    }
  return 0;
}
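
/* For example, struct { float x, y, z; } is an HFA of three floats,
   while struct { double a, b, c, d, e; } has five elements, fails the
   test above, and is therefore passed like any other composite.  */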

/* Test if an ffi_type is a candidate for passing in a register.

   This test does not check that sufficient registers of the
   appropriate class are actually available, merely that IFF
   sufficient registers are available then the argument will be passed
   in register(s).

   Note that an ffi_type that is deemed to be a register candidate
   will always be returned in registers.

   Returns 1 if a register candidate else 0.  */

static int
is_register_candidate (ffi_type *ty)
{
  switch (ty->type)
    {
    case FFI_TYPE_VOID:
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT64:
      return 1;

    case FFI_TYPE_STRUCT:
      if (is_hfa (ty))
        {
          return 1;
        }
      else if (ty->size > 16)
        {
          /* Too large.  Will be replaced with a pointer to memory.  The
             pointer MAY be passed in a register, but the value will
             not.  This test specifically fails since the argument will
             never be passed by value in registers.  */
          return 0;
        }
      else
        {
          /* Might be passed in registers depending on the number of
             registers required.  */
          return (ty->size + 7) / 8 < N_X_ARG_REG;
        }
      break;

    default:
      FFI_ASSERT (0);
      break;
    }

  return 0;
}

/* Test if an ffi_type argument or result is a candidate for a vector
   register.  */

static int
is_v_register_candidate (ffi_type *ty)
{
  return is_floating_type (ty->type)
	   || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
}

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number.  */
  unsigned nsrn;                /* Next vector register number.  */
  size_t nsaa;                  /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state, size_t call_frame_size)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;

#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Return the number of available consecutive core argument
   registers.  */

static unsigned
available_x (struct arg_state *state)
{
  return N_X_ARG_REG - state->ngrn;
}

/* Return the number of available consecutive vector argument
   registers.  */

static unsigned
available_v (struct arg_state *state)
{
  return N_V_ARG_REG - state->nsrn;
}

static void *
allocate_to_x (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->ngrn < N_X_ARG_REG);
  return get_x_addr (context, (state->ngrn)++);
}

static void *
allocate_to_s (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_s_addr (context, (state->nsrn)++);
}

static void *
allocate_to_d (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_d_addr (context, (state->nsrn)++);
}

static void *
allocate_to_v (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_v_addr (context, (state->nsrn)++);
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack, size_t alignment,
		   size_t size)
{
  void *allocation;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
  state->nsaa = ALIGN (state->nsaa, alignment);
#if defined (__APPLE__)
  if (state->allocating_variadic)
    state->nsaa = ALIGN (state->nsaa, 8);
#else
  state->nsaa = ALIGN (state->nsaa, 8);
#endif

  allocation = stack + state->nsaa;

  state->nsaa += size;
  return allocation;
}
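
/* For example (non-Apple): with nsaa == 4, allocating a UINT64
   (alignment 8, size 8) rounds nsaa up to 8, returns stack + 8, and
   leaves nsaa == 16 for the next stacked argument.  */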

static void
copy_basic_type (void *dest, void *source, unsigned short type)
{
  /* This is necessary to ensure that basic integer types are copied
     sign- or zero-extended to 64 bits as libffi expects.  */
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      *(float *) dest = *(float *) source;
      break;
    case FFI_TYPE_DOUBLE:
      *(double *) dest = *(double *) source;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      *(long double *) dest = *(long double *) source;
      break;
#endif
    case FFI_TYPE_UINT8:
      *(ffi_arg *) dest = *(UINT8 *) source;
      break;
    case FFI_TYPE_SINT8:
      *(ffi_sarg *) dest = *(SINT8 *) source;
      break;
    case FFI_TYPE_UINT16:
      *(ffi_arg *) dest = *(UINT16 *) source;
      break;
    case FFI_TYPE_SINT16:
      *(ffi_sarg *) dest = *(SINT16 *) source;
      break;
    case FFI_TYPE_UINT32:
      *(ffi_arg *) dest = *(UINT32 *) source;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      *(ffi_sarg *) dest = *(SINT32 *) source;
      break;
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      *(ffi_arg *) dest = *(UINT64 *) source;
      break;
    case FFI_TYPE_SINT64:
      *(ffi_sarg *) dest = *(SINT64 *) source;
      break;
    case FFI_TYPE_VOID:
      break;

    default:
      FFI_ASSERT (0);
    }
}
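
/* For example, a SINT8 source holding -1 lands in the FFI_TYPE_SINT8
   case above and is stored as (ffi_sarg) -1, i.e. sign-extended to a
   full 64-bit slot; unsigned types are zero-extended likewise.  */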

static void
copy_hfa_to_reg_or_stack (void *memory,
			  ffi_type *ty,
			  struct call_context *context,
			  unsigned char *stack,
			  struct arg_state *state)
{
  unsigned elems = element_count (ty);
  if (available_v (state) < elems)
    {
      /* There are insufficient V registers.  Further V register
	 allocations are prevented, the NSAA is adjusted (by
	 allocate_to_stack ()) and the argument is copied to memory at
	 the adjusted NSAA.  */
      state->nsrn = N_V_ARG_REG;
      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
	      memory,
	      ty->size);
    }
  else
    {
      int i;
      unsigned short type = get_homogeneous_type (ty);
      for (i = 0; i < elems; i++)
	{
	  void *reg = allocate_to_v (context, state);
	  copy_basic_type (reg, memory, type);
	  memory += get_basic_type_size (type);
	}
    }
}

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_to_register_or_stack (struct call_context *context,
			       unsigned char *stack,
			       struct arg_state *state,
			       unsigned short type)
{
  size_t alignment = get_basic_type_alignment (type);
  size_t size = alignment;
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      /* This is the only case for which the allocated stack size
	 should not match the alignment of the type.  */
      size = sizeof (UINT32);
      /* Fall through.  */
    case FFI_TYPE_DOUBLE:
      if (state->nsrn < N_V_ARG_REG)
	return allocate_to_d (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      if (state->nsrn < N_V_ARG_REG)
	return allocate_to_v (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      if (state->ngrn < N_X_ARG_REG)
	return allocate_to_x (context, state);
      state->ngrn = N_X_ARG_REG;
      break;
    default:
      FFI_ASSERT (0);
    }

  return allocate_to_stack (state, stack, alignment, size);
}

/* Copy a value to an appropriate register, or if none are
   available, to the stack.  */

static void
copy_to_register_or_stack (struct call_context *context,
			   unsigned char *stack,
			   struct arg_state *state,
			   void *value,
			   unsigned short type)
{
  copy_basic_type (
	  allocate_to_register_or_stack (context, stack, state, type),
	  value,
	  type);
}

/* Marshal the arguments from FFI representation to procedure call
   context and stack.  */

static unsigned
aarch64_prep_args (struct call_context *context, unsigned char *stack,
		   extended_cif *ecif)
{
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(ecif->cif->bytes, 16));

  for (i = 0; i < ecif->cif->nargs; i++)
    {
      ffi_type *ty = ecif->cif->arg_types[i];
      switch (ty->type)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	case FFI_TYPE_LONGDOUBLE:
#endif
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_INT:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_POINTER:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	  copy_to_register_or_stack (context, stack, &state,
				     ecif->avalue[i], ty->type);
	  break;

	case FFI_TYPE_STRUCT:
	  if (is_hfa (ty))
	    {
	      copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
					stack, &state);
	    }
	  else if (ty->size > 16)
	    {
	      /* If the argument is a composite type that is larger than 16
		 bytes, then the argument has been copied to memory, and
		 the argument is replaced by a pointer to the copy.  */

	      copy_to_register_or_stack (context, stack, &state,
					 &(ecif->avalue[i]), FFI_TYPE_POINTER);
	    }
	  else if (available_x (&state) >= (ty->size + 7) / 8)
	    {
	      /* If the argument is a composite type and the size in
		 double-words is not more than the number of available
		 X registers, then the argument is copied into consecutive
		 X registers.  */
	      int j;
	      for (j = 0; j < (ty->size + 7) / 8; j++)
		{
		  memcpy (allocate_to_x (context, &state),
			  &(((UINT64 *) ecif->avalue[i])[j]),
			  sizeof (UINT64));
		}
	    }
	  else
	    {
	      /* Otherwise, there are insufficient X registers.  Further X
		 register allocations are prevented, the NSAA is adjusted
		 (by allocate_to_stack ()) and the argument is copied to
		 memory at the adjusted NSAA.  */
	      state.ngrn = N_X_ARG_REG;

	      memcpy (allocate_to_stack (&state, stack, ty->alignment,
					 ty->size), ecif->avalue[i], ty->size);
	    }
	  break;

	default:
	  FFI_ASSERT (0);
	  break;
	}

#if defined (__APPLE__)
      if (i + 1 == ecif->cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;

	  state.allocating_variadic = 1;
	}
#endif
    }

  return ecif->cif->aarch64_flags;
}

ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  /* Round the stack up to a multiple of the stack alignment requirement.  */
  cif->bytes =
    (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);

  /* Initialize our flags.  We are interested in whether this CIF will
     touch a vector register; if so we will enable context save and
     load to those registers, otherwise not.  This is intended to be
     friendly to lazy float context switching in the kernel.  */
  cif->aarch64_flags = 0;

  if (is_v_register_candidate (cif->rtype))
    {
      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
    }
  else
    {
      int i;
      for (i = 0; i < cif->nargs; i++)
        if (is_v_register_candidate (cif->arg_types[i]))
          {
            cif->aarch64_flags |= AARCH64_FFI_WITH_V;
            break;
          }
    }

#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}

#if defined (__APPLE__)

/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
				    unsigned int nfixedargs,
				    unsigned int ntotalargs)
{
  ffi_status status;

  status = ffi_prep_cif_machdep (cif);

  cif->aarch64_nfixedargs = nfixedargs;

  return status;
}

#endif

/* Call a function with the provided arguments and capture the return
   value.  */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
  extended_cif ecif;

  ecif.cif = cif;
  ecif.avalue = avalue;
  ecif.rvalue = rvalue;

  switch (cif->abi)
    {
    case FFI_SYSV:
      {
        struct call_context context;
	size_t stack_bytes;

	/* Figure out the total amount of stack space we need.  The
	   call frame space needs to be 16-byte aligned to ensure
	   correct alignment of the first object placed in it, hence
	   the ALIGN applied to cif->bytes.  */
	stack_bytes = ALIGN(cif->bytes, 16);

	memset (&context, 0, sizeof (context));
        if (is_register_candidate (cif->rtype))
          {
            ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
            switch (cif->rtype->type)
              {
              case FFI_TYPE_VOID:
              case FFI_TYPE_FLOAT:
              case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
              case FFI_TYPE_LONGDOUBLE:
#endif
              case FFI_TYPE_UINT8:
              case FFI_TYPE_SINT8:
              case FFI_TYPE_UINT16:
              case FFI_TYPE_SINT16:
              case FFI_TYPE_UINT32:
              case FFI_TYPE_SINT32:
              case FFI_TYPE_POINTER:
              case FFI_TYPE_UINT64:
              case FFI_TYPE_INT:
              case FFI_TYPE_SINT64:
		{
		  void *addr = get_basic_type_addr (cif->rtype->type,
						    &context, 0);
		  copy_basic_type (rvalue, addr, cif->rtype->type);
		  break;
		}

              case FFI_TYPE_STRUCT:
                if (is_hfa (cif->rtype))
		  {
		    int j;
		    unsigned short type = get_homogeneous_type (cif->rtype);
		    unsigned elems = element_count (cif->rtype);
		    for (j = 0; j < elems; j++)
		      {
			void *reg = get_basic_type_addr (type, &context, j);
			copy_basic_type (rvalue, reg, type);
			rvalue += get_basic_type_size (type);
		      }
		  }
                else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
                  {
                    size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
                    memcpy (rvalue, get_x_addr (&context, 0), size);
                  }
                else
                  {
                    FFI_ASSERT (0);
                  }
                break;

              default:
                FFI_ASSERT (0);
                break;
              }
          }
        else
          {
            /* The result is too large for registers: pass the address
               of the caller-provided return block to the callee in x8.  */
            memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
            ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
			   stack_bytes, fn);
          }
        break;
      }

    default:
      FFI_ASSERT (0);
      break;
    }
}
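
/* A minimal caller-side sketch of the interface above (not part of
   this file; "puts" stands in for any target function):

     ffi_cif cif;
     ffi_type *args[] = { &ffi_type_pointer };
     const char *s = "hello";
     void *values[] = { &s };
     ffi_arg rc;

     if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
                       &ffi_type_sint32, args) == FFI_OK)
       ffi_call (&cif, FFI_FN (puts), &rc, values);
*/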

static unsigned char trampoline [] =
{ 0x70, 0x00, 0x00, 0x58,	/* ldr	x16, 1f	*/
  0x91, 0x00, 0x00, 0x10,	/* adr	x17, 2f	*/
  0x00, 0x02, 0x1f, 0xd6	/* br	x16	*/
};

/* Build a trampoline.  */

#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)			\
  ({unsigned char *__tramp = (unsigned char*)(TRAMP);			\
    UINT64  __fun = (UINT64)(FUN);					\
    UINT64  __ctx = (UINT64)(CTX);					\
    UINT64  __flags = (UINT64)(FLAGS);					\
    memcpy (__tramp, trampoline, sizeof (trampoline));			\
    memcpy (__tramp + 12, &__fun, sizeof (__fun));			\
    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));			\
    memcpy (__tramp + 28, &__flags, sizeof (__flags));			\
    ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);		\
  })
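
/* Memory laid out by FFI_INIT_TRAMPOLINE (byte offsets, matching the
   memcpy offsets above; the total is FFI_TRAMPOLINE_SIZE):

      0..11   the three instructions: load x16 from offset 12, point
              x17 at offset 20, branch to x16
     12..19   __fun   (the branch target, ffi_closure_SYSV below)
     20..27   __ctx   (the closure's codeloc, found through x17)
     28..35   __flags (cif->aarch64_flags)  */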

ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
		       cif->aarch64_flags);

  closure->cif  = cif;
  closure->user_data = user_data;
  closure->fun  = fun;

  return FFI_OK;
}
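
/* A minimal usage sketch for the closure API above (hypothetical
   callback "handler"; ffi_closure_alloc/ffi_closure_free are the
   standard libffi closure allocators):

     void handler (ffi_cif *cif, void *ret, void **args, void *data);

     void *code;
     ffi_closure *cl = ffi_closure_alloc (sizeof (ffi_closure), &code);
     if (cl != NULL
         && ffi_prep_closure_loc (cl, &cif, handler, NULL, code)
              == FFI_OK)
       ((int (*)(const char *)) code) ("hello");
     ffi_closure_free (cl);
*/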

/* Primary handler to setup and invoke a function within a closure.

   A closure, when invoked, enters via the assembler wrapper
   ffi_closure_SYSV ().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context, then passes control to
   ffi_closure_SYSV_inner () along with the saved context and a
   pointer to the stack at the point ffi_closure_SYSV () was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner () marshals the call context into ffi value
   descriptors, invokes the wrapped function, then marshals the return
   value back into the call context.  */

void FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
			void *stack)
{
  ffi_cif *cif = closure->cif;
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  void *rvalue = NULL;
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(cif->bytes, 16));

  for (i = 0; i < cif->nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];

      switch (ty->type)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_INT:
	case FFI_TYPE_POINTER:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	case FFI_TYPE_LONGDOUBLE:
#endif
	  avalue[i] = allocate_to_register_or_stack (context, stack,
						     &state, ty->type);
	  break;

	case FFI_TYPE_STRUCT:
	  if (is_hfa (ty))
	    {
	      unsigned n = element_count (ty);
	      if (available_v (&state) < n)
		{
		  state.nsrn = N_V_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
						 ty->size);
		}
	      else
		{
		  switch (get_homogeneous_type (ty))
		    {
		    case FFI_TYPE_FLOAT:
		      {
			/* Eeek! We need a pointer to the structure,
			   however the homogeneous float elements are
			   being passed in individual S registers,
			   therefore the structure is not represented as
			   a contiguous sequence of bytes in our saved
			   register context.  We need to fake up a copy
			   of the structure laid out in memory
			   correctly.  The fake can be tossed once the
			   closure function has returned, hence alloca ()
			   is sufficient.  */
			int j;
			UINT32 *p = avalue[i] = alloca (ty->size);
			for (j = 0; j < element_count (ty); j++)
			  memcpy (&p[j],
				  allocate_to_s (context, &state),
				  sizeof (*p));
			break;
		      }

		    case FFI_TYPE_DOUBLE:
		      {
			/* As above for floats: the homogeneous double
			   elements are being passed in individual D
			   registers, so the structure is not contiguous
			   in our saved register context and we must fake
			   up a correctly laid out copy.  The fake can be
			   tossed once the closure function has returned,
			   hence alloca () is sufficient.  */
			int j;
			UINT64 *p = avalue[i] = alloca (ty->size);
			for (j = 0; j < element_count (ty); j++)
			  memcpy (&p[j],
				  allocate_to_d (context, &state),
				  sizeof (*p));
			break;
		      }

#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
		    case FFI_TYPE_LONGDOUBLE:
		      /* 128-bit elements fill whole V registers, which
			 are saved contiguously in the context, so the
			 aggregate can be referenced in place.  */
		      avalue[i] = get_v_addr (context, state.nsrn);
		      state.nsrn += n;
		      break;
#endif

		    default:
		      FFI_ASSERT (0);
		      break;
		    }
		}
	    }
	  else if (ty->size > 16)
	    {
	      /* Replace a composite type of size greater than 16 bytes
		 with a pointer.  */
	      memcpy (&avalue[i],
		      allocate_to_register_or_stack (context, stack,
						     &state, FFI_TYPE_POINTER),
		      sizeof (avalue[i]));
	    }
	  else if (available_x (&state) >= (ty->size + 7) / 8)
	    {
	      avalue[i] = get_x_addr (context, state.ngrn);
	      state.ngrn += (ty->size + 7) / 8;
	    }
	  else
	    {
	      state.ngrn = N_X_ARG_REG;

	      avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
					     ty->size);
	    }
	  break;

	default:
	  FFI_ASSERT (0);
	  break;
	}
    }

  /* Figure out where the return value will be passed, either in
     registers or in a memory block allocated by the caller and passed
     in x8.  */

  if (is_register_candidate (cif->rtype))
    {
      /* Register candidates are *always* returned in registers.  */

      /* Allocate a scratchpad for the return value; we will let the
         callee scribble the result into the scratchpad then move the
         contents into the appropriate return value location for the
         call convention.  */
      rvalue = alloca (cif->rtype->size);
      (closure->fun) (cif, rvalue, avalue, closure->user_data);

      /* Copy the return value into the call context so that it is returned
         as expected to our caller.  */
      switch (cif->rtype->type)
        {
        case FFI_TYPE_VOID:
          break;

        case FFI_TYPE_UINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_INT:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
	  {
	    void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
	    copy_basic_type (addr, rvalue, cif->rtype->type);
	    break;
	  }
        case FFI_TYPE_STRUCT:
          if (is_hfa (cif->rtype))
	    {
	      int j;
	      unsigned short type = get_homogeneous_type (cif->rtype);
	      unsigned elems = element_count (cif->rtype);
	      for (j = 0; j < elems; j++)
		{
		  void *reg = get_basic_type_addr (type, context, j);
		  copy_basic_type (reg, rvalue, type);
		  rvalue += get_basic_type_size (type);
		}
	    }
          else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
            {
              size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
              memcpy (get_x_addr (context, 0), rvalue, size);
            }
          else
            {
              FFI_ASSERT (0);
            }
          break;
        default:
          FFI_ASSERT (0);
          break;
        }
    }
  else
    {
      /* The caller passed the address of the return memory block in
         x8; let the wrapped function write its result there directly.  */
      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
      (closure->fun) (cif, rvalue, avalue, closure->user_data);
    }
}