1 /* -----------------------------------------------------------------------
2    ffi.c - Copyright (c) 2011 Timothy Wall
3            Copyright (c) 2011 Plausible Labs Cooperative, Inc.
4            Copyright (c) 2011 Anthony Green
5 	   Copyright (c) 2011 Free Software Foundation
6            Copyright (c) 1998, 2008, 2011  Red Hat, Inc.
7 
8    ARM Foreign Function Interface
9 
10    Permission is hereby granted, free of charge, to any person obtaining
11    a copy of this software and associated documentation files (the
12    ``Software''), to deal in the Software without restriction, including
13    without limitation the rights to use, copy, modify, merge, publish,
14    distribute, sublicense, and/or sell copies of the Software, and to
15    permit persons to whom the Software is furnished to do so, subject to
16    the following conditions:
17 
18    The above copyright notice and this permission notice shall be included
19    in all copies or substantial portions of the Software.
20 
21    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
22    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24    NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28    DEALINGS IN THE SOFTWARE.
29    ----------------------------------------------------------------------- */
30 
31 #if defined(__arm__) || defined(_M_ARM)
32 #include <fficonfig.h>
33 #include <ffi.h>
34 #include <ffi_common.h>
35 #include <stdint.h>
36 #include <stdlib.h>
37 #include "internal.h"
38 
39 #if defined(_MSC_VER) && defined(_M_ARM)
40 #define WIN32_LEAN_AND_MEAN
41 #include <windows.h>
42 #endif
43 
44 #if FFI_EXEC_TRAMPOLINE_TABLE
45 
46 #ifdef __MACH__
47 #include <mach/machine/vm_param.h>
48 #endif
49 
50 #else
51 #ifndef _M_ARM
52 extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
53 #else
54 extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN;
55 #endif
56 #endif
57 
58 /* Forward declares. */
59 static int vfp_type_p (const ffi_type *);
60 static void layout_vfp_args (ffi_cif *);
61 
62 static void *
ffi_align(ffi_type * ty,void * p)63 ffi_align (ffi_type *ty, void *p)
64 {
65   /* Align if necessary */
66   size_t alignment;
67 #ifdef _WIN32_WCE
68   alignment = 4;
69 #else
70   alignment = ty->alignment;
71   if (alignment < 4)
72     alignment = 4;
73 #endif
74   return (void *) FFI_ALIGN (p, alignment);
75 }
76 
77 static size_t
ffi_put_arg(ffi_type * ty,void * src,void * dst)78 ffi_put_arg (ffi_type *ty, void *src, void *dst)
79 {
80   size_t z = ty->size;
81 
82   switch (ty->type)
83     {
84     case FFI_TYPE_SINT8:
85       *(UINT32 *)dst = *(SINT8 *)src;
86       break;
87     case FFI_TYPE_UINT8:
88       *(UINT32 *)dst = *(UINT8 *)src;
89       break;
90     case FFI_TYPE_SINT16:
91       *(UINT32 *)dst = *(SINT16 *)src;
92       break;
93     case FFI_TYPE_UINT16:
94       *(UINT32 *)dst = *(UINT16 *)src;
95       break;
96 
97     case FFI_TYPE_INT:
98     case FFI_TYPE_SINT32:
99     case FFI_TYPE_UINT32:
100     case FFI_TYPE_POINTER:
101 #ifndef _MSC_VER
102     case FFI_TYPE_FLOAT:
103 #endif
104       *(UINT32 *)dst = *(UINT32 *)src;
105       break;
106 
107 #ifdef _MSC_VER
108     // casting a float* to a UINT32* doesn't work on Windows
109     case FFI_TYPE_FLOAT:
110         *(uintptr_t *)dst = 0;
111         *(float *)dst = *(float *)src;
112         break;
113 #endif
114 
115     case FFI_TYPE_SINT64:
116     case FFI_TYPE_UINT64:
117     case FFI_TYPE_DOUBLE:
118       *(UINT64 *)dst = *(UINT64 *)src;
119       break;
120 
121     case FFI_TYPE_STRUCT:
122     case FFI_TYPE_COMPLEX:
123       memcpy (dst, src, z);
124       break;
125 
126     default:
127       abort();
128     }
129 
130   return FFI_ALIGN (z, 4);
131 }
132 
133 /* ffi_prep_args is called once stack space has been allocated
134    for the function's arguments.
135 
136    The vfp_space parameter is the load area for VFP regs, the return
137    value is cif->vfp_used (word bitset of VFP regs used for passing
138    arguments). These are only used for the VFP hard-float ABI.
139 */
140 static void
ffi_prep_args_SYSV(ffi_cif * cif,int flags,void * rvalue,void ** avalue,char * argp)141 ffi_prep_args_SYSV (ffi_cif *cif, int flags, void *rvalue,
142 		    void **avalue, char *argp)
143 {
144   ffi_type **arg_types = cif->arg_types;
145   int i, n;
146 
147   if (flags == ARM_TYPE_STRUCT)
148     {
149       *(void **) argp = rvalue;
150       argp += 4;
151     }
152 
153   for (i = 0, n = cif->nargs; i < n; i++)
154     {
155       ffi_type *ty = arg_types[i];
156       argp = ffi_align (ty, argp);
157       argp += ffi_put_arg (ty, avalue[i], argp);
158     }
159 }
160 
161 static void
ffi_prep_args_VFP(ffi_cif * cif,int flags,void * rvalue,void ** avalue,char * stack,char * vfp_space)162 ffi_prep_args_VFP (ffi_cif *cif, int flags, void *rvalue,
163                    void **avalue, char *stack, char *vfp_space)
164 {
165   ffi_type **arg_types = cif->arg_types;
166   int i, n, vi = 0;
167   char *argp, *regp, *eo_regp;
168   char stack_used = 0;
169   char done_with_regs = 0;
170 
171   /* The first 4 words on the stack are used for values
172      passed in core registers.  */
173   regp = stack;
174   eo_regp = argp = regp + 16;
175 
176   /* If the function returns an FFI_TYPE_STRUCT in memory,
177      that address is passed in r0 to the function.  */
178   if (flags == ARM_TYPE_STRUCT)
179     {
180       *(void **) regp = rvalue;
181       regp += 4;
182     }
183 
184   for (i = 0, n = cif->nargs; i < n; i++)
185     {
186       ffi_type *ty = arg_types[i];
187       void *a = avalue[i];
188       int is_vfp_type = vfp_type_p (ty);
189 
190       /* Allocated in VFP registers. */
191       if (vi < cif->vfp_nargs && is_vfp_type)
192 	{
193 	  char *vfp_slot = vfp_space + cif->vfp_args[vi++] * 4;
194 	  ffi_put_arg (ty, a, vfp_slot);
195 	  continue;
196 	}
197       /* Try allocating in core registers. */
198       else if (!done_with_regs && !is_vfp_type)
199 	{
200 	  char *tregp = ffi_align (ty, regp);
201 	  size_t size = ty->size;
202 	  size = (size < 4) ? 4 : size;	// pad
203 	  /* Check if there is space left in the aligned register
204 	     area to place the argument.  */
205 	  if (tregp + size <= eo_regp)
206 	    {
207 	      regp = tregp + ffi_put_arg (ty, a, tregp);
208 	      done_with_regs = (regp == argp);
209 	      // ensure we did not write into the stack area
210 	      FFI_ASSERT (regp <= argp);
211 	      continue;
212 	    }
213 	  /* In case there are no arguments in the stack area yet,
214 	     the argument is passed in the remaining core registers
215 	     and on the stack.  */
216 	  else if (!stack_used)
217 	    {
218 	      stack_used = 1;
219 	      done_with_regs = 1;
220 	      argp = tregp + ffi_put_arg (ty, a, tregp);
221 	      FFI_ASSERT (eo_regp < argp);
222 	      continue;
223 	    }
224 	}
225       /* Base case, arguments are passed on the stack */
226       stack_used = 1;
227       argp = ffi_align (ty, argp);
228       argp += ffi_put_arg (ty, a, argp);
229     }
230 }
231 
232 /* Perform machine dependent cif processing */
233 ffi_status FFI_HIDDEN
ffi_prep_cif_machdep(ffi_cif * cif)234 ffi_prep_cif_machdep (ffi_cif *cif)
235 {
236   int flags = 0, cabi = cif->abi;
237   size_t bytes = cif->bytes;
238 
239   /* Map out the register placements of VFP register args.  The VFP
240      hard-float calling conventions are slightly more sophisticated
241      than the base calling conventions, so we do it here instead of
242      in ffi_prep_args(). */
243   if (cabi == FFI_VFP)
244     layout_vfp_args (cif);
245 
246   /* Set the return type flag */
247   switch (cif->rtype->type)
248     {
249     case FFI_TYPE_VOID:
250       flags = ARM_TYPE_VOID;
251       break;
252 
253     case FFI_TYPE_INT:
254     case FFI_TYPE_UINT8:
255     case FFI_TYPE_SINT8:
256     case FFI_TYPE_UINT16:
257     case FFI_TYPE_SINT16:
258     case FFI_TYPE_UINT32:
259     case FFI_TYPE_SINT32:
260     case FFI_TYPE_POINTER:
261       flags = ARM_TYPE_INT;
262       break;
263 
264     case FFI_TYPE_SINT64:
265     case FFI_TYPE_UINT64:
266       flags = ARM_TYPE_INT64;
267       break;
268 
269     case FFI_TYPE_FLOAT:
270       flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_S : ARM_TYPE_INT);
271       break;
272     case FFI_TYPE_DOUBLE:
273       flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_D : ARM_TYPE_INT64);
274       break;
275 
276     case FFI_TYPE_STRUCT:
277     case FFI_TYPE_COMPLEX:
278       if (cabi == FFI_VFP)
279 	{
280 	  int h = vfp_type_p (cif->rtype);
281 
282 	  flags = ARM_TYPE_VFP_N;
283 	  if (h == 0x100 + FFI_TYPE_FLOAT)
284 	    flags = ARM_TYPE_VFP_S;
285 	  if (h == 0x100 + FFI_TYPE_DOUBLE)
286 	    flags = ARM_TYPE_VFP_D;
287 	  if (h != 0)
288 	      break;
289 	}
290 
291       /* A Composite Type not larger than 4 bytes is returned in r0.
292 	 A Composite Type larger than 4 bytes, or whose size cannot
293 	 be determined statically ... is stored in memory at an
294 	 address passed [in r0].  */
295       if (cif->rtype->size <= 4)
296 	flags = ARM_TYPE_INT;
297       else
298 	{
299 	  flags = ARM_TYPE_STRUCT;
300 	  bytes += 4;
301 	}
302       break;
303 
304     default:
305       abort();
306     }
307 
308   /* Round the stack up to a multiple of 8 bytes.  This isn't needed
309      everywhere, but it is on some platforms, and it doesn't harm anything
310      when it isn't needed.  */
311   bytes = FFI_ALIGN (bytes, 8);
312 
313   /* Minimum stack space is the 4 register arguments that we pop.  */
314   if (bytes < 4*4)
315     bytes = 4*4;
316 
317   cif->bytes = bytes;
318   cif->flags = flags;
319 
320   return FFI_OK;
321 }
322 
323 /* Perform machine dependent cif processing for variadic calls */
324 ffi_status FFI_HIDDEN
ffi_prep_cif_machdep_var(ffi_cif * cif,unsigned int nfixedargs,unsigned int ntotalargs)325 ffi_prep_cif_machdep_var (ffi_cif * cif,
326 			  unsigned int nfixedargs, unsigned int ntotalargs)
327 {
328   /* VFP variadic calls actually use the SYSV ABI */
329   if (cif->abi == FFI_VFP)
330     cif->abi = FFI_SYSV;
331 
332   return ffi_prep_cif_machdep (cif);
333 }
334 
335 /* Prototypes for assembly functions, in sysv.S.  */
336 
337 struct call_frame
338 {
339   void *fp;
340   void *lr;
341   void *rvalue;
342   int flags;
343   void *closure;
344 };
345 
346 extern void ffi_call_SYSV (void *stack, struct call_frame *,
347 			   void (*fn) (void)) FFI_HIDDEN;
348 extern void ffi_call_VFP (void *vfp_space, struct call_frame *,
349 			   void (*fn) (void), unsigned vfp_used) FFI_HIDDEN;
350 
351 static void
ffi_call_int(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue,void * closure)352 ffi_call_int (ffi_cif * cif, void (*fn) (void), void *rvalue,
353 	      void **avalue, void *closure)
354 {
355   int flags = cif->flags;
356   ffi_type *rtype = cif->rtype;
357   size_t bytes, rsize, vfp_size;
358   char *stack, *vfp_space, *new_rvalue;
359   struct call_frame *frame;
360 
361   rsize = 0;
362   if (rvalue == NULL)
363     {
364       /* If the return value is a struct and we don't have a return
365 	 value address then we need to make one.  Otherwise the return
366 	 value is in registers and we can ignore them.  */
367       if (flags == ARM_TYPE_STRUCT)
368 	rsize = rtype->size;
369       else
370 	flags = ARM_TYPE_VOID;
371     }
372   else if (flags == ARM_TYPE_VFP_N)
373     {
374       /* Largest case is double x 4. */
375       rsize = 32;
376     }
377   else if (flags == ARM_TYPE_INT && rtype->type == FFI_TYPE_STRUCT)
378     rsize = 4;
379 
380   /* Largest case.  */
381   vfp_size = (cif->abi == FFI_VFP && cif->vfp_used ? 8*8: 0);
382 
383   bytes = cif->bytes;
384   stack = alloca (vfp_size + bytes + sizeof(struct call_frame) + rsize);
385 
386   vfp_space = NULL;
387   if (vfp_size)
388     {
389       vfp_space = stack;
390       stack += vfp_size;
391     }
392 
393   frame = (struct call_frame *)(stack + bytes);
394 
395   new_rvalue = rvalue;
396   if (rsize)
397     new_rvalue = (void *)(frame + 1);
398 
399   frame->rvalue = new_rvalue;
400   frame->flags = flags;
401   frame->closure = closure;
402 
403   if (vfp_space)
404     {
405       ffi_prep_args_VFP (cif, flags, new_rvalue, avalue, stack, vfp_space);
406       ffi_call_VFP (vfp_space, frame, fn, cif->vfp_used);
407     }
408   else
409     {
410       ffi_prep_args_SYSV (cif, flags, new_rvalue, avalue, stack);
411       ffi_call_SYSV (stack, frame, fn);
412     }
413 
414   if (rvalue && rvalue != new_rvalue)
415     memcpy (rvalue, new_rvalue, rtype->size);
416 }
417 
418 void
ffi_call(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue)419 ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
420 {
421   ffi_call_int (cif, fn, rvalue, avalue, NULL);
422 }
423 
424 void
ffi_call_go(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue,void * closure)425 ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
426 	     void **avalue, void *closure)
427 {
428   ffi_call_int (cif, fn, rvalue, avalue, closure);
429 }
430 
431 static void *
ffi_prep_incoming_args_SYSV(ffi_cif * cif,void * rvalue,char * argp,void ** avalue)432 ffi_prep_incoming_args_SYSV (ffi_cif *cif, void *rvalue,
433 			     char *argp, void **avalue)
434 {
435   ffi_type **arg_types = cif->arg_types;
436   int i, n;
437 
438   if (cif->flags == ARM_TYPE_STRUCT)
439     {
440       rvalue = *(void **) argp;
441       argp += 4;
442     }
443   else
444     {
445       if (cif->rtype->size && cif->rtype->size < 4)
446         *(uint32_t *) rvalue = 0;
447     }
448 
449   for (i = 0, n = cif->nargs; i < n; i++)
450     {
451       ffi_type *ty = arg_types[i];
452       size_t z = ty->size;
453 
454       argp = ffi_align (ty, argp);
455       avalue[i] = (void *) argp;
456       argp += z;
457     }
458 
459   return rvalue;
460 }
461 
462 static void *
ffi_prep_incoming_args_VFP(ffi_cif * cif,void * rvalue,char * stack,char * vfp_space,void ** avalue)463 ffi_prep_incoming_args_VFP (ffi_cif *cif, void *rvalue, char *stack,
464 			    char *vfp_space, void **avalue)
465 {
466   ffi_type **arg_types = cif->arg_types;
467   int i, n, vi = 0;
468   char *argp, *regp, *eo_regp;
469   char done_with_regs = 0;
470   char stack_used = 0;
471 
472   regp = stack;
473   eo_regp = argp = regp + 16;
474 
475   if (cif->flags == ARM_TYPE_STRUCT)
476     {
477       rvalue = *(void **) regp;
478       regp += 4;
479     }
480 
481   for (i = 0, n = cif->nargs; i < n; i++)
482     {
483       ffi_type *ty = arg_types[i];
484       int is_vfp_type = vfp_type_p (ty);
485       size_t z = ty->size;
486 
487       if (vi < cif->vfp_nargs && is_vfp_type)
488 	{
489 	  avalue[i] = vfp_space + cif->vfp_args[vi++] * 4;
490 	  continue;
491 	}
492       else if (!done_with_regs && !is_vfp_type)
493 	{
494 	  char *tregp = ffi_align (ty, regp);
495 
496 	  z = (z < 4) ? 4 : z;	// pad
497 
498 	  /* If the arguments either fits into the registers or uses registers
499 	     and stack, while we haven't read other things from the stack */
500 	  if (tregp + z <= eo_regp || !stack_used)
501 	    {
502 	      /* Because we're little endian, this is what it turns into.  */
503 	      avalue[i] = (void *) tregp;
504 	      regp = tregp + z;
505 
506 	      /* If we read past the last core register, make sure we
507 		 have not read from the stack before and continue
508 		 reading after regp.  */
509 	      if (regp > eo_regp)
510 		{
511 		  FFI_ASSERT (!stack_used);
512 		  argp = regp;
513 		}
514 	      if (regp >= eo_regp)
515 		{
516 		  done_with_regs = 1;
517 		  stack_used = 1;
518 		}
519 	      continue;
520 	    }
521 	}
522 
523       stack_used = 1;
524       argp = ffi_align (ty, argp);
525       avalue[i] = (void *) argp;
526       argp += z;
527     }
528 
529   return rvalue;
530 }
531 
/* Frame passed to the ffi_closure_inner_* functions.  The field order
   is presumably fixed by the assembly closure entry points that build
   it -- TODO confirm against sysv.S.  */
struct closure_frame {
  /* VFP register area, 8 x 8 bytes, 8-byte aligned; only the VFP inner
     function reads it.  */
  char vfp_space[8*8] __attribute__((aligned(8)));
  /* Buffer offered to the user function for its result, 8 x 4 bytes.  */
  char result[8*4];
  /* Start of the incoming argument area.  */
  char argp[];
};
538 
539 int FFI_HIDDEN
ffi_closure_inner_SYSV(ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,struct closure_frame * frame)540 ffi_closure_inner_SYSV (ffi_cif *cif,
541 		        void (*fun) (ffi_cif *, void *, void **, void *),
542 		        void *user_data,
543 		        struct closure_frame *frame)
544 {
545   void **avalue = (void **) alloca (cif->nargs * sizeof (void *));
546   void *rvalue = ffi_prep_incoming_args_SYSV (cif, frame->result,
547 					      frame->argp, avalue);
548   fun (cif, rvalue, avalue, user_data);
549   return cif->flags;
550 }
551 
552 int FFI_HIDDEN
ffi_closure_inner_VFP(ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,struct closure_frame * frame)553 ffi_closure_inner_VFP (ffi_cif *cif,
554 		       void (*fun) (ffi_cif *, void *, void **, void *),
555 		       void *user_data,
556 		       struct closure_frame *frame)
557 {
558   void **avalue = (void **) alloca (cif->nargs * sizeof (void *));
559   void *rvalue = ffi_prep_incoming_args_VFP (cif, frame->result, frame->argp,
560 					     frame->vfp_space, avalue);
561   fun (cif, rvalue, avalue, user_data);
562   return cif->flags;
563 }
564 
565 void ffi_closure_SYSV (void) FFI_HIDDEN;
566 void ffi_closure_VFP (void) FFI_HIDDEN;
567 void ffi_go_closure_SYSV (void) FFI_HIDDEN;
568 void ffi_go_closure_VFP (void) FFI_HIDDEN;
569 
570 /* the cif must already be prep'ed */
571 
572 ffi_status
ffi_prep_closure_loc(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,void * codeloc)573 ffi_prep_closure_loc (ffi_closure * closure,
574 		      ffi_cif * cif,
575 		      void (*fun) (ffi_cif *, void *, void **, void *),
576 		      void *user_data, void *codeloc)
577 {
578   void (*closure_func) (void) = ffi_closure_SYSV;
579 
580   if (cif->abi == FFI_VFP)
581     {
582       /* We only need take the vfp path if there are vfp arguments.  */
583       if (cif->vfp_used)
584 	closure_func = ffi_closure_VFP;
585     }
586   else if (cif->abi != FFI_SYSV)
587     return FFI_BAD_ABI;
588 
589 #if FFI_EXEC_TRAMPOLINE_TABLE
590   void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
591   config[0] = closure;
592   config[1] = closure_func;
593 #else
594 
595 #ifndef _M_ARM
596   memcpy(closure->tramp, ffi_arm_trampoline, 8);
597 #else
598   // cast away function type so MSVC doesn't set the lower bit of the function pointer
599   memcpy(closure->tramp, (void*)((uintptr_t)ffi_arm_trampoline & 0xFFFFFFFE), FFI_TRAMPOLINE_CLOSURE_OFFSET);
600 #endif
601 
602 #if defined (__QNX__)
603   msync(closure->tramp, 8, 0x1000000);	/* clear data map */
604   msync(codeloc, 8, 0x1000000);	/* clear insn map */
605 #elif defined(_MSC_VER)
606   FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE);
607 #else
608   __clear_cache(closure->tramp, closure->tramp + 8);	/* clear data map */
609   __clear_cache(codeloc, codeloc + 8);			/* clear insn map */
610 #endif
611 #ifdef _M_ARM
612   *(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func;
613 #else
614   *(void (**)(void))(closure->tramp + 8) = closure_func;
615 #endif
616 #endif
617 
618   closure->cif = cif;
619   closure->fun = fun;
620   closure->user_data = user_data;
621 
622   return FFI_OK;
623 }
624 
625 ffi_status
ffi_prep_go_closure(ffi_go_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *))626 ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
627 		     void (*fun) (ffi_cif *, void *, void **, void *))
628 {
629   void (*closure_func) (void) = ffi_go_closure_SYSV;
630 
631   if (cif->abi == FFI_VFP)
632     {
633       /* We only need take the vfp path if there are vfp arguments.  */
634       if (cif->vfp_used)
635 	closure_func = ffi_go_closure_VFP;
636     }
637   else if (cif->abi != FFI_SYSV)
638     return FFI_BAD_ABI;
639 
640   closure->tramp = closure_func;
641   closure->cif = cif;
642   closure->fun = fun;
643 
644   return FFI_OK;
645 }
646 
647 /* Below are routines for VFP hard-float support. */
648 
649 /* A subroutine of vfp_type_p.  Given a structure type, return the type code
650    of the first non-structure element.  Recurse for structure elements.
651    Return -1 if the structure is in fact empty, i.e. no nested elements.  */
652 
653 static int
is_hfa0(const ffi_type * ty)654 is_hfa0 (const ffi_type *ty)
655 {
656   ffi_type **elements = ty->elements;
657   int i, ret = -1;
658 
659   if (elements != NULL)
660     for (i = 0; elements[i]; ++i)
661       {
662         ret = elements[i]->type;
663         if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
664           {
665             ret = is_hfa0 (elements[i]);
666             if (ret < 0)
667               continue;
668           }
669         break;
670       }
671 
672   return ret;
673 }
674 
675 /* A subroutine of vfp_type_p.  Given a structure type, return true if all
676    of the non-structure elements are the same as CANDIDATE.  */
677 
678 static int
is_hfa1(const ffi_type * ty,int candidate)679 is_hfa1 (const ffi_type *ty, int candidate)
680 {
681   ffi_type **elements = ty->elements;
682   int i;
683 
684   if (elements != NULL)
685     for (i = 0; elements[i]; ++i)
686       {
687         int t = elements[i]->type;
688         if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
689           {
690             if (!is_hfa1 (elements[i], candidate))
691               return 0;
692           }
693         else if (t != candidate)
694           return 0;
695       }
696 
697   return 1;
698 }
699 
700 /* Determine if TY is an homogenous floating point aggregate (HFA).
701    That is, a structure consisting of 1 to 4 members of all the same type,
702    where that type is a floating point scalar.
703 
704    Returns non-zero iff TY is an HFA.  The result is an encoded value where
705    bits 0-7 contain the type code, and bits 8-10 contain the element count.  */
706 
707 static int
vfp_type_p(const ffi_type * ty)708 vfp_type_p (const ffi_type *ty)
709 {
710   ffi_type **elements;
711   int candidate, i;
712   size_t size, ele_count;
713 
714   /* Quickest tests first.  */
715   candidate = ty->type;
716   switch (ty->type)
717     {
718     default:
719       return 0;
720     case FFI_TYPE_FLOAT:
721     case FFI_TYPE_DOUBLE:
722       ele_count = 1;
723       goto done;
724     case FFI_TYPE_COMPLEX:
725       candidate = ty->elements[0]->type;
726       if (candidate != FFI_TYPE_FLOAT && candidate != FFI_TYPE_DOUBLE)
727 	return 0;
728       ele_count = 2;
729       goto done;
730     case FFI_TYPE_STRUCT:
731       break;
732     }
733 
734   /* No HFA types are smaller than 4 bytes, or larger than 32 bytes.  */
735   size = ty->size;
736   if (size < 4 || size > 32)
737     return 0;
738 
739   /* Find the type of the first non-structure member.  */
740   elements = ty->elements;
741   candidate = elements[0]->type;
742   if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
743     {
744       for (i = 0; ; ++i)
745         {
746           candidate = is_hfa0 (elements[i]);
747           if (candidate >= 0)
748             break;
749         }
750     }
751 
752   /* If the first member is not a floating point type, it's not an HFA.
753      Also quickly re-check the size of the structure.  */
754   switch (candidate)
755     {
756     case FFI_TYPE_FLOAT:
757       ele_count = size / sizeof(float);
758       if (size != ele_count * sizeof(float))
759         return 0;
760       break;
761     case FFI_TYPE_DOUBLE:
762       ele_count = size / sizeof(double);
763       if (size != ele_count * sizeof(double))
764         return 0;
765       break;
766     default:
767       return 0;
768     }
769   if (ele_count > 4)
770     return 0;
771 
772   /* Finally, make sure that all scalar elements are the same type.  */
773   for (i = 0; elements[i]; ++i)
774     {
775       int t = elements[i]->type;
776       if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
777         {
778           if (!is_hfa1 (elements[i], candidate))
779             return 0;
780         }
781       else if (t != candidate)
782         return 0;
783     }
784 
785   /* All tests succeeded.  Encode the result.  */
786  done:
787   return (ele_count << 8) | candidate;
788 }
789 
790 static int
place_vfp_arg(ffi_cif * cif,int h)791 place_vfp_arg (ffi_cif *cif, int h)
792 {
793   unsigned short reg = cif->vfp_reg_free;
794   int align = 1, nregs = h >> 8;
795 
796   if ((h & 0xff) == FFI_TYPE_DOUBLE)
797     align = 2, nregs *= 2;
798 
799   /* Align register number. */
800   if ((reg & 1) && align == 2)
801     reg++;
802 
803   while (reg + nregs <= 16)
804     {
805       int s, new_used = 0;
806       for (s = reg; s < reg + nregs; s++)
807 	{
808 	  new_used |= (1 << s);
809 	  if (cif->vfp_used & (1 << s))
810 	    {
811 	      reg += align;
812 	      goto next_reg;
813 	    }
814 	}
815       /* Found regs to allocate. */
816       cif->vfp_used |= new_used;
817       cif->vfp_args[cif->vfp_nargs++] = (signed char)reg;
818 
819       /* Update vfp_reg_free. */
820       if (cif->vfp_used & (1 << cif->vfp_reg_free))
821 	{
822 	  reg += nregs;
823 	  while (cif->vfp_used & (1 << reg))
824 	    reg += 1;
825 	  cif->vfp_reg_free = reg;
826 	}
827       return 0;
828     next_reg:;
829     }
830   // done, mark all regs as used
831   cif->vfp_reg_free = 16;
832   cif->vfp_used = 0xFFFF;
833   return 1;
834 }
835 
836 static void
layout_vfp_args(ffi_cif * cif)837 layout_vfp_args (ffi_cif * cif)
838 {
839   unsigned int i;
840   /* Init VFP fields */
841   cif->vfp_used = 0;
842   cif->vfp_nargs = 0;
843   cif->vfp_reg_free = 0;
844   memset (cif->vfp_args, -1, 16);	/* Init to -1. */
845 
846   for (i = 0; i < cif->nargs; i++)
847     {
848       int h = vfp_type_p (cif->arg_types[i]);
849       if (h && place_vfp_arg (cif, h) == 1)
850 	break;
851     }
852 }
853 
854 #endif /* __arm__ or _M_ARM */
855