// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
#include <stdarg.h>

// Returns a `char` zero; the directive below expects a sign-extended i8 return.
// CHECK-LABEL: define signext i8 @f0()
char f0(void) {
  return 0;
}

// Returns a `short` zero; the directive below expects a sign-extended i16 return.
// CHECK-LABEL: define signext i16 @f1()
short f1(void) {
  return 0;
}

// Returns an `int` zero; the directive below expects a plain i32 return.
// CHECK-LABEL: define i32 @f2()
int f2(void) {
  return 0;
}

// Returns a `float` zero; the directive below expects a `float` IR return type.
// CHECK-LABEL: define float @f3()
float f3(void) {
  return 0;
}

// Returns a `double` zero; the directive below expects a `double` IR return type.
// CHECK-LABEL: define double @f4()
double f4(void) {
  return 0;
}

// Returns a `long double` zero; the directive below expects an x86_fp80 return.
// CHECK-LABEL: define x86_fp80 @f5()
long double f5(void) {
  return 0;
}

// Empty function taking one argument of each basic integer width plus a
// pointer; only the lowered parameter list is checked.
// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
void f6(char a0, short a1, int a2, long long a3, void *a4) {
}

// Empty function taking an enum by value; the directive expects an i32 parameter.
// CHECK-LABEL: define void @f7(i32 %a0)
typedef enum { A, B, C } e7;
void f7(e7 a0) {
}

// Test merging/passing of upper eightbyte with X87 class.
//
// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
// CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
union u8 {
  long double a;
  int b;
};
// f8_1 never returns (infinite loop); only its signature lowering is checked.
union u8 f8_1() { while (1) {} }
// f8_2 takes the union by value and does nothing with it.
void f8_2(union u8 a0) {}

// Returns a two-int struct ending in a zero-width bit-field; the directive
// expects an i64 return. The body never returns (infinite loop).
// CHECK-LABEL: define i64 @f9()
struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }

// Same struct shape as the f9 case, passed as an argument; the directive
// expects a single coerced i64 parameter.
// CHECK-LABEL: define void @f10(i64 %a0.coerce)
struct s10 { int a; int b; int : 0; };
void f10(struct s10 a0) {}

// Returns an anonymous union of long double and float; the directive expects
// an sret return. The body never returns (infinite loop).
// CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
union { long double a; float b; } f11() { while (1) {} }

// A struct holding one 16-byte-aligned int; the directives expect it to be
// returned and passed as a plain i32.
// CHECK-LABEL: define i32 @f12_0()
// CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
struct s12 { int a __attribute__((aligned(16))); };
// Never returns (infinite loop); only the signature lowering is checked.
struct s12 f12_0(void) { while (1) {} }
void f12_1(struct s12 a0) {}

// Check that sret parameter is accounted for when checking available integer
// registers.
// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)

struct s13_0 { long long f0[3]; };
struct s13_1 { long long f0[2]; };
// Never returns (infinite loop); only the signature lowering is checked.
struct s13_0 f13(int a, int b, int c, int d,
                 struct s13_1 e, int f) { while (1) {} }

// Seventh integer argument is a `char`; the directive expects it to stay
// a sign-extended i8.
// CHECK: define void @f14({{.*}}, i8 signext %X)
void f14(int a, int b, int c, int d, int e, int f, char X) {}

// Seventh integer-class argument is a pointer; the directive expects i8*.
// CHECK: define void @f15({{.*}}, i8* %X)
void f15(int a, int b, int c, int d, int e, int f, void *X) {}

// Ninth float argument; the directive expects it to stay a `float` parameter.
// CHECK: define void @f16({{.*}}, float %X)
void f16(float a, float b, float c, float d, float e, float f, float g, float h,
         float X) {}

// Eight floats followed by a long double; the directive expects x86_fp80.
// CHECK: define void @f17({{.*}}, x86_fp80 %X)
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
         long double X) {}

// Check for valid coercion.  The struct should be passed/returned as i32, not
// as i64 for better code quality.
// rdar://8135035
// CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
struct f18_s0 { int f0; };
// Never returns (infinite loop); only the signature lowering is checked.
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }

// Check byval alignment.

// A struct wrapping a long double; the directive expects byval align 16.
// CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
struct s19 {
  long double a;
};
void f19(struct s19 x) {}

// A struct declared with 32-byte alignment; the directive expects byval align 32.
// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
struct __attribute__((aligned(32))) s20 {
  int x;
  int y;
};
void f20(struct s20 x) {}

struct StringRef {
  long x;
  const char *Ptr;
};

// rdar://7375902
// Returns S.Ptr advanced by S.x; the directive expects the struct to arrive
// as two separate parameters (i64 and i8*).
// CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
const char *f21(struct StringRef S) { return S.x+S.Ptr; }

// PR7567
// Typedef carrying a 16-byte alignment attribute; the directives expect both
// parameter allocas to be 16-byte aligned.
typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
void f22(L x, L y) { }
// CHECK: @f22
// CHECK: %x = alloca{{.*}}, align 16
// CHECK: %y = alloca{{.*}}, align 16



// PR7714
struct f23S {
  short f0;
  unsigned f1;
  int f2;
};


// Takes the 12-byte struct by value; the directive expects it to be split into
// an i64 and an i32 parameter.
void f23(int A, struct f23S B) {
  // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
}

struct f24s { long a; int b; };

// Returns a copy of *X; P2 is unused. The directive expects a { i64, i32 }
// return type.
struct f23S f24(struct f23S *X, struct f24s *P2) {
  return *X;

  // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
}

// rdar://8248065
typedef float v4f32 __attribute__((__vector_size__(16)));
// Returns X+X element-wise. The directives expect exactly one alloca and one
// store of the incoming vector before the return.
v4f32 f25(v4f32 X) {
  // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
  // CHECK-NOT: alloca
  // CHECK: alloca <4 x float>
  // CHECK-NOT: alloca
  // CHECK: store <4 x float> %X, <4 x float>*
  // CHECK-NOT: store
  // CHECK: ret <4 x float>
  return X+X;
}

struct foo26 {
  int *X;
  float *Y;
};

// Returns a copy of *P; the directive expects the two pointers to be returned
// as a { i32*, float* } pair.
struct foo26 f26(struct foo26 *P) {
  // CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
  return *P;
}


182 struct v4f32wrapper {
183   v4f32 v;
184 };
185 
f27(struct v4f32wrapper X)186 struct v4f32wrapper f27(struct v4f32wrapper X) {
187   // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
188   return X;
189 }
190 
// PR22563 - We should unwrap simple structs and arrays to pass
// and return them in the appropriate vector registers if possible.

typedef float v8f32 __attribute__((__vector_size__(32)));
struct v8f32wrapper {
  v8f32 v;
};

// Returns its argument unchanged; checked only in the AVX-enabled runs.
struct v8f32wrapper f27a(struct v8f32wrapper X) {
  // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
  return X;
}

struct v8f32wrapper_wrapper {
  v8f32 v[1];
};

// Same as f27a, but the vector sits inside a one-element array member.
struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
  // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
  return X;
}

// rdar://5711709
struct f28c {
  double x;
  int y;
};
// Empty body; the directive expects the struct to be split into a double and
// an i32 parameter.
void f28(struct f28c C) {
  // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
}

// Same double/int payload as the f28 case, nested inside a one-element array
// of an inner struct.
struct f29a {
  struct c {
    double x;
    int y;
  } x[1];
};

// Empty body; the directive expects the same double + i32 split.
void f29a(struct f29a A) {
  // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
}

// rdar://8249586
// Eleven bytes of char data; the directive expects an i64 plus an i24.
struct S0 { char f0[8]; char f2; char f3; char f4; };
void f30(struct S0 p_4) {
  // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
}

// Pass the third element as a float when followed by tail padding.
// rdar://8251384
struct f31foo { float a, b, c; };
// Returns the third member of X.
float f31(struct f31foo X) {
  // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
  return X.c;
}

// Returns the complex sum A+B; the directive expects each complex float to be
// passed and returned as a <2 x float>.
_Complex float f32(_Complex float A, _Complex float B) {
  // rdar://6379669
  // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
  return A+B;
}


// rdar://8357396
struct f33s { long x; float c,d; };

// Pulls one `struct f33s` out of the va_list and discards it; there are no
// output directives for this function in the visible file.
void f33(va_list X) {
  va_arg(X, struct f33s);
}

typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));

// rdar://8359248
// Returns its one-element 64-bit vector argument unchanged; the directive
// expects it to travel as an i64.
// CHECK-LABEL: define i64 @f34(i64 %arg.coerce)
v1i64 f34(v1i64 arg) { return arg; }


// rdar://8358475
// Returns arg+arg for a one-element `unsigned long` vector; the directive
// expects it to travel as an i64.
// CHECK-LABEL: define i64 @f35(i64 %arg.coerce)
typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
v1i64_2 f35(v1i64_2 arg) { return arg+arg; }

// rdar://9122143
// CHECK: declare void @func(%struct._str* byval align 16)
typedef struct _str {
  union {
    long double a;
    long c;
  };
} str;

void func(str s);
str ss;
// Passes the global `ss` by value so that a declaration of `func` is emitted.
void f9122143()
{
  func(ss);
}

// Returns its two-element 32-bit vector argument unchanged; the directive
// expects it to travel as a double.
// CHECK-LABEL: define double @f36(double %arg.coerce)
typedef unsigned v2i32 __attribute((__vector_size__(8)));
v2i32 f36(v2i32 arg) { return arg; }

// AVX: declare void @f38(<8 x float>)
// AVX: declare void @f37(<8 x float>)
// SSE: declare void @f38(%struct.s256* byval align 32)
// SSE: declare void @f37(<8 x float>* byval align 32)
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef struct {
  __m256 m;
} s256;

s256 x38;
__m256 x37;

void f38(s256 x);
void f37(__m256 x);
// Calls both externs with the globals above so that their declarations are
// emitted; the expected lowering differs with and without AVX.
void f39() { f38(x38); f37(x37); }

// The two next tests make sure that the struct below is passed
// in the same way regardless of avx being used

// CHECK: declare void @func40(%struct.t128* byval align 16)
typedef float __m128 __attribute__ ((__vector_size__ (16)));
typedef struct t128 {
  __m128 m;
  __m128 n;
} two128;

extern void func40(two128 s);
// Forwards its by-value argument to func40 so that a declaration is emitted.
void func41(two128 s) {
  func40(s);
}

324 // CHECK: declare void @func42(%struct.t128_2* byval align 16)
325 typedef struct xxx {
326   __m128 array[2];
327 } Atwo128;
328 typedef struct t128_2 {
329   Atwo128 x;
330 } SA;
331 
332 extern void func42(SA s);
func43(SA s)333 void func43(SA s) {
334   func42(s);
335 }
336 
// Reads one 32-byte-aligned struct from the variadic arguments and returns its
// `y` member. The directives expect the argument pointer to be rounded up to a
// 32-byte boundary (add 31, mask with -32).
// CHECK-LABEL: define i32 @f44
// CHECK: ptrtoint
// CHECK-NEXT: add i64 %{{[0-9]+}}, 31
// CHECK-NEXT: and i64 %{{[0-9]+}}, -32
// CHECK-NEXT: inttoptr
typedef int T44 __attribute((vector_size(32)));
struct s44 { T44 x; int y; };
int f44(int i, ...) {
  __builtin_va_list ap;
  __builtin_va_start(ap, i);
  struct s44 s = __builtin_va_arg(ap, struct s44);
  __builtin_va_end(ap);
  return s.y;
}

// Test that vec3 returns the correct LLVM IR type.
// AVX-LABEL: define i32 @foo(<3 x i64> %X)
typedef long long3 __attribute((ext_vector_type(3)));
// Ignores its argument and returns 0; only the parameter type is checked.
int foo(long3 X)
{
  return 0;
}

360 // Make sure we don't use a varargs convention for a function without a
361 // prototype where AVX types are involved.
362 // AVX: @test45
363 // AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*)
364 int f45();
365 __m256 x45;
test45()366 void test45() { f45(x45); }
367 
// Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
// lowering can't handle this case correctly because it runs after legalization.
// CHECK: @test46
// CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}})
typedef float v46 __attribute((vector_size(8)));
void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
// Passes the same two-float vector ten times; the directive above pins how
// the last two arguments are passed.
void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }

// Check that we pass the struct below without using byval, which helps out
// codegen.
//
// CHECK: @test47
// CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
struct s47 { unsigned a; };
void f47(int,int,int,int,int,int,struct s47);
// Passes `a` six times followed by the one-word struct `b`.
void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }

// rdar://12723368
// In the following example, there are holes in T4 at the 3rd byte and the 4th
// byte, however, T2 does not have those holes. T4 is chosen to be the
// representing type for union T1, but we can't use load or store of T4 since
// it will skip the 3rd byte and the 4th byte.
// In general, since we don't accurately represent the data fields of a union,
// do not use load or store of the representing llvm type for the union.
typedef _Complex int T2;
typedef _Complex char T5;
typedef _Complex int T7;
typedef struct T4 { T5 field0; T7 field1; } T4;
typedef union T1 { T2 field0; T4 field1; } T1;
extern T1 T1_retval;
// Returns the extern union by value; the directives expect the copy to be
// done with memcpy.
T1 test48(void) {
// CHECK: @test48
// CHECK: memcpy
// CHECK: memcpy
  return T1_retval;
}

void test49_helper(double, ...);
// Calls a variadic helper with one named and one variadic double; the
// directives expect two loads followed directly by the call.
void test49(double d, double e) {
  test49_helper(d, e);
}
// CHECK-LABEL:    define void @test49(
// CHECK:      [[T0:%.*]] = load double, double*
// CHECK-NEXT: [[T1:%.*]] = load double, double*
// CHECK-NEXT: call void (double, ...) @test49_helper(double [[T0]], double [[T1]])

void test50_helper();
// Calls an unprototyped helper with two doubles; the directives expect the
// callee to be bitcast to a (double, double, ...) function type.
void test50(double d, double e) {
  test50_helper(d, e);
}
// CHECK-LABEL:    define void @test50(
// CHECK:      [[T0:%.*]] = load double, double*
// CHECK-NEXT: [[T1:%.*]] = load double, double*
// CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])

struct test51_s { __uint128_t intval; };
// Reads one 16-byte struct from argList and stores it through `s`. The
// directives expect the register-save-area path to copy through a temporary
// with a 16-byte memcpy.
void test51(struct test51_s *s, __builtin_va_list argList) {
    *s = __builtin_va_arg(argList, struct test51_s);
}

// CHECK-LABEL: define void @test51
// CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
// CHECK: br i1
// CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
// CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
// CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
// CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false)
// CHECK-NEXT: add i32 {{.*}}, 16
// CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
// CHECK-NEXT: br label

442 void test52_helper(int, ...);
443 __m256 x52;
test52()444 void test52() {
445   test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
446 }
447 // AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
448 
// Reads one __m256 from argList and stores it through `m`; the directives
// expect no conditional branch before the return in the AVX-enabled runs.
void test53(__m256 *m, __builtin_va_list argList) {
  *m = __builtin_va_arg(argList, __m256);
}
// AVX-LABEL: define void @test53
// AVX-NOT: br i1
// AVX: ret void

456 void test54_helper(__m256, ...);
457 __m256 x54;
test54()458 void test54() {
459   test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
460   test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
461 }
462 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
463 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
464 
typedef float __m512 __attribute__ ((__vector_size__ (64)));
typedef struct {
  __m512 m;
} s512;

s512 x55;
__m512 x56;

// Even on AVX512, aggregates of size larger than four eightbytes have class
// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
//
// CHECK: declare void @f55(%struct.s512* byval align 64)
void f55(s512 x);

// However, __m512 has type SSE/SSEUP on AVX512.
//
// AVX512: declare void @f56(<16 x float>)
// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
void f56(__m512 x);
// Calls both externs with the globals above so that their declarations are emitted.
void f57() { f55(x55); f56(x56); }

486 // Like for __m128 on AVX, check that the struct below is passed
487 // in the same way regardless of AVX512 being used.
488 //
489 // CHECK: declare void @f58(%struct.t256* byval align 32)
490 typedef struct t256 {
491   __m256 m;
492   __m256 n;
493 } two256;
494 
495 extern void f58(two256 s);
f59(two256 s)496 void f59(two256 s) {
497   f58(s);
498 }
499 
500 // CHECK: declare void @f60(%struct.sat256* byval align 32)
501 typedef struct at256 {
502   __m256 array[2];
503 } Atwo256;
504 typedef struct sat256 {
505   Atwo256 x;
506 } SAtwo256;
507 
508 extern void f60(SAtwo256 s);
f61(SAtwo256 s)509 void f61(SAtwo256 s) {
510   f60(s);
511 }
512 
513 // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
514 void f62_helper(int, ...);
515 __m512 x62;
f62()516 void f62() {
517   f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
518 }
519 
520 // Like for __m256 on AVX, we always pass __m512 in memory, and don't
521 // need to use the register save area.
522 //
523 // AVX512-LABEL: define void @f63
524 // AVX512-NOT: br i1
525 // AVX512: ret void
f63(__m512 * m,__builtin_va_list argList)526 void f63(__m512 *m, __builtin_va_list argList) {
527   *m = __builtin_va_arg(argList, __m512);
528 }
529 
530 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
531 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
532 void f64_helper(__m512, ...);
533 __m512 x64;
f64()534 void f64() {
535   f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
536   f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
537 }
538