// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
#include <stdarg.h>

// Scalar returns and arguments.  The expected IR extends char and short with
// signext, returns int/float/double/long double directly, and uses i32 for
// the enum argument of f7.

// CHECK-LABEL: define signext i8 @f0()
char f0(void) {
  return 0;
}

// CHECK-LABEL: define signext i16 @f1()
short f1(void) {
  return 0;
}

// CHECK-LABEL: define i32 @f2()
int f2(void) {
  return 0;
}

// CHECK-LABEL: define float @f3()
float f3(void) {
  return 0;
}

// CHECK-LABEL: define double @f4()
double f4(void) {
  return 0;
}

// CHECK-LABEL: define x86_fp80 @f5()
long double f5(void) {
  return 0;
}

// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
void f6(char a0, short a1, int a2, long long a3, void *a4) {
}

// CHECK-LABEL: define void @f7(i32 %a0)
typedef enum { A, B, C } e7;
void f7(e7 a0) {
}

// Test merging/passing of upper eightbyte with X87 class.
//
// A union holding a long double is expected to be returned through an sret
// pointer and passed byval with 16-byte alignment.
// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
// CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
union u8 {
  long double a;
  int b;
};
union u8 f8_1() { while (1) {} }
void f8_2(union u8 a0) {}

// The trailing zero-width bit-field does not stop s9/s10 from being returned
// or passed as a single i64.
// CHECK-LABEL: define i64 @f9()
struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }

// CHECK-LABEL: define void @f10(i64 %a0.coerce)
struct s10 { int a; int b; int : 0; };
void f10(struct s10 a0) {}

// CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
union { long double a; float b; } f11() { while (1) {} }

// An over-aligned int member: the struct is still expected to be coerced to
// i32 for both the return value and the argument.
// CHECK-LABEL: define i32 @f12_0()
// CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
struct s12 { int a __attribute__((aligned(16))); };
struct s12 f12_0(void) { while (1) {} }
void f12_1(struct s12 a0) {}

// Check that sret parameter is accounted for when checking available integer
// registers.
// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)

struct s13_0 { long long f0[3]; };
struct s13_1 { long long f0[2]; };
struct s13_0 f13(int a, int b, int c, int d,
                 struct s13_1 e, int f) { while (1) {} }

// f14-f17: the last argument X follows six int (or eight float) arguments;
// the expected IR still passes it as a plain scalar of its own type.

// CHECK: define void @f14({{.*}}, i8 signext %X)
void f14(int a, int b, int c, int d, int e, int f, char X) {}

// CHECK: define void @f15({{.*}}, i8* %X)
void f15(int a, int b, int c, int d, int e, int f, void *X) {}

// CHECK: define void @f16({{.*}}, float %X)
void f16(float a, float b, float c, float d, float e, float f, float g, float h,
         float X) {}

// CHECK: define void @f17({{.*}}, x86_fp80 %X)
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
         long double X) {}

// Check for valid coercion. The struct should be passed/returned as i32, not
// as i64 for better code quality.
// rdar://8135035
// CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
struct f18_s0 { int f0; };
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }

// Check byval alignment.
// The expected byval alignment follows the aggregate: 16 for a struct with a
// long double member, 32 for a struct declared aligned(32).

// CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
struct s19 {
  long double a;
};
void f19(struct s19 x) {}

// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
struct __attribute__((aligned(32))) s20 {
  int x;
  int y;
};
void f20(struct s20 x) {}

struct StringRef {
  long x;
  const char *Ptr;
};

// rdar://7375902
// CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
const char *f21(struct StringRef S) { return S.x+S.Ptr; }

// PR7567
typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
void f22(L x, L y) { }
// CHECK: @f22
// CHECK: %x = alloca{{.*}}, align 16
// CHECK: %y = alloca{{.*}}, align 16



// PR7714
struct f23S {
  short f0;
  unsigned f1;
  int f2;
};


void f23(int A, struct f23S B) {
  // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
}

struct f24s { long a; int b; };

struct f23S f24(struct f23S *X, struct f24s *P2) {
  return *X;

  // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
}

// rdar://8248065
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 f25(v4f32 X) {
  // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
  // CHECK-NOT: alloca
  // CHECK: alloca <4 x float>
  // CHECK-NOT: alloca
  // CHECK: store <4 x float> %X, <4 x float>*
  // CHECK-NOT: store
  // CHECK: ret <4 x float>
  return X+X;
}

struct foo26 {
  int *X;
  float *Y;
};

// A struct of two pointers is expected to come back as { i32*, float* }.
struct foo26 f26(struct foo26 *P) {
  // CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
  return *P;
}


struct v4f32wrapper {
  v4f32 v;
};

// A struct wrapping a single 16-byte vector is expected to be passed and
// returned as the bare vector.
struct v4f32wrapper f27(struct v4f32wrapper X) {
  // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
  return X;
}

// PR22563 - We should unwrap simple structs and arrays to pass
// and return them in the appropriate vector registers if possible.

typedef float v8f32 __attribute__((__vector_size__(32)));
struct v8f32wrapper {
  v8f32 v;
};

struct v8f32wrapper f27a(struct v8f32wrapper X) {
  // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
  return X;
}

struct v8f32wrapper_wrapper {
  v8f32 v[1];
};

struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
  // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
  return X;
}

// rdar://5711709
struct f28c {
  double x;
  int y;
};
void f28(struct f28c C) {
  // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
}

struct f29a {
  struct c {
    double x;
    int y;
  } x[1];
};

void f29a(struct f29a A) {
  // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
}

// rdar://8249586
struct S0 { char f0[8]; char f2; char f3; char f4; };
void f30(struct S0 p_4) {
  // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
}

// Pass the third element as a float when followed by tail padding.
// rdar://8251384
struct f31foo { float a, b, c; };
float f31(struct f31foo X) {
  // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
  return X.c;
}

_Complex float f32(_Complex float A, _Complex float B) {
  // rdar://6379669
  // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
  return A+B;
}


// rdar://8357396
struct f33s { long x; float c,d; };

// NOTE(review): no FileCheck expectations are attached to f33; presumably it
// only has to compile without crashing -- confirm against the radar.
void f33(va_list X) {
  va_arg(X, struct f33s);
}

typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));

// rdar://8359248
// CHECK-LABEL: define double @f34(double %arg.coerce)
v1i64 f34(v1i64 arg) { return arg; }


// rdar://8358475
// CHECK-LABEL: define double @f35(double %arg.coerce)
typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
v1i64_2 f35(v1i64_2 arg) { return arg+arg; }

// rdar://9122143
// CHECK: declare void @func(%struct._str* byval align 16)
typedef struct _str {
  union {
    long double a;
    long c;
  };
} str;

void func(str s);
str ss;
void f9122143()
{
  func(ss);
}

// CHECK-LABEL: define double @f36(double %arg.coerce)
typedef unsigned v2i32 __attribute((__vector_size__(8)));
v2i32 f36(v2i32 arg) { return arg; }

// With AVX a 256-bit vector (bare or wrapped in a struct) is expected to be
// passed directly; without it both are expected to go byval.
// AVX: declare void @f38(<8 x float>)
// AVX: declare void @f37(<8 x float>)
// SSE: declare void @f38(%struct.s256* byval align 32)
// SSE: declare void @f37(<8 x float>* byval align 32)
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef struct {
  __m256 m;
} s256;

s256 x38;
__m256 x37;

void f38(s256 x);
void f37(__m256 x);
void f39() { f38(x38); f37(x37); }

// The two next tests make sure that the struct
below is passed 310 // in the same way regardless of avx being used 311 312 // CHECK: declare void @func40(%struct.t128* byval align 16) 313 typedef float __m128 __attribute__ ((__vector_size__ (16))); 314 typedef struct t128 { 315 __m128 m; 316 __m128 n; 317 } two128; 318 319 extern void func40(two128 s); func41(two128 s)320 void func41(two128 s) { 321 func40(s); 322 } 323 324 // CHECK: declare void @func42(%struct.t128_2* byval align 16) 325 typedef struct xxx { 326 __m128 array[2]; 327 } Atwo128; 328 typedef struct t128_2 { 329 Atwo128 x; 330 } SA; 331 332 extern void func42(SA s); func43(SA s)333 void func43(SA s) { 334 func42(s); 335 } 336 337 // CHECK-LABEL: define i32 @f44 338 // CHECK: ptrtoint 339 // CHECK-NEXT: add i64 %{{[0-9]+}}, 31 340 // CHECK-NEXT: and i64 %{{[0-9]+}}, -32 341 // CHECK-NEXT: inttoptr 342 typedef int T44 __attribute((vector_size(32))); 343 struct s44 { T44 x; int y; }; f44(int i,...)344 int f44(int i, ...) { 345 __builtin_va_list ap; 346 __builtin_va_start(ap, i); 347 struct s44 s = __builtin_va_arg(ap, struct s44); 348 __builtin_va_end(ap); 349 return s.y; 350 } 351 352 // Text that vec3 returns the correct LLVM IR type. 353 // AVX-LABEL: define i32 @foo(<3 x i64> %X) 354 typedef long long3 __attribute((ext_vector_type(3))); foo(long3 X)355 int foo(long3 X) 356 { 357 return 0; 358 } 359 360 // Make sure we don't use a varargs convention for a function without a 361 // prototype where AVX types are involved. 362 // AVX: @test45 363 // AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*) 364 int f45(); 365 __m256 x45; test45()366 void test45() { f45(x45); } 367 368 // Make sure we use byval to pass 64-bit vectors in memory; the LLVM call 369 // lowering can't handle this case correctly because it runs after legalization. 
370 // CHECK: @test46 371 // CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}}) 372 typedef float v46 __attribute((vector_size(8))); 373 void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46); test46()374 void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); } 375 376 // Check that we pass the struct below without using byval, which helps out 377 // codegen. 378 // 379 // CHECK: @test47 380 // CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) 381 struct s47 { unsigned a; }; 382 void f47(int,int,int,int,int,int,struct s47); test47(int a,struct s47 b)383 void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); } 384 385 // rdar://12723368 386 // In the following example, there are holes in T4 at the 3rd byte and the 4th 387 // byte, however, T2 does not have those holes. T4 is chosen to be the 388 // representing type for union T1, but we can't use load or store of T4 since 389 // it will skip the 3rd byte and the 4th byte. 390 // In general, Since we don't accurately represent the data fields of a union, 391 // do not use load or store of the representing llvm type for the union. 392 typedef _Complex int T2; 393 typedef _Complex char T5; 394 typedef _Complex int T7; 395 typedef struct T4 { T5 field0; T7 field1; } T4; 396 typedef union T1 { T2 field0; T4 field1; } T1; 397 extern T1 T1_retval; test48(void)398 T1 test48(void) { 399 // CHECK: @test48 400 // CHECK: memcpy 401 // CHECK: memcpy 402 return T1_retval; 403 } 404 405 void test49_helper(double, ...); test49(double d,double e)406 void test49(double d, double e) { 407 test49_helper(d, e); 408 } 409 // CHECK-LABEL: define void @test49( 410 // CHECK: [[T0:%.*]] = load double, double* 411 // CHECK-NEXT: [[T1:%.*]] = load double, double* 412 // CHECK-NEXT: call void (double, ...) 
@test49_helper(double [[T0]], double [[T1]]) 413 414 void test50_helper(); test50(double d,double e)415 void test50(double d, double e) { 416 test50_helper(d, e); 417 } 418 // CHECK-LABEL: define void @test50( 419 // CHECK: [[T0:%.*]] = load double, double* 420 // CHECK-NEXT: [[T1:%.*]] = load double, double* 421 // CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]]) 422 423 struct test51_s { __uint128_t intval; }; test51(struct test51_s * s,__builtin_va_list argList)424 void test51(struct test51_s *s, __builtin_va_list argList) { 425 *s = __builtin_va_arg(argList, struct test51_s); 426 } 427 428 // CHECK-LABEL: define void @test51 429 // CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16 430 // CHECK: br i1 431 // CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3 432 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]] 433 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}} 434 // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]] 435 // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8* 436 // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8* 437 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false) 438 // CHECK-NEXT: add i32 {{.*}}, 16 439 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}} 440 // CHECK-NEXT: br label 441 442 void test52_helper(int, ...); 443 __m256 x52; test52()444 void test52() { 445 test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); 446 } 447 // AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double 
{{%[a-zA-Z0-9]+}}) 448 test53(__m256 * m,__builtin_va_list argList)449 void test53(__m256 *m, __builtin_va_list argList) { 450 *m = __builtin_va_arg(argList, __m256); 451 } 452 // AVX-LABEL: define void @test53 453 // AVX-NOT: br i1 454 // AVX: ret void 455 456 void test54_helper(__m256, ...); 457 __m256 x54; test54()458 void test54() { 459 test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); 460 test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); 461 } 462 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) 463 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) 464 465 typedef float __m512 __attribute__ ((__vector_size__ (64))); 466 typedef struct { 467 __m512 m; 468 } s512; 469 470 s512 x55; 471 __m512 x56; 472 473 // Even on AVX512, aggregates of size larger than four eightbytes have class 474 // MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1). 475 // 476 // CHECK: declare void @f55(%struct.s512* byval align 64) 477 void f55(s512 x); 478 479 // However, __m512 has type SSE/SSEUP on AVX512. 480 // 481 // AVX512: declare void @f56(<16 x float>) 482 // NO-AVX512: declare void @f56(<16 x float>* byval align 64) 483 void f56(__m512 x); f57()484 void f57() { f55(x55); f56(x56); } 485 486 // Like for __m128 on AVX, check that the struct below is passed 487 // in the same way regardless of AVX512 being used. 
488 // 489 // CHECK: declare void @f58(%struct.t256* byval align 32) 490 typedef struct t256 { 491 __m256 m; 492 __m256 n; 493 } two256; 494 495 extern void f58(two256 s); f59(two256 s)496 void f59(two256 s) { 497 f58(s); 498 } 499 500 // CHECK: declare void @f60(%struct.sat256* byval align 32) 501 typedef struct at256 { 502 __m256 array[2]; 503 } Atwo256; 504 typedef struct sat256 { 505 Atwo256 x; 506 } SAtwo256; 507 508 extern void f60(SAtwo256 s); f61(SAtwo256 s)509 void f61(SAtwo256 s) { 510 f60(s); 511 } 512 513 // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) 514 void f62_helper(int, ...); 515 __m512 x62; f62()516 void f62() { 517 f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); 518 } 519 520 // Like for __m256 on AVX, we always pass __m512 in memory, and don't 521 // need to use the register save area. 522 // 523 // AVX512-LABEL: define void @f63 524 // AVX512-NOT: br i1 525 // AVX512: ret void f63(__m512 * m,__builtin_va_list argList)526 void f63(__m512 *m, __builtin_va_list argList) { 527 *m = __builtin_va_arg(argList, __m512); 528 } 529 530 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) 531 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) 532 void f64_helper(__m512, ...); 533 __m512 x64; f64()534 void f64() { 535 f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); 536 f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); 537 } 538