1 // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
2 
3 // CHECK: define signext i8 @f0()
// Returns constant zero as a plain `char`. The expected definition just above
// requires the i8 result to carry the `signext` attribute.
f0(void)4 char f0(void) {
5   return 0;
6 }
7 
8 // Struct as return type. Aggregates <= 16 bytes are returned directly, rounded
9 // up to a multiple of 8 bytes.
10 // CHECK: define i64 @f1()
// Struct holding a single char, returned by value; the expected signature
// shows it coerced to i64. The body is empty because only the emitted
// function signature is matched.
11 struct s1 { char f0; };
f1(void)12 struct s1 f1(void) {}
13 
14 // CHECK: define i64 @f2()
// Struct holding a single short, returned by value; expected to be widened
// to i64 in the emitted signature.
15 struct s2 { short f0; };
f2(void)16 struct s2 f2(void) {}
17 
18 // CHECK: define i64 @f3()
// Struct holding a single int, returned by value; expected return type is i64.
19 struct s3 { int f0; };
f3(void)20 struct s3 f3(void) {}
21 
22 // CHECK: define i64 @f4()
// Struct whose only member is a nested struct (s4_0) wrapping one int;
// the nesting does not change the expected i64 return.
23 struct s4 { struct s4_0 { int f0; } f0; };
f4(void)24 struct s4 f4(void) {}
25 
26 // CHECK: define i64 @f5()
// Struct with an empty anonymous struct member followed by an int; still
// expected to be returned as i64.
27 struct s5 { struct { } f0; int f1; };
f5(void)28 struct s5 f5(void) {}
29 
30 // CHECK: define i64 @f6()
// Struct wrapping a one-element int array; expected return type is i64.
31 struct s6 { int f0[1]; };
f6(void)32 struct s6 f6(void) {}
33 
34 // CHECK: define void @f7()
// Struct whose only member is a struct containing just a zero-width
// bit-field. The expected definition returns void, i.e. nothing is returned.
35 struct s7 { struct { int : 0; } f0; };
f7(void)36 struct s7 f7(void) {}
37 
38 // CHECK: define void @f8()
// Same as s7 but the zero-width-bit-field struct is wrapped in a one-element
// array; the expected definition again returns void.
39 struct s8 { struct { int : 0; } f0[1]; };
f8(void)40 struct s8 f8(void) {}
41 
42 // CHECK: define i64 @f9()
// An int followed by a zero-width bit-field; expected return type is i64.
43 struct s9 { int f0; int : 0; };
f9(void)44 struct s9 f9(void) {}
45 
46 // CHECK: define i64 @f10()
// An int followed by two zero-width bit-fields; expected return type is i64.
47 struct s10 { int f0; int : 0; int : 0; };
f10(void)48 struct s10 f10(void) {}
49 
50 // CHECK: define i64 @f11()
// A zero-width bit-field placed before the int member; expected return type
// is i64, as for s9.
51 struct s11 { int : 0; int f0; };
f11(void)52 struct s11 f11(void) {}
53 
54 // CHECK: define i64 @f12()
// Union of char, short and int returned by value; expected return type is i64.
55 union u12 { char f0; short f1; int f2; };
f12(void)56 union u12 f12(void) {}
57 
58 // A homogeneous aggregate used as a return type is returned directly.
59 // CHECK: define %struct.s13 @f13()
// Struct with a single float member; the expected definition returns the
// struct type itself (%struct.s13) rather than a coerced integer.
60 struct s13 { float f0; };
f13(void)61 struct s13 f13(void) {}
62 // CHECK: define %union.u14 @f14()
// Union with a single float member; the expected definition returns
// %union.u14 directly.
63 union u14 { float f0; };
f14(void)64 union u14 f14(void) {}
65 
66 // CHECK: define void @f15()
// Takes struct s7 (only a zero-width bit-field inside) by value. The expected
// definition has an empty parameter list, i.e. the argument is dropped.
f15(struct s7 a0)67 void f15(struct s7 a0) {}
68 
69 // CHECK: define void @f16()
// Takes struct s8 by value; as with f15, the expected definition has no
// parameters.
f16(struct s8 a0)70 void f16(struct s8 a0) {}
71 
72 // CHECK: define i64 @f17()
// Struct made only of bit-fields (13-bit short, 4-bit char); expected return
// type is i64.
73 struct s17 { short f0 : 13; char f1 : 4; };
f17(void)74 struct s17 f17(void) {}
75 
76 // CHECK: define i64 @f18()
// A full short followed by a 4-bit char bit-field; expected return type is i64.
77 struct s18 { short f0; char f1 : 4; };
f18(void)78 struct s18 f18(void) {}
79 
80 // CHECK: define i64 @f19()
// An int followed by a struct s8 member (zero-width bit-field content only);
// expected return type is i64.
81 struct s19 { int f0; struct s8 f1; };
f19(void)82 struct s19 f19(void) {}
83 
84 // CHECK: define i64 @f20()
// Same members as s19 in the opposite order (struct s8 first, then int);
// expected return type is still i64.
85 struct s20 { struct s8 f1; int f0; };
f20(void)86 struct s20 f20(void) {}
87 
88 // CHECK: define i64 @f21()
// An empty struct member followed by a 4-bit int bit-field; expected return
// type is i64.
89 struct s21 { struct {} f1; int f0 : 4; };
f21(void)90 struct s21 f21(void) {}
91 
92 // CHECK: define i64 @f22()
93 // CHECK: define i64 @f23()
94 // CHECK: define i64 @f24()
95 // CHECK: define [2 x i64] @f25()
96 // CHECK: define { float, float } @f26()
97 // CHECK: define { double, double } @f27()
// Complex-number return types. Per the expected definitions listed just above:
// complex char, short and int come back as i64, complex long long as
// [2 x i64], and complex float/double as a pair of float/double values.
// Bodies are empty because only the signatures are matched.
f22(void)98 _Complex char       f22(void) {}
f23(void)99 _Complex short      f23(void) {}
f24(void)100 _Complex int        f24(void) {}
f25(void)101 _Complex long long  f25(void) {}
f26(void)102 _Complex float      f26(void) {}
f27(void)103 _Complex double     f27(void) {}
104 
105 // CHECK: define i64 @f28()
// Struct wrapping a complex char; expected return type is i64.
// Note that f28 is declared with an empty parameter list `()`, not `(void)`.
106 struct s28 { _Complex char f0; };
f28()107 struct s28 f28() {}
108 
109 // CHECK: define i64 @f29()
// Struct wrapping a complex short; expected return type is i64.
110 struct s29 { _Complex short f0; };
f29()111 struct s29 f29() {}
112 
113 // CHECK: define i64 @f30()
// Struct wrapping a complex int; expected return type is i64.
114 struct s30 { _Complex int f0; };
f30()115 struct s30 f30() {}
116 
// One-byte struct passed as an argument. The patterns that follow expect it to
// arrive as `i64 %s.coerce`, be truncated to i8 and stored into a local
// %struct.s31 with alignment 1.
117 struct s31 { char x; };
f31(struct s31 s)118 void f31(struct s31 s) { }
119 // CHECK: define void @f31(i64 %s.coerce)
120 // CHECK: %s = alloca %struct.s31, align 1
121 // CHECK: trunc i64 %s.coerce to i8
122 // CHECK: store i8 %{{.*}},
123 
// Struct with a single double passed as an argument; the pattern that follows
// expects the parameter to be lowered as [1 x double].
124 struct s32 { double x; };
f32(struct s32 s)125 void f32(struct s32 s) { }
126 // CHECK: @f32([1 x double] %{{.*}})
127 
128 // A composite type larger than 16 bytes should be passed indirectly.
// 1024-byte (32*32 chars) struct passed as an argument; the pattern that
// follows expects a pointer parameter (%struct.s33* %s).
129 struct s33 { char buf[32*32]; };
f33(struct s33 s)130 void f33(struct s33 s) { }
131 // CHECK: define void @f33(%struct.s33* %s)
132 
// Call-site side of small-struct passing: g34 dereferences its pointer and
// passes the one-byte struct by value to f34 (declared only). The patterns that
// follow expect the byte to be loaded, zero-extended to i64 and passed as i64.
133 struct s34 { char c; };
134 void f34(struct s34 s);
g34(struct s34 * s)135 void g34(struct s34 *s) { f34(*s); }
136 // CHECK: @g34(%struct.s34* %s)
137 // CHECK: %[[a:.*]] = load i8, i8* %{{.*}}
138 // CHECK: zext i8 %[[a]] to i64
139 // CHECK: call void @f34(i64 %{{.*}})
140 
141 /*
142  * Check that va_arg accesses stack according to ABI alignment
143  */
// Variadic helper: fetches one `long long` from the variable arguments that
// follow `i` and returns it. The negative patterns assert that no
// add-7 / and-minus-8 realignment sequence appears before the fetch.
// NOTE(review): the negative patterns are written with i32 arithmetic although
// the test targets arm64 - confirm they can still catch an unwanted realignment.
t1(int i,...)144 long long t1(int i, ...) {
145     // CHECK: t1
146     __builtin_va_list ap;
147     __builtin_va_start(ap, i);
148     // CHECK-NOT: add i32 %{{.*}} 7
149     // CHECK-NOT: and i32 %{{.*}} -8
150     long long ll = __builtin_va_arg(ap, long long);
151     __builtin_va_end(ap);
152     return ll;
153 }
// Variadic helper: fetches one `double` from the variable arguments that follow
// `i` and returns it. Same negative patterns as t1.
// NOTE(review): the local is named `ll` although it holds a double; the
// negative patterns use i32 arithmetic on a 64-bit target - confirm intent.
t2(int i,...)154 double t2(int i, ...) {
155     // CHECK: t2
156     __builtin_va_list ap;
157     __builtin_va_start(ap, i);
158     // CHECK-NOT: add i32 %{{.*}} 7
159     // CHECK-NOT: and i32 %{{.*}} -8
160     double ll = __builtin_va_arg(ap, double);
161     __builtin_va_end(ap);
162     return ll;
163 }
164 
165 #include <arm_neon.h>
166 
167 // Homogeneous Vector Aggregate as return type and argument type.
168 // CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// Takes two int8x16_t vectors and returns the int8x16x2_t produced by
// vzipq_s8. The expected definition passes both arguments as <16 x i8> and
// returns %struct.int8x16x2_t directly.
f0_0(int8x16_t a0,int8x16_t a1)169 int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) {
170   return vzipq_s8(a0, a1);
171 }
172 
173 // Test direct vector passing.
174 typedef float T_float32x2 __attribute__ ((__vector_size__ (8)));
175 typedef float T_float32x4 __attribute__ ((__vector_size__ (16)));
176 typedef float T_float32x8 __attribute__ ((__vector_size__ (32)));
177 typedef float T_float32x16 __attribute__ ((__vector_size__ (64)));
178 
179 // CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}})
// Identity functions over float vectors of 8, 16, 32 and 64 bytes. The 8- and
// 16-byte vectors are expected to be passed and returned as IR vector values;
// the 32- and 64-byte ones are expected to use an sret pointer for the result
// and a pointer for the argument.
f1_0(T_float32x2 a0)180 T_float32x2 f1_0(T_float32x2 a0) { return a0; }
181 // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}})
f1_1(T_float32x4 a0)182 T_float32x4 f1_1(T_float32x4 a0) { return a0; }
183 // A vector larger than 16 bytes is illegal and is passed indirectly.
184 // CHECK: define void @f1_2(<8 x float>* noalias sret  %{{.*}}, <8 x float>*)
f1_2(T_float32x8 a0)185 T_float32x8 f1_2(T_float32x8 a0) { return a0; }
186 // CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*)
f1_3(T_float32x16 a0)187 T_float32x16 f1_3(T_float32x16 a0) { return a0; }
188 
189 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and
190 // aggregates with size > 16 bytes.
191 struct s35
192 {
193    float v[4]; //Testing HFA.
194 } __attribute__((aligned(16)));
195 typedef struct s35 s35_with_align;
196 
197 typedef __attribute__((neon_vector_type(4))) float float32x4_t;
// Adds two 16-byte-aligned homogeneous float aggregates by reinterpreting each
// as a float32x4_t. The patterns expect both structs to arrive as [4 x float],
// to be spilled into 16-byte-aligned allocas, and to be reloaded as <4 x float>
// with alignment 16. The parameter `i` is unused in the body.
f35(int i,s35_with_align s1,s35_with_align s2)198 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
199 // CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
200 // CHECK: %s1 = alloca %struct.s35, align 16
201 // CHECK: %s2 = alloca %struct.s35, align 16
202 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
203 // CHECK: load <4 x float>, <4 x float>* %[[a]], align 16
204 // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
205 // CHECK: load <4 x float>, <4 x float>* %[[b]], align 16
206   float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
207                             *(float32x4_t *)&s2);
208   return v;
209 }
210 
211 struct s36
212 {
213    int v[4]; //Testing 16-byte aggregate.
214 } __attribute__((aligned(16)));
215 typedef struct s36 s36_with_align;
216 
217 typedef __attribute__((neon_vector_type(4))) int int32x4_t;
// Integer counterpart of f35: adds two 16-byte-aligned structs of four ints by
// reinterpreting each as an int32x4_t. The patterns expect both structs to be
// coerced to i128, stored into 16-byte-aligned allocas and reloaded as
// <4 x i32>. The parameter `i` is unused in the body.
f36(int i,s36_with_align s1,s36_with_align s2)218 int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
219 // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
220 // CHECK: %s1 = alloca %struct.s36, align 16
221 // CHECK: %s2 = alloca %struct.s36, align 16
222 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
223 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
224 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
225 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
226 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
227 // CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16
228   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
229                           *(int32x4_t *)&s2);
230   return v;
231 }
232 
233 struct s37
234 {
235    int v[18]; //Testing large aggregate.
236 } __attribute__((aligned(16)));
237 typedef struct s37 s37_with_align;
238 
// Large-aggregate counterpart of f36: s37 holds 18 ints, and the patterns
// expect both arguments to be passed as %struct.s37 pointers. Only the first
// four ints of each struct are read, via a 16-byte-aligned <4 x i32> load.
// The parameter `i` is unused in the body.
f37(int i,s37_with_align s1,s37_with_align s2)239 int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) {
240 // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2)
241 // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>*
242 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
243 // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>*
244 // CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16
245   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
246                           *(int32x4_t *)&s2);
247   return v;
248 }
249 s37_with_align g37;
// Call-site side of f37: passes the global g37 twice. The patterns expect two
// 16-byte-aligned temporaries, one memcpy into each, and a call that passes
// pointers to those temporaries.
caller37()250 int32x4_t caller37() {
251 // CHECK: caller37
252 // CHECK: %[[a:.*]] = alloca %struct.s37, align 16
253 // CHECK: %[[b:.*]] = alloca %struct.s37, align 16
254 // CHECK: call void @llvm.memcpy
255 // CHECK: call void @llvm.memcpy
256 // CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]])
257   return f37(3, g37, g37);
258 }
259 
260 // rdar://problem/12648441
261 // Test passing structs with size < 8, < 16 and > 16
262 // with alignment of 16 and without
263 
264 // structs with size <= 8 bytes, without alignment attribute:
265 // passed as i64 (the aligned(16) variant further below is passed as i128)
266 struct s38
267 {
268   int i;
269   short s;
270 };
271 typedef struct s38 s38_no_align;
272 // passing structs in registers
// Callee for the unaligned <= 8-byte struct case. Sums `i` with the `i` and `s`
// members of both structs, so that each member is accessed. The patterns expect
// each struct to arrive as an i64, be stored into a 4-byte-aligned alloca, and
// have fields 0 and 1 addressed through getelementptr.
273 __attribute__ ((noinline))
f38(int i,s38_no_align s1,s38_no_align s2)274 int f38(int i, s38_no_align s1, s38_no_align s2) {
275 // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
276 // CHECK: %s1 = alloca %struct.s38, align 4
277 // CHECK: %s2 = alloca %struct.s38, align 4
278 // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
279 // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
280 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
281 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
282 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
283 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
284   return s1.i + s2.i + i + s1.s + s2.s;
285 }
286 s38_no_align g38;
287 s38_no_align g38_2;
// Call-site side of f38: passes the globals g38 and g38_2. The patterns expect
// each global to be loaded as a single i64 (alignment 4) and passed as i64.
caller38()288 int caller38() {
289 // CHECK: define i32 @caller38()
290 // CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
291 // CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
292 // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
293   return f38(3, g38, g38_2);
294 }
295 // passing structs on stack
// Variant of f38 with nine leading int parameters ahead of the two structs
// (the "on stack" case named by the preceding comment). The expected IR-level
// lowering of the structs is unchanged: i64 coercion and 4-byte-aligned allocas.
// Returns the sum of all int parameters and the members of both structs.
296 __attribute__ ((noinline))
f38_stack(int i,int i2,int i3,int i4,int i5,int i6,int i7,int i8,int i9,s38_no_align s1,s38_no_align s2)297 int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
298               int i9, s38_no_align s1, s38_no_align s2) {
299 // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
300 // CHECK: %s1 = alloca %struct.s38, align 4
301 // CHECK: %s2 = alloca %struct.s38, align 4
302 // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
303 // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
304 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
305 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
306 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
307 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
308   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
309 }
// Call-site side of f38_stack: nine integer constants followed by the two
// globals, each expected to be loaded and passed as i64.
caller38_stack()310 int caller38_stack() {
311 // CHECK: define i32 @caller38_stack()
312 // CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
313 // CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
314 // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
315   return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
316 }
317 
318 // structs with size <= 8 bytes, with alignment attribute
319 struct s39
320 {
321   int i;
322   short s;
323 } __attribute__((aligned(16)));
324 typedef struct s39 s39_with_align;
325 // passing aligned structs in registers
// Callee for the <= 8-byte struct declared with aligned(16). Unlike the
// unaligned s38 case, the patterns expect each struct to arrive as an i128 and
// be stored into a 16-byte-aligned alloca. Returns `i` plus the `i` and `s`
// members of both structs.
326 __attribute__ ((noinline))
f39(int i,s39_with_align s1,s39_with_align s2)327 int f39(int i, s39_with_align s1, s39_with_align s2) {
328 // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
329 // CHECK: %s1 = alloca %struct.s39, align 16
330 // CHECK: %s2 = alloca %struct.s39, align 16
331 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
332 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
333 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
334 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
335 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
336 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
337   return s1.i + s2.i + i + s1.s + s2.s;
338 }
339 s39_with_align g39;
340 s39_with_align g39_2;
// Call-site side of f39: each aligned global is expected to be loaded as a
// single i128 (alignment 16) and passed as i128.
caller39()341 int caller39() {
342 // CHECK: define i32 @caller39()
343 // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
344 // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
345 // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
346   return f39(3, g39, g39_2);
347 }
348 // passing aligned structs on stack
// Variant of f39 with nine leading int parameters ahead of the two aligned
// structs (the "on stack" case named by the preceding comment). The structs
// are still expected as i128 with 16-byte-aligned allocas. Returns the sum of
// all int parameters and the members of both structs.
349 __attribute__ ((noinline))
f39_stack(int i,int i2,int i3,int i4,int i5,int i6,int i7,int i8,int i9,s39_with_align s1,s39_with_align s2)350 int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
351               int i9, s39_with_align s1, s39_with_align s2) {
352 // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
353 // CHECK: %s1 = alloca %struct.s39, align 16
354 // CHECK: %s2 = alloca %struct.s39, align 16
355 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
356 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
357 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
358 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
359 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
360 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
361   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
362 }
// Call-site side of f39_stack: nine integer constants followed by the two
// aligned globals, each expected to be loaded and passed as i128.
caller39_stack()363 int caller39_stack() {
364 // CHECK: define i32 @caller39_stack()
365 // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
366 // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
367 // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
368   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
369 }
370 
371 // structs with size <= 16 bytes, without alignment attribute
372 struct s40
373 {
374   int i;
375   short s;
376   int i2;
377   short s2;
378 };
379 typedef struct s40 s40_no_align;
380 // passing structs in registers
// Callee for the unaligned <= 16-byte struct case. The patterns expect each
// struct to arrive as [2 x i64] and be stored into a 4-byte-aligned alloca.
// Only the first two members (`i`, `s`) of each struct are read; `i2` and `s2`
// are not used.
381 __attribute__ ((noinline))
f40(int i,s40_no_align s1,s40_no_align s2)382 int f40(int i, s40_no_align s1, s40_no_align s2) {
383 // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
384 // CHECK: %s1 = alloca %struct.s40, align 4
385 // CHECK: %s2 = alloca %struct.s40, align 4
386 // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
387 // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
388 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
389 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
390 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
391 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
392   return s1.i + s2.i + i + s1.s + s2.s;
393 }
394 s40_no_align g40;
395 s40_no_align g40_2;
// Call-site side of f40: each global is expected to be loaded as [2 x i64]
// (alignment 4) and passed in that form.
caller40()396 int caller40() {
397 // CHECK: define i32 @caller40()
398 // CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
399 // CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
400 // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
401   return f40(3, g40, g40_2);
402 }
403 // passing structs on stack
// Variant of f40 with nine leading int parameters ahead of the two structs
// (the "on stack" case named by the preceding comment). The structs are still
// expected as [2 x i64] with 4-byte-aligned allocas. Returns the sum of all
// int parameters and the `i`/`s` members of both structs.
404 __attribute__ ((noinline))
f40_stack(int i,int i2,int i3,int i4,int i5,int i6,int i7,int i8,int i9,s40_no_align s1,s40_no_align s2)405 int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
406               int i9, s40_no_align s1, s40_no_align s2) {
407 // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
408 // CHECK: %s1 = alloca %struct.s40, align 4
409 // CHECK: %s2 = alloca %struct.s40, align 4
410 // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
411 // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
412 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
413 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
414 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
415 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
416   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
417 }
// Call-site side of f40_stack: nine integer constants followed by the two
// globals, each expected to be loaded and passed as [2 x i64].
caller40_stack()418 int caller40_stack() {
419 // CHECK: define i32 @caller40_stack()
420 // CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
421 // CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
422 // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
423   return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
424 }
425 
426 // structs with size <= 16 bytes, with alignment attribute
427 struct s41
428 {
429   int i;
430   short s;
431   int i2;
432   short s2;
433 } __attribute__((aligned(16)));
434 typedef struct s41 s41_with_align;
435 // passing aligned structs in registers
// Callee for the <= 16-byte struct declared with aligned(16). The patterns
// expect each struct to arrive as an i128 (rather than the [2 x i64] used for
// the unaligned s40) and be stored into a 16-byte-aligned alloca. Only the `i`
// and `s` members are read.
436 __attribute__ ((noinline))
f41(int i,s41_with_align s1,s41_with_align s2)437 int f41(int i, s41_with_align s1, s41_with_align s2) {
438 // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
439 // CHECK: %s1 = alloca %struct.s41, align 16
440 // CHECK: %s2 = alloca %struct.s41, align 16
441 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
442 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
443 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
444 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
445 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
446 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
447   return s1.i + s2.i + i + s1.s + s2.s;
448 }
449 s41_with_align g41;
450 s41_with_align g41_2;
// Call-site side of f41: each aligned global is expected to be loaded as an
// i128 (alignment 16) and passed as i128.
caller41()451 int caller41() {
452 // CHECK: define i32 @caller41()
453 // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
454 // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
455 // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
456   return f41(3, g41, g41_2);
457 }
458 // passing aligned structs on stack
// Variant of f41 with nine leading int parameters ahead of the two aligned
// structs (the "on stack" case named by the preceding comment). The structs
// are still expected as i128 with 16-byte-aligned allocas. Returns the sum of
// all int parameters and the `i`/`s` members of both structs.
459 __attribute__ ((noinline))
f41_stack(int i,int i2,int i3,int i4,int i5,int i6,int i7,int i8,int i9,s41_with_align s1,s41_with_align s2)460 int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
461               int i9, s41_with_align s1, s41_with_align s2) {
462 // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
463 // CHECK: %s1 = alloca %struct.s41, align 16
464 // CHECK: %s2 = alloca %struct.s41, align 16
465 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
466 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
467 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
468 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
469 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
470 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
471   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
472 }
// Call-site side of f41_stack: nine integer constants followed by the two
// aligned globals, each expected to be loaded and passed as i128.
caller41_stack()473 int caller41_stack() {
474 // CHECK: define i32 @caller41_stack()
475 // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
476 // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
477 // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
478   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
479 }
480 
481 // structs with size > 16 bytes, without alignment attribute
482 struct s42
483 {
484   int i;
485   short s;
486   int i2;
487   short s2;
488   int i3;
489   short s3;
490 };
491 typedef struct s42 s42_no_align;
492 // passing structs in registers
// Callee for the unaligned > 16-byte struct case. The patterns expect both
// structs to be received as %struct.s42 pointers, with members addressed
// directly through those pointers (no coercion store). Only the `i` and `s`
// members are read.
493 __attribute__ ((noinline))
f42(int i,s42_no_align s1,s42_no_align s2)494 int f42(int i, s42_no_align s1, s42_no_align s2) {
495 // CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
496 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
497 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
498 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
499 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
500   return s1.i + s2.i + i + s1.s + s2.s;
501 }
502 s42_no_align g42;
503 s42_no_align g42_2;
// Call-site side of f42: the patterns expect two 4-byte-aligned temporaries,
// a memcpy into each, and a call that passes pointers to the temporaries.
caller42()504 int caller42() {
505 // CHECK: define i32 @caller42()
506 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
507 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
508 // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
509 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
510 // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
511 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
512 // CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]])
513   return f42(3, g42, g42_2);
514 }
515 // passing structs on stack
// Variant of f42 with nine leading int parameters ahead of the two structs
// (the "on stack" case named by the preceding comment). The structs are still
// expected as %struct.s42 pointers. Returns the sum of all int parameters and
// the `i`/`s` members of both structs.
516 __attribute__ ((noinline))
f42_stack(int i,int i2,int i3,int i4,int i5,int i6,int i7,int i8,int i9,s42_no_align s1,s42_no_align s2)517 int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
518               int i9, s42_no_align s1, s42_no_align s2) {
519 // CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2)
520 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
521 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
522 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
523 // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
524   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
525 }
// Call-site side of f42_stack: nine integer constants followed by pointers to
// two 4-byte-aligned temporaries, each filled by a memcpy.
caller42_stack()526 int caller42_stack() {
527 // CHECK: define i32 @caller42_stack()
528 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
529 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
530 // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
531 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
532 // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
533 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
534 // CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]])
535   return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2);
536 }
537 
538 // structs with size > 16 bytes, with alignment attribute
539 struct s43
540 {
541   int i;
542   short s;
543   int i2;
544   short s2;
545   int i3;
546   short s3;
547 } __attribute__((aligned(16)));
548 typedef struct s43 s43_with_align;
549 // passing aligned structs in registers
// Callee for the > 16-byte struct declared with aligned(16). As with the
// unaligned s42, the patterns expect both structs to be received as pointers
// (%struct.s43*). Only the `i` and `s` members are read.
550 __attribute__ ((noinline))
f43(int i,s43_with_align s1,s43_with_align s2)551 int f43(int i, s43_with_align s1, s43_with_align s2) {
552 // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2)
553 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0
554 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0
555 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1
556 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1
557   return s1.i + s2.i + i + s1.s + s2.s;
558 }
559 s43_with_align g43;
560 s43_with_align g43_2;
// Call-site side of f43: the patterns expect two 16-byte-aligned temporaries,
// a memcpy into each, and a call that passes pointers to the temporaries.
caller43()561 int caller43() {
562 // CHECK: define i32 @caller43()
563 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
564 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
565 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
566 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
567 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
568 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
569 // CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]])
570   return f43(3, g43, g43_2);
571 }
572 // passing aligned structs on stack
// Variant of f43 with nine leading int parameters ahead of the two aligned
// structs (the "on stack" case named by the preceding comment). The structs
// are still expected as %struct.s43 pointers. Returns the sum of all int
// parameters and the `i`/`s` members of both structs.
573 __attribute__ ((noinline))
f43_stack(int i,int i2,int i3,int i4,int i5,int i6,int i7,int i8,int i9,s43_with_align s1,s43_with_align s2)574 int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
575               int i9, s43_with_align s1, s43_with_align s2) {
576 // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2)
577 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0
578 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0
579 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1
580 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1
581   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
582 }
// Call-site side of f43_stack: nine integer constants followed by pointers to
// two 16-byte-aligned temporaries, each filled by a memcpy.
caller43_stack()583 int caller43_stack() {
584 // CHECK: define i32 @caller43_stack()
585 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
586 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
587 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
588 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
589 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
590 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
591 // CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]])
592   return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2);
593 }
594 
595 // rdar://13668927
596 // We should not split argument s1 between registers and stack.
// Seven leading int parameters followed by two unaligned 16-byte structs, so
// that s1 would straddle the register/stack boundary if it were split. The
// pattern only pins the IR signature: both structs stay whole as [2 x i64].
// Returns the sum of the int parameters and the `i`/`s` members of both structs.
597 __attribute__ ((noinline))
f40_split(int i,int i2,int i3,int i4,int i5,int i6,int i7,s40_no_align s1,s40_no_align s2)598 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
599               s40_no_align s1, s40_no_align s2) {
600 // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
601   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
602 }
// Call-site side of f40_split: seven integer constants followed by the globals
// g40 and g40_2. The call pattern spells out the comma separating the two
// [2 x i64] operands, in the same form as the f41_split call pattern, instead
// of relying on the regex wildcard to absorb it.
caller40_split()603 int caller40_split() {
604 // CHECK: define i32 @caller40_split()
605 // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}}, [2 x i64] %{{.*}})
606   return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
607 }
608 
// Aligned counterpart of f40_split: seven leading int parameters followed by
// two aligned(16) structs. The pattern only pins the IR signature, where both
// structs are expected as i128. Returns the sum of the int parameters and the
// `i`/`s` members of both structs.
609 __attribute__ ((noinline))
f41_split(int i,int i2,int i3,int i4,int i5,int i6,int i7,s41_with_align s1,s41_with_align s2)610 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
611               s41_with_align s1, s41_with_align s2) {
612 // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
613   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
614 }
// Call-site side of f41_split: seven integer constants followed by the aligned
// globals g41 and g41_2, each expected to be passed as i128.
caller41_split()615 int caller41_split() {
616 // CHECK: define i32 @caller41_split()
617 // CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
618   return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
619 }
620 
621 // Handle homogeneous aggregates properly in variadic functions.
622 struct HFA {
623   float a, b, c, d;
624 };
625 
// Variadic callee: fetches one struct HFA (four floats) from the variable
// arguments and returns its `d` member. The patterns expect the argument
// pointer to advance by 16 bytes and the current slot to be reinterpreted
// directly as a %struct.HFA pointer. Note there is no va_end call here.
test_hfa(int n,...)626 float test_hfa(int n, ...) {
627 // CHECK-LABEL: define float @test_hfa(i32 %n, ...)
628 // CHECK: [[THELIST:%.*]] = alloca i8*
629 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
630 
631   // HFA is not indirect, so occupies its full 16 bytes on the stack.
632 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16
633 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
634 
635 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
636   __builtin_va_list thelist;
637   __builtin_va_start(thelist, n);
638   struct HFA h = __builtin_va_arg(thelist, struct HFA);
639   return h.d;
640 }
641 
// Call-site side of test_hfa: passes *a as the single variadic argument; the
// pattern expects it to be passed as [4 x float].
// NOTE(review): the function is declared to return float but has no return
// statement (test_hva_v3_call returns its callee's result) - confirm the
// omission is intentional; warnings are suppressed for this test via -w.
test_hfa_call(struct HFA * a)642 float test_hfa_call(struct HFA *a) {
643 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
644 // CHECK: call float (i32, ...) @test_hfa(i32 1, [4 x float] {{.*}})
645   test_hfa(1, *a);
646 }
647 
648 struct TooBigHFA {
649   float a, b, c, d, e;
650 };
651 
// Variadic callee: fetches one struct TooBigHFA (five floats) from the variable
// arguments and returns its `d` member. The patterns expect the argument
// pointer to advance by only 8 bytes and the slot to be read as a pointer to
// the struct, i.e. the value is fetched indirectly.
test_toobig_hfa(int n,...)652 float test_toobig_hfa(int n, ...) {
653 // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...)
654 // CHECK: [[THELIST:%.*]] = alloca i8*
655 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
656 
657   // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
658   // of stack consumed.
659 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
660 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
661 
662 // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA**
663 // CHECK: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]]
664   __builtin_va_list thelist;
665   __builtin_va_start(thelist, n);
666   struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
667   return h.d;
668 }
669 
670 struct HVA {
671   int32x4_t a, b;
672 };
673 
// Variadic callee: fetches one struct HVA (two int32x4_t vectors) from the
// variable arguments and returns its `b` member. The patterns expect the
// argument pointer to be rounded up to a 16-byte boundary (add 15, mask with
// -16), advanced by 32 bytes, and the aligned slot reinterpreted as %struct.HVA*.
test_hva(int n,...)674 int32x4_t test_hva(int n, ...) {
675 // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...)
676 // CHECK: [[THELIST:%.*]] = alloca i8*
677 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
678 
679   // HVA is not indirect, so occupies its full 32 bytes on the stack, but it
680   // must be properly aligned.
681 // CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
682 // CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
683 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
684 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
685 
686 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32
687 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
688 
689 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
690   __builtin_va_list thelist;
691   __builtin_va_start(thelist, n);
692   struct HVA h = __builtin_va_arg(thelist, struct HVA);
693   return h.b;
694 }
695 
696 struct TooBigHVA {
697   int32x4_t a, b, c, d, e;
698 };
699 
// Variadic callee: fetches one struct TooBigHVA (five int32x4_t vectors) from
// the variable arguments and returns its `d` member. The patterns expect the
// argument pointer to advance by only 8 bytes and the slot to be read as a
// pointer to the struct, with no realignment step.
test_toobig_hva(int n,...)700 int32x4_t test_toobig_hva(int n, ...) {
701 // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...)
702 // CHECK: [[THELIST:%.*]] = alloca i8*
703 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
704 
705   // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
706   // of stack consumed.
707 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
708 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
709 
710 // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**
711 // CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]
712   __builtin_va_list thelist;
713   __builtin_va_start(thelist, n);
714   struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
715   return h.d;
716 }
717 
718 typedef __attribute__((__ext_vector_type__(3))) float float32x3_t;
719 typedef struct { float32x3_t arr[4]; } HFAv3;
720 
// Variadic callee: fetches one HFAv3 (array of four 3-element float vectors)
// from the variable arguments and returns element 2 of the array. The patterns
// expect the argument pointer to be rounded up to a 16-byte boundary, advanced
// by 64 bytes, and the aligned slot reinterpreted as %struct.HFAv3*.
test_hva_v3(int n,...)721 float32x3_t test_hva_v3(int n, ...) {
722 // CHECK-LABEL: define <3 x float> @test_hva_v3(i32 %n, ...)
723 // CHECK: [[THELIST:%.*]] = alloca i8*
724 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
725 
726   // HVA is not indirect, so occupies its full 64 bytes on the stack, but it
727   // must be properly aligned.
728 // CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
729 // CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
730 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
731 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
732 
733 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64
734 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
735 
736 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3*
737   __builtin_va_list l;
738   __builtin_va_start(l, n);
739   HFAv3 r = __builtin_va_arg(l, HFAv3);
740   return r.arr[2];
741 }
742 
// Call-site side of test_hva_v3: passes *a as the single variadic argument and
// returns the callee's result. The pattern expects the aggregate to be passed
// as [4 x <4 x float>], i.e. each 3-element vector widened to four lanes.
test_hva_v3_call(HFAv3 * a)743 float32x3_t test_hva_v3_call(HFAv3 *a) {
744 // CHECK-LABEL: define <3 x float> @test_hva_v3_call(%struct.HFAv3* %a)
745 // CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 1, [4 x <4 x float>] {{.*}})
746   return test_hva_v3(1, *a);
747 }
748