1; RUN: llc < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s
2; RUN: llc < %s -march=ppc64le -mattr=+altivec -mattr=-vsx | FileCheck %s
3
4; VSX is disabled for this test because, with VSX enabled, we currently
5; generate lxsdx instead of lfd and stxsdx instead of stfd.  That is a poor
6; choice when reg+imm addressing is available, and it is on the list of things
7; to be fixed.  The second run line ensures that -march=ppc64le alone selects
8; the same feature set as -mcpu=pwr8, since pwr8 is the baseline for ppc64le.
9
; Module-wide target settings: a little-endian ("e") data layout string and
; the powerpc64le Linux triple.
10target datalayout = "e-m:e-i64:64-n32:64"
11target triple = "powerpc64le-unknown-linux-gnu"
12
13;
14; Verify use of registers for float/vector aggregate return.
15;
16
; Returns its [8 x float] argument unchanged.  The expected output has blr on
; the line immediately after the one matching %entry, i.e. no instructions are
; emitted to move the aggregate between its incoming and outgoing locations.
17define [8 x float] @return_float([8 x float] %x) {
18entry:
19  ret [8 x float] %x
20}
21; CHECK-LABEL: @return_float
22; CHECK: %entry
23; CHECK-NEXT: blr
24
; Returns its [8 x double] argument unchanged.  As with the float variant,
; the expected output has blr directly after the line matching %entry, so no
; copy instructions may be generated.
25define [8 x double] @return_double([8 x double] %x) {
26entry:
27  ret [8 x double] %x
28}
29; CHECK-LABEL: @return_double
30; CHECK: %entry
31; CHECK-NEXT: blr
32
; Returns its [4 x ppc_fp128] argument unchanged.  The expected output has
; blr directly after the line matching %entry, so the aggregate must come
; back without any intervening instructions.
33define [4 x ppc_fp128] @return_ppcf128([4 x ppc_fp128] %x) {
34entry:
35  ret [4 x ppc_fp128] %x
36}
37; CHECK-LABEL: @return_ppcf128
38; CHECK: %entry
39; CHECK-NEXT: blr
40
; Returns its [8 x <4 x i32>] (array of vectors) argument unchanged.  The
; expected output has blr directly after the line matching %entry, so no
; vector moves may be generated.
41define [8 x <4 x i32>] @return_v4i32([8 x <4 x i32>] %x) {
42entry:
43  ret [8 x <4 x i32>] %x
44}
45; CHECK-LABEL: @return_v4i32
46; CHECK: %entry
47; CHECK-NEXT: blr
48
49
50;
51; Verify amount of space taken up by aggregates in the parameter save area.
52;
53
; Returns the trailing i64 %c, which follows two [7 x float] aggregates in the
; argument list.  The expected code reloads it with "ld 3, 96(1)", which pins
; the stack offset at which %c is found after both aggregates.
54define i64 @callee_float([7 x float] %a, [7 x float] %b, i64 %c) {
55entry:
56  ret i64 %c
57}
58; CHECK-LABEL: @callee_float
59; CHECK: ld 3, 96(1)
60; CHECK: blr
61
; Caller-side counterpart of callee_float: passes %y twice and then %x to the
; external test_float.  The expected code contains "std 3, 96(1)" (presumably
; storing %x, the first integer argument) before the "bl test_float" call,
; matching the offset the callee loads from.
62define void @caller_float(i64 %x, [7 x float] %y) {
63entry:
64  tail call void @test_float([7 x float] %y, [7 x float] %y, i64 %x)
65  ret void
66}
67; CHECK-LABEL: @caller_float
68; CHECK: std 3, 96(1)
69; CHECK: bl test_float
70
71declare void @test_float([7 x float], [7 x float], i64)
72
; Returns the trailing i64 %c, which follows an i64 and a [7 x double]
; aggregate.  The expected code reloads it with "ld 3, 96(1)".
73define i64 @callee_double(i64 %a, [7 x double] %b, i64 %c) {
74entry:
75  ret i64 %c
76}
77; CHECK-LABEL: @callee_double
78; CHECK: ld 3, 96(1)
79; CHECK: blr
80
; Caller-side counterpart of callee_double: passes %x, the [7 x double] %y and
; %x again to the external test_double.  The expected code contains
; "std 3, 96(1)" before the "bl test_double" call.
81define void @caller_double(i64 %x, [7 x double] %y) {
82entry:
83  tail call void @test_double(i64 %x, [7 x double] %y, i64 %x)
84  ret void
85}
86; CHECK-LABEL: @caller_double
87; CHECK: std 3, 96(1)
88; CHECK: bl test_double
89
90declare void @test_double(i64, [7 x double], i64)
91
; Returns the trailing i64 %c, which follows an i64 and a [4 x ppc_fp128]
; aggregate.  The expected code reloads it with "ld 3, 104(1)".
92define i64 @callee_ppcf128(i64 %a, [4 x ppc_fp128] %b, i64 %c) {
93entry:
94  ret i64 %c
95}
96; CHECK-LABEL: @callee_ppcf128
97; CHECK: ld 3, 104(1)
98; CHECK: blr
99
; Caller-side counterpart of callee_ppcf128: passes %x, the [4 x ppc_fp128] %y
; and %x again to the external test_ppcf128.  The expected code contains
; "std 3, 104(1)" before the "bl test_ppcf128" call.
100define void @caller_ppcf128(i64 %x, [4 x ppc_fp128] %y) {
101entry:
102  tail call void @test_ppcf128(i64 %x, [4 x ppc_fp128] %y, i64 %x)
103  ret void
104}
105; CHECK-LABEL: @caller_ppcf128
106; CHECK: std 3, 104(1)
107; CHECK: bl test_ppcf128
108
109declare void @test_ppcf128(i64, [4 x ppc_fp128], i64)
110
; Returns the trailing i64 %c, which follows an i64 and a [7 x i64] aggregate.
; The expected code reloads it with "ld 3, 96(1)".
111define i64 @callee_i64(i64 %a, [7 x i64] %b, i64 %c) {
112entry:
113  ret i64 %c
114}
115; CHECK-LABEL: @callee_i64
116; CHECK: ld 3, 96(1)
117; CHECK: blr
118
; Caller-side counterpart of callee_i64: passes %x, the [7 x i64] %y and %x
; again to the external test_i64.  The expected code contains "std 3, 96(1)"
; before the "bl test_i64" call.
119define void @caller_i64(i64 %x, [7 x i64] %y) {
120entry:
121  tail call void @test_i64(i64 %x, [7 x i64] %y, i64 %x)
122  ret void
123}
124; CHECK-LABEL: @caller_i64
125; CHECK: std 3, 96(1)
126; CHECK: bl test_i64
127
128declare void @test_i64(i64, [7 x i64], i64)
129
; Returns the trailing i64 %c, which follows an i64 and a [4 x i128]
; aggregate.  The expected code reloads it with "ld 3, 112(1)"; note the
; larger offset than in the ppc_fp128 variant (104) for an aggregate of the
; same element count.
130define i64 @callee_i128(i64 %a, [4 x i128] %b, i64 %c) {
131entry:
132  ret i64 %c
133}
134; CHECK-LABEL: @callee_i128
135; CHECK: ld 3, 112(1)
136; CHECK: blr
137
; Caller-side counterpart of callee_i128: passes %x, the [4 x i128] %y and %x
; again to the external test_i128.  The expected code contains
; "std 3, 112(1)" before the "bl test_i128" call.
138define void @caller_i128(i64 %x, [4 x i128] %y) {
139entry:
140  tail call void @test_i128(i64 %x, [4 x i128] %y, i64 %x)
141  ret void
142}
143; CHECK-LABEL: @caller_i128
144; CHECK: std 3, 112(1)
145; CHECK: bl test_i128
146
147declare void @test_i128(i64, [4 x i128], i64)
148
; Returns the trailing i64 %c, which follows an i64 and a [4 x <4 x i32>]
; aggregate of vectors.  The expected code reloads it with "ld 3, 112(1)",
; the same offset as in the i128 variant.
149define i64 @callee_v4i32(i64 %a, [4 x <4 x i32>] %b, i64 %c) {
150entry:
151  ret i64 %c
152}
153; CHECK-LABEL: @callee_v4i32
154; CHECK: ld 3, 112(1)
155; CHECK: blr
156
; Caller-side counterpart of callee_v4i32: passes %x, the [4 x <4 x i32>] %y
; and %x again to the external test_v4i32.  The expected code contains
; "std 3, 112(1)" before the "bl test_v4i32" call.
157define void @caller_v4i32(i64 %x, [4 x <4 x i32>] %y) {
158entry:
159  tail call void @test_v4i32(i64 %x, [4 x <4 x i32>] %y, i64 %x)
160  ret void
161}
162; CHECK-LABEL: @caller_v4i32
163; CHECK: std 3, 112(1)
164; CHECK: bl test_v4i32
165
166declare void @test_v4i32(i64, [4 x <4 x i32>], i64)
167
168
169;
170; Verify handling of floating-point arguments passed in GPRs.
171;
172
; Wrapper struct types around float arrays of 8, 5 and 2 elements, and one
; zero-initialized, 4-byte-aligned common global of each type.  The globals
; are the sources of the aggregate arguments loaded in caller2, caller3 and
; caller4 (@g2 is used by caller2 only).
173%struct.float8 = type { [8 x float] }
174%struct.float5 = type { [5 x float] }
175%struct.float2 = type { [2 x float] }
176
177@g8 = common global %struct.float8 zeroinitializer, align 4
178@g5 = common global %struct.float5 zeroinitializer, align 4
179@g2 = common global %struct.float2 zeroinitializer, align 4
180
; Returns the last element (index 6) of the second [7 x float] argument.
; The expected code stores register 10 to a stack slot with stw and reloads
; the same slot into FP register 1 with lfs, i.e. the element arrives in a
; GPR and is moved to the FP return register through memory.
181define float @callee0([7 x float] %a, [7 x float] %b) {
182entry:
183  %b.extract = extractvalue [7 x float] %b, 6
184  ret float %b.extract
185}
186; CHECK-LABEL: @callee0
187; CHECK: stw 10, [[OFF:.*]](1)
188; CHECK: lfs 1, [[OFF]](1)
189; CHECK: blr
190
; Caller-side counterpart of callee0: passes the same [7 x float] twice to the
; external test0.  The expected code, in any order before the call, copies FP
; registers 1-6 into 8-13 with fmr, and spills FP register 7 with stfs to a
; stack slot that is reloaded into GPR 10 with lwz.
191define void @caller0([7 x float] %a) {
192entry:
193  tail call void @test0([7 x float] %a, [7 x float] %a)
194  ret void
195}
196; CHECK-LABEL: @caller0
197; CHECK-DAG: fmr 8, 1
198; CHECK-DAG: fmr 9, 2
199; CHECK-DAG: fmr 10, 3
200; CHECK-DAG: fmr 11, 4
201; CHECK-DAG: fmr 12, 5
202; CHECK-DAG: fmr 13, 6
203; CHECK-DAG: stfs 7, [[OFF:[0-9]+]](1)
204; CHECK-DAG: lwz 10, [[OFF]](1)
205; CHECK: bl test0
206
207declare void @test0([7 x float], [7 x float])
208
; Returns the last element (index 7) of the second [8 x float] argument.
; The expected code first applies "rldicl REG, 10, 32, 32" (rotate left by 32
; and clear the upper 32 bits, which brings the upper word of register 10 into
; the low word), then stores REG with stw and reloads that slot into FP
; register 1 with lfs.
209define float @callee1([8 x float] %a, [8 x float] %b) {
210entry:
211  %b.extract = extractvalue [8 x float] %b, 7
212  ret float %b.extract
213}
214; CHECK-LABEL: @callee1
215; CHECK: rldicl [[REG:[0-9]+]], 10, 32, 32
216; CHECK: stw [[REG]], [[OFF:.*]](1)
217; CHECK: lfs 1, [[OFF]](1)
218; CHECK: blr
219
; Caller-side counterpart of callee1: passes the same [8 x float] twice to the
; external test1.  The expected code, in any order before the call:
;   - copies FP registers 1-5 into 9-13 with fmr;
;   - spills FP registers 5-8 with stfs and reloads each slot into a GPR with
;     lwz;
;   - shifts two of the reloaded words left by 32 with sldi and combines the
;     words pairwise with "or" into GPRs 9 and 10, so that each GPR carries
;     two packed floats.
220define void @caller1([8 x float] %a) {
221entry:
222  tail call void @test1([8 x float] %a, [8 x float] %a)
223  ret void
224}
225; CHECK-LABEL: @caller1
226; CHECK-DAG: fmr 9, 1
227; CHECK-DAG: fmr 10, 2
228; CHECK-DAG: fmr 11, 3
229; CHECK-DAG: fmr 12, 4
230; CHECK-DAG: fmr 13, 5
231; CHECK-DAG: stfs 5, [[OFF0:[0-9]+]](1)
232; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)
233; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)
234; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)
235; CHECK-DAG: lwz [[REG0:[0-9]+]], [[OFF0]](1)
236; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)
237; CHECK-DAG: lwz [[REG2:[0-9]+]], [[OFF2]](1)
238; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)
239; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
240; CHECK-DAG: sldi [[REG3]], [[REG3]], 32
241; CHECK-DAG: or 9, [[REG0]], [[REG1]]
242; CHECK-DAG: or 10, [[REG2]], [[REG3]]
243; CHECK: bl test1
244
245declare void @test1([8 x float], [8 x float])
246
; Returns element 1 of the trailing [2 x float] argument, which follows an
; [8 x float] and a [5 x float].  The expected code takes the upper word of
; register 10 via "rldicl REG, 10, 32, 32", stores it with stw and reloads
; that slot into FP register 1 with lfs.
247define float @callee2([8 x float] %a, [5 x float] %b, [2 x float] %c) {
248entry:
249  %c.extract = extractvalue [2 x float] %c, 1
250  ret float %c.extract
251}
252; CHECK-LABEL: @callee2
253; CHECK: rldicl [[REG:[0-9]+]], 10, 32, 32
254; CHECK: stw [[REG]], [[OFF:.*]](1)
255; CHECK: lfs 1, [[OFF]](1)
256; CHECK: blr
257
; Caller-side counterpart of callee2: loads the three float arrays from the
; globals @g8, @g5 and @g2 and passes them to the external test2.  The
; expected code, after an ld whose operand matches ".LC":
;   - loads the 8 floats at byte offsets 0-28 into FP registers 1-8;
;   - loads the 5 floats at byte offsets 0-16 into FP registers 9-13;
;   - loads the 2 floats as words with lwz, shifts the second left by 32 with
;     sldi and combines both with "or" into GPR 10.
; The base registers are left unconstrained by the patterns.
258define void @caller2() {
259entry:
260  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8, %struct.float8* @g8, i64 0, i32 0), align 4
261  %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5, %struct.float5* @g5, i64 0, i32 0), align 4
262  %2 = load [2 x float], [2 x float]* getelementptr inbounds (%struct.float2, %struct.float2* @g2, i64 0, i32 0), align 4
263  tail call void @test2([8 x float] %0, [5 x float] %1, [2 x float] %2)
264  ret void
265}
266; CHECK-LABEL: @caller2
267; CHECK: ld {{[0-9]+}}, .LC
268; CHECK-DAG: lfs 1, 0({{[0-9]+}})
269; CHECK-DAG: lfs 2, 4({{[0-9]+}})
270; CHECK-DAG: lfs 3, 8({{[0-9]+}})
271; CHECK-DAG: lfs 4, 12({{[0-9]+}})
272; CHECK-DAG: lfs 5, 16({{[0-9]+}})
273; CHECK-DAG: lfs 6, 20({{[0-9]+}})
274; CHECK-DAG: lfs 7, 24({{[0-9]+}})
275; CHECK-DAG: lfs 8, 28({{[0-9]+}})
276
277; CHECK-DAG: lfs 9, 0({{[0-9]+}})
278; CHECK-DAG: lfs 10, 4({{[0-9]+}})
279; CHECK-DAG: lfs 11, 8({{[0-9]+}})
280; CHECK-DAG: lfs 12, 12({{[0-9]+}})
281; CHECK-DAG: lfs 13, 16({{[0-9]+}})
282
283; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}})
284; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}})
285; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32
286; CHECK-DAG: or 10, [[REG0]], [[REG2]]
287; CHECK: bl test2
288
289declare void @test2([8 x float], [5 x float], [2 x float])
290
; Returns the trailing scalar double %c, which follows an [8 x float] and a
; [5 x float].  The expected code stores register 10 with std and reloads the
; same slot into FP register 1 with lfd, i.e. the double arrives in a GPR.
291define double @callee3([8 x float] %a, [5 x float] %b, double %c) {
292entry:
293  ret double %c
294}
295; CHECK-LABEL: @callee3
296; CHECK: std 10, [[OFF:.*]](1)
297; CHECK: lfd 1, [[OFF]](1)
298; CHECK: blr
299
; Caller-side counterpart of callee3: loads the float arrays from @g8 and @g5
; and passes them, followed by the double %d, to the external test3.  The
; expected code spills FP register 1 with stfd and reloads the same slot into
; GPR 10 with ld before the call.
300define void @caller3(double %d) {
301entry:
302  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8, %struct.float8* @g8, i64 0, i32 0), align 4
303  %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5, %struct.float5* @g5, i64 0, i32 0), align 4
304  tail call void @test3([8 x float] %0, [5 x float] %1, double %d)
305  ret void
306}
307; CHECK-LABEL: @caller3
308; CHECK: stfd 1, [[OFF:.*]](1)
309; CHECK: ld 10, [[OFF]](1)
310; CHECK: bl test3
311
312declare void @test3([8 x float], [5 x float], double)
313
; Returns the trailing scalar float %c, which follows an [8 x float] and a
; [5 x float].  The expected code stores register 10 with stw and reloads the
; same slot into FP register 1 with lfs, i.e. the float arrives in a GPR.
314define float @callee4([8 x float] %a, [5 x float] %b, float %c) {
315entry:
316  ret float %c
317}
318; CHECK-LABEL: @callee4
319; CHECK: stw 10, [[OFF:.*]](1)
320; CHECK: lfs 1, [[OFF]](1)
321; CHECK: blr
322
; Caller-side counterpart of callee4: loads the float arrays from @g8 and @g5
; and passes them, followed by the float %f, to the external test4.  The
; expected code spills FP register 1 with stfs and reloads the same slot into
; GPR 10 with lwz before the call.
323define void @caller4(float %f) {
324entry:
325  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8, %struct.float8* @g8, i64 0, i32 0), align 4
326  %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5, %struct.float5* @g5, i64 0, i32 0), align 4
327  tail call void @test4([8 x float] %0, [5 x float] %1, float %f)
328  ret void
329}
330; CHECK-LABEL: @caller4
331; CHECK: stfs 1, [[OFF:.*]](1)
332; CHECK: lwz 10, [[OFF]](1)
333; CHECK: bl test4
334
335declare void @test4([8 x float], [5 x float], float)
336
337