; Test 32-bit unsigned subtraction with overflow detection
; (llvm.usub.with.overflow.i32) in which the second operand is variable.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; External function taking no arguments and returning i32.  It is the
; tail-call target in f2 and f3 and the call that values are kept live across
; in f14.
declare i32 @foo()

; Check SLR.
;
; Both %a and %b are plain i32 arguments, and the checks expect the
; register-register form `slr %r3, %r4`.  The difference is stored through
; %res and the overflow bit is returned as a zero-extended i1, which the
; checks expect to be materialised in %r2 by an ipm/afi/risbg sequence.
define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f1:
; CHECK: slr %r3, %r4
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  ; Field 0 is the difference, field 1 is the overflow bit.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch.
;
; @foo is tail-called only when the subtraction reports overflow.  The checks
; expect the branch and the call to be emitted as one conditional jump to
; foo@PLT (`jgle`), followed by a plain return for the fall-through path.
define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f2:
; CHECK: slr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgle foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ; Overflow set -> %call, otherwise straight to %exit.
  br i1 %obit, label %call, label %exit

call:
  ; The i32 result of @foo is deliberately unused.
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction.
;
; Here @foo is tail-called only when the subtraction does NOT report overflow,
; so the checks expect the negated condition (`jgnle`) on the jump to foo@PLT.
define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f3:
; CHECK: slr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgnle foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ; Overflow set -> %exit, otherwise %call (successors swapped relative to f2).
  br i1 %obit, label %exit, label %call

call:
  ; The i32 result of @foo is deliberately unused.
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; Check the low end of the SL range.
;
; The second operand is loaded directly from %src (byte offset 0), and the
; checks expect the load to be folded into `sl %r3, 0(%r4)`.
define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f4:
; CHECK: sl %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %b = load i32, i32 *%src
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SL range.
;
; The operand is the i32 at element index 1023 of %src, and the checks expect
; that displacement to still be encoded directly in SL.
define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f5:
; CHECK: sl %r3, 4092(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 1023 elements * 4 bytes = byte offset 4092.
  %ptr = getelementptr i32, i32 *%src, i64 1023
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which should use SLY instead of SL.
;
; One element beyond f5: the checks expect the displacement 4096 to select the
; SLY form rather than SL.
define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f6:
; CHECK: sly %r3, 4096(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 1024 elements * 4 bytes = byte offset 4096.
  %ptr = getelementptr i32, i32 *%src, i64 1024
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SLY range.
;
; The operand is the i32 at element index 131071 of %src; the checks expect
; the resulting displacement to be encoded directly in SLY.
define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f7:
; CHECK: sly %r3, 524284(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 131071 elements * 4 bytes = byte offset 524284.
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; One element beyond f7: the checks expect the offset to be added to the base
; register with agfi first, after which SL is used with displacement 0.
define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, 524288
; CHECK: sl %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; 131072 elements * 4 bytes = byte offset 524288.
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned SLY range.
;
; The operand is the i32 immediately before %src; the checks expect the
; negative displacement -4 to be encoded directly in SLY.
define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f9:
; CHECK: sly %r3, -4(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; -1 element * 4 bytes = byte offset -4.
  %ptr = getelementptr i32, i32 *%src, i64 -1
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the low end of the SLY range.
;
; The operand is the i32 at element index -131072 of %src; the checks expect
; the resulting displacement to be encoded directly in SLY.
define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f10:
; CHECK: sly %r3, -524288(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; -131072 elements * 4 bytes = byte offset -524288.
  %ptr = getelementptr i32, i32 *%src, i64 -131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; One element below f10: the checks expect the offset to be added to the base
; register with agfi first, after which SL is used with displacement 0.
define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f11:
; CHECK: agfi %r4, -524292
; CHECK: sl %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; -131073 elements * 4 bytes = byte offset -524292.
  %ptr = getelementptr i32, i32 *%src, i64 -131073
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SL allows an index.
;
; The address is built from integers as %src + %index + 4092.  The checks
; expect both registers and the constant to be folded into a single SL
; operand; either register may act as base or index.
define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f12:
; CHECK: sl %r4, 4092({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4092
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SLY allows an index.
;
; Same shape as f12 but with the constant 4096, for which the checks expect
; the SLY form with both registers folded into the address operand.
define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f13:
; CHECK: sly %r4, 4096({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that subtractions of spilled values can use SL rather than SLR.
;
; Ten i32 values are loaded from %ptr0 (every second element, indices 0..18)
; before the call to @foo and are all used after it.  The result of @foo is
; then reduced by each loaded value in turn, OR-ing the ten overflow bits
; together.  The checks expect at least one of the subtractions after the call
; to read its operand straight from a stack slot at 160(%r15) or 164(%r15).
;
; NOTE: the %addN values below hold running differences, not sums, despite
; their names.
define zeroext i1 @f14(i32 *%ptr0) {
; CHECK-LABEL: f14:
; CHECK: brasl %r14, foo@PLT
; CHECK: sl %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
  %ptr9 = getelementptr i32, i32 *%ptr0, i64 18

  ; All ten loads happen before the call, so their values are live across it.
  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8
  %val9 = load i32, i32 *%ptr9

  %ret = call i32 @foo()

  ; Chain: each step subtracts the next loaded value from the previous
  ; difference and folds its overflow bit into the accumulated result.
  %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %ret, i32 %val0)
  %add0 = extractvalue {i32, i1} %t0, 0
  %obit0 = extractvalue {i32, i1} %t0, 1
  %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add0, i32 %val1)
  %add1 = extractvalue {i32, i1} %t1, 0
  %obit1 = extractvalue {i32, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add1, i32 %val2)
  %add2 = extractvalue {i32, i1} %t2, 0
  %obit2 = extractvalue {i32, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add2, i32 %val3)
  %add3 = extractvalue {i32, i1} %t3, 0
  %obit3 = extractvalue {i32, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add3, i32 %val4)
  %add4 = extractvalue {i32, i1} %t4, 0
  %obit4 = extractvalue {i32, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add4, i32 %val5)
  %add5 = extractvalue {i32, i1} %t5, 0
  %obit5 = extractvalue {i32, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add5, i32 %val6)
  %add6 = extractvalue {i32, i1} %t6, 0
  %obit6 = extractvalue {i32, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add6, i32 %val7)
  %add7 = extractvalue {i32, i1} %t7, 0
  %obit7 = extractvalue {i32, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add7, i32 %val8)
  %add8 = extractvalue {i32, i1} %t8, 0
  %obit8 = extractvalue {i32, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add8, i32 %val9)
  %add9 = extractvalue {i32, i1} %t9, 0
  %obit9 = extractvalue {i32, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

; Intrinsic exercised by every function in this file.  Callers read field 0 of
; the returned pair as the i32 difference and field 1 as the i1 overflow bit.
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone