; Test subtraction of a zero-extended i32 from an i64.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; External helper with no arguments and an i64 result; it is called by
; f2, f3 and f11 below.
declare i64 @foo()

; Check SLGFR, the register-register form: %b is zero-extended to i64 and
; subtracted from %a via usub.with.overflow.  The difference is stored
; through %res and the overflow bit is returned as a zero-extended i1.
define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; CHECK-LABEL: f1:
; CHECK: slgfr %r3, %r4
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch: the difference is stored
; through %res and @foo is tail-called only when the subtraction overflows.
define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; CHECK-LABEL: f2:
; CHECK: slgfr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgle foo@PLT
; CHECK: br %r14
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  br i1 %obit, label %call, label %exit

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction: @foo is tail-called only
; when the subtraction does not overflow.
define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; CHECK-LABEL: f3:
; CHECK: slgfr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgnle foo@PLT
; CHECK: br %r14
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  br i1 %obit, label %exit, label %call

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; Check SLGF with no displacement: the i32 operand is loaded directly from
; %src, zero-extended and subtracted from %a.  The difference is stored
; through %res and the overflow bit is returned.
define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f4:
; CHECK: slgf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %b = load i32, i32 *%src
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the high end of the aligned SLGF range: i32 element index 131071,
; i.e. a byte displacement of 524284 that is expected to be folded into
; the instruction.
define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f5:
; CHECK: slgf %r3, 524284(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %b = load i32, i32 *%ptr
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the next word up (i32 element index 131072, byte offset 524288),
; which needs separate address logic: the offset is expected to be added
; to the base register first and the load then uses displacement 0.
; Other sequences besides this one would be OK.
define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f6:
; CHECK: agfi %r4, 524288
; CHECK: slgf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %b = load i32, i32 *%ptr
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned SLGF range: i32 element
; index -1, i.e. a byte displacement of -4.
define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f7:
; CHECK: slgf %r3, -4(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -1
  %b = load i32, i32 *%ptr
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the low end of the SLGF range: i32 element index -131072, i.e. a
; byte displacement of -524288 that is expected to be folded into the
; instruction.
define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f8:
; CHECK: slgf %r3, -524288(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131072
  %b = load i32, i32 *%ptr
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the next word down (i32 element index -131073, byte offset
; -524292), which needs separate address logic: the offset is expected to
; be added to the base register first and the load then uses displacement 0.
; Other sequences besides this one would be OK.
define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f9:
; CHECK: agfi %r4, -524292
; CHECK: slgf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131073
  %b = load i32, i32 *%ptr
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check that SLGF allows an index: the load address is computed as
; %src + %index + 524284, and the expected instruction uses both registers
; plus the displacement.  Either register may serve as base or index.
define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) {
; CHECK-LABEL: f10:
; CHECK: slgf %r4, 524284({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: stg %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 524284
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %bext = zext i32 %b to i64
  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check that subtractions of spilled values can use SLGF rather than SLGFR.
;
; Ten i32 values are loaded from %ptr0 (every second element), each has 100
; added and is stored back, and then @foo is called.  After the call, the ten
; adjusted values are zero-extended and subtracted one after another from
; @foo's result; the function returns the OR of all ten overflow bits.
; The adjusted values are all still needed after the call, so presumably at
; least one of them has to be spilled; the test expects such a value to be
; subtracted straight from its stack slot.
define zeroext i1 @f11(i32 *%ptr0) {
; CHECK-LABEL: f11:
; CHECK: brasl %r14, foo@PLT
; CHECK: slgf {{%r[0-9]+}}, 160(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
  %ptr9 = getelementptr i32, i32 *%ptr0, i64 18

  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8
  %val9 = load i32, i32 *%ptr9

  %frob0 = add i32 %val0, 100
  %frob1 = add i32 %val1, 100
  %frob2 = add i32 %val2, 100
  %frob3 = add i32 %val3, 100
  %frob4 = add i32 %val4, 100
  %frob5 = add i32 %val5, 100
  %frob6 = add i32 %val6, 100
  %frob7 = add i32 %val7, 100
  %frob8 = add i32 %val8, 100
  %frob9 = add i32 %val9, 100

  store i32 %frob0, i32 *%ptr0
  store i32 %frob1, i32 *%ptr1
  store i32 %frob2, i32 *%ptr2
  store i32 %frob3, i32 *%ptr3
  store i32 %frob4, i32 *%ptr4
  store i32 %frob5, i32 *%ptr5
  store i32 %frob6, i32 *%ptr6
  store i32 %frob7, i32 *%ptr7
  store i32 %frob8, i32 *%ptr8
  store i32 %frob9, i32 *%ptr9

  %ret = call i64 @foo()

  %ext0 = zext i32 %frob0 to i64
  %ext1 = zext i32 %frob1 to i64
  %ext2 = zext i32 %frob2 to i64
  %ext3 = zext i32 %frob3 to i64
  %ext4 = zext i32 %frob4 to i64
  %ext5 = zext i32 %frob5 to i64
  %ext6 = zext i32 %frob6 to i64
  %ext7 = zext i32 %frob7 to i64
  %ext8 = zext i32 %frob8 to i64
  %ext9 = zext i32 %frob9 to i64

  ; Note: despite their names, %add0..%add9 hold the running differences.
  %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %ext0)
  %add0 = extractvalue {i64, i1} %t0, 0
  %obit0 = extractvalue {i64, i1} %t0, 1
  %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %ext1)
  %add1 = extractvalue {i64, i1} %t1, 0
  %obit1 = extractvalue {i64, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %ext2)
  %add2 = extractvalue {i64, i1} %t2, 0
  %obit2 = extractvalue {i64, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %ext3)
  %add3 = extractvalue {i64, i1} %t3, 0
  %obit3 = extractvalue {i64, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %ext4)
  %add4 = extractvalue {i64, i1} %t4, 0
  %obit4 = extractvalue {i64, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %ext5)
  %add5 = extractvalue {i64, i1} %t5, 0
  %obit5 = extractvalue {i64, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %ext6)
  %add6 = extractvalue {i64, i1} %t6, 0
  %obit6 = extractvalue {i64, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %ext7)
  %add7 = extractvalue {i64, i1} %t7, 0
  %obit7 = extractvalue {i64, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %ext8)
  %add8 = extractvalue {i64, i1} %t8, 0
  %obit8 = extractvalue {i64, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %ext9)
  %add9 = extractvalue {i64, i1} %t9, 0
  %obit9 = extractvalue {i64, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

; Unsigned-subtract-with-overflow intrinsic used by every test above; it
; returns the i64 difference together with an i1 overflow flag.
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone