1; Test 64-bit addition in which the second operand is variable.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4
; External function taking no arguments and returning i64. It is tail-called
; from the branch tests (f2, f3) and called normally in the spill test (f11).
5declare i64 @foo()
6
7; Check ALGR.
; f1: register-register case. Calls llvm.uadd.with.overflow.i64 on %a and %b,
; stores element 0 of the result (the i64 sum) through %res and returns
; element 1 (the i1 overflow bit) as a zeroext i1.
; %dummy is never used in the body; presumably it only occupies the first
; argument register so that %a, %b and %res arrive in %r3, %r4 and %r5 as the
; expected assembly assumes (TODO confirm against the s390x ELF ABI).
; Expected assembly: one algr, a stg of the sum through %r5, an ipm into a
; register in the %r0-%r5 range and a risbg that writes %r2 from that register.
8define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) {
9; CHECK-LABEL: f1:
10; CHECK: algr %r3, %r4
11; CHECK-DAG: stg %r3, 0(%r5)
12; CHECK-DAG: ipm [[REG:%r[0-5]]]
13; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
14; CHECK: br %r14
15  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
16  %val = extractvalue {i64, i1} %t, 0
17  %obit = extractvalue {i64, i1} %t, 1
18  store i64 %val, i64 *%res
19  ret i1 %obit
20}
21
22; Check using the overflow result for a branch.
; f2: same addition and store as the register-register case, but the overflow
; bit feeds a conditional branch instead of being returned. When the bit is
; set, control goes to %call, which tail-calls @foo and discards its result;
; otherwise control goes straight to %exit. The function returns void.
; Expected assembly: algr, stg, then the call emitted as a single conditional
; jump to foo@PLT (jgnle), with a plain br %r14 for the other path.
; NOTE(review): jgnle presumably selects the carry condition codes set by the
; logical add - confirm against the z/Architecture condition-code tables.
23define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) {
24; CHECK-LABEL: f2:
25; CHECK: algr %r3, %r4
26; CHECK: stg %r3, 0(%r5)
27; CHECK: jgnle foo@PLT
28; CHECK: br %r14
29  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
30  %val = extractvalue {i64, i1} %t, 0
31  %obit = extractvalue {i64, i1} %t, 1
32  store i64 %val, i64 *%res
33  br i1 %obit, label %call, label %exit
34
35call:
36  tail call i64 @foo()
37  br label %exit
38
39exit:
40  ret void
41}
42
43; ... and the same with the inverted direction.
; f3: identical to the previous branch test except that the branch successors
; are swapped: a set overflow bit goes to %exit, and a clear one goes to %call,
; which tail-calls @foo. The function returns void.
; Expected assembly: algr, stg, then a conditional jump to foo@PLT using the
; opposite condition (jgle instead of jgnle), followed by br %r14.
44define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) {
45; CHECK-LABEL: f3:
46; CHECK: algr %r3, %r4
47; CHECK: stg %r3, 0(%r5)
48; CHECK: jgle foo@PLT
49; CHECK: br %r14
50  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
51  %val = extractvalue {i64, i1} %t, 0
52  %obit = extractvalue {i64, i1} %t, 1
53  store i64 %val, i64 *%res
54  br i1 %obit, label %exit, label %call
55
56call:
57  tail call i64 @foo()
58  br label %exit
59
60exit:
61  ret void
62}
63
64; Check ALG with no displacement.
; f4: memory-operand case. The second addend %b is loaded directly from %src
; (no offset) and added to %a with llvm.uadd.with.overflow.i64. The sum is
; stored through %res and the overflow bit is returned as a zeroext i1.
; Expected assembly: the load is folded into the add as alg %r3, 0(%r4),
; followed by the same stg / ipm / risbg sequence as the register case.
65define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
66; CHECK-LABEL: f4:
67; CHECK: alg %r3, 0(%r4)
68; CHECK-DAG: stg %r3, 0(%r5)
69; CHECK-DAG: ipm [[REG:%r[0-5]]]
70; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
71; CHECK: br %r14
72  %b = load i64, i64 *%src
73  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
74  %val = extractvalue {i64, i1} %t, 0
75  %obit = extractvalue {i64, i1} %t, 1
76  store i64 %val, i64 *%res
77  ret i1 %obit
78}
79
80; Check the high end of the aligned ALG range.
; f5: as the no-displacement memory case, but %b is loaded from element 65535
; of the i64 array at %src. The expected assembly shows this as a byte
; displacement of 524280 (65535 * 8) folded directly into the alg.
; The sum is stored through %res and the overflow bit is returned.
81define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
82; CHECK-LABEL: f5:
83; CHECK: alg %r3, 524280(%r4)
84; CHECK-DAG: stg %r3, 0(%r5)
85; CHECK-DAG: ipm [[REG:%r[0-5]]]
86; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
87; CHECK: br %r14
88  %ptr = getelementptr i64, i64 *%src, i64 65535
89  %b = load i64, i64 *%ptr
90  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
91  %val = extractvalue {i64, i1} %t, 0
92  %obit = extractvalue {i64, i1} %t, 1
93  store i64 %val, i64 *%res
94  ret i1 %obit
95}
96
97; Check the next doubleword up, which needs separate address logic.
98; Other sequences besides this one would be OK.
; f6: %b is loaded from element 65536 of the i64 array at %src, one element
; past the index used in the high-end displacement test. The expected assembly
; no longer folds the offset into the alg: it first adds 524288 (65536 * 8) to
; the base register with agfi and then uses alg with a zero displacement.
; The sum is stored through %res and the overflow bit is returned.
99define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
100; CHECK-LABEL: f6:
101; CHECK: agfi %r4, 524288
102; CHECK: alg %r3, 0(%r4)
103; CHECK-DAG: stg %r3, 0(%r5)
104; CHECK-DAG: ipm [[REG:%r[0-5]]]
105; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
106; CHECK: br %r14
107  %ptr = getelementptr i64, i64 *%src, i64 65536
108  %b = load i64, i64 *%ptr
109  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
110  %val = extractvalue {i64, i1} %t, 0
111  %obit = extractvalue {i64, i1} %t, 1
112  store i64 %val, i64 *%res
113  ret i1 %obit
114}
115
116; Check the high end of the negative aligned ALG range.
; f7: %b is loaded from element -1 of the i64 array at %src, i.e. the
; doubleword immediately below the base pointer. The expected assembly folds
; this into the alg as a displacement of -8.
; The sum is stored through %res and the overflow bit is returned.
117define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
118; CHECK-LABEL: f7:
119; CHECK: alg %r3, -8(%r4)
120; CHECK-DAG: stg %r3, 0(%r5)
121; CHECK-DAG: ipm [[REG:%r[0-5]]]
122; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
123; CHECK: br %r14
124  %ptr = getelementptr i64, i64 *%src, i64 -1
125  %b = load i64, i64 *%ptr
126  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
127  %val = extractvalue {i64, i1} %t, 0
128  %obit = extractvalue {i64, i1} %t, 1
129  store i64 %val, i64 *%res
130  ret i1 %obit
131}
132
133; Check the low end of the ALG range.
; f8: %b is loaded from element -65536 of the i64 array at %src. The expected
; assembly still folds the offset into the alg, as a displacement of -524288
; (-65536 * 8).
; The sum is stored through %res and the overflow bit is returned.
134define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
135; CHECK-LABEL: f8:
136; CHECK: alg %r3, -524288(%r4)
137; CHECK-DAG: stg %r3, 0(%r5)
138; CHECK-DAG: ipm [[REG:%r[0-5]]]
139; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
140; CHECK: br %r14
141  %ptr = getelementptr i64, i64 *%src, i64 -65536
142  %b = load i64, i64 *%ptr
143  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
144  %val = extractvalue {i64, i1} %t, 0
145  %obit = extractvalue {i64, i1} %t, 1
146  store i64 %val, i64 *%res
147  ret i1 %obit
148}
149
150; Check the next doubleword down, which needs separate address logic.
151; Other sequences besides this one would be OK.
; f9: %b is loaded from element -65537 of the i64 array at %src, one element
; below the index used in the low-end displacement test. The expected assembly
; adjusts the base register first with agfi %r4, -524296 (-65537 * 8) and then
; uses alg with a zero displacement.
; The sum is stored through %res and the overflow bit is returned.
152define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) {
153; CHECK-LABEL: f9:
154; CHECK: agfi %r4, -524296
155; CHECK: alg %r3, 0(%r4)
156; CHECK-DAG: stg %r3, 0(%r5)
157; CHECK-DAG: ipm [[REG:%r[0-5]]]
158; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
159; CHECK: br %r14
160  %ptr = getelementptr i64, i64 *%src, i64 -65537
161  %b = load i64, i64 *%ptr
162  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
163  %val = extractvalue {i64, i1} %t, 0
164  %obit = extractvalue {i64, i1} %t, 1
165  store i64 %val, i64 *%res
166  ret i1 %obit
167}
168
169; Check that ALG allows an index.
; f10: base + index + displacement addressing. Unlike the other tests, the
; address arrives as integers: %src and %index are added, 524280 is added to
; that, and the result is converted to an i64 pointer with inttoptr before the
; load. There is no %dummy parameter here; the addend %a is the third argument
; and the expected assembly uses %r4 for it.
; Expected assembly: a single alg whose memory operand carries the 524280
; displacement plus both %r2 and %r3, accepted in either base/index order.
; The sum is stored through %res and the overflow bit is returned.
170define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) {
171; CHECK-LABEL: f10:
172; CHECK: alg %r4, 524280({{%r3,%r2|%r2,%r3}})
173; CHECK-DAG: stg %r4, 0(%r5)
174; CHECK-DAG: ipm [[REG:%r[0-5]]]
175; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
176; CHECK: br %r14
177  %add1 = add i64 %src, %index
178  %add2 = add i64 %add1, 524280
179  %ptr = inttoptr i64 %add2 to i64 *
180  %b = load i64, i64 *%ptr
181  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
182  %val = extractvalue {i64, i1} %t, 0
183  %obit = extractvalue {i64, i1} %t, 1
184  store i64 %val, i64 *%res
185  ret i1 %obit
186}
187
188; Check that additions of spilled values can use ALG rather than ALGR.
; f11: spill test. Ten i64 values are loaded from %ptr0 at element indices
; 0, 2, 4, ..., 18 before a call to @foo, and all ten are consumed only after
; the call. The call's return value is then run through a chain of ten
; llvm.uadd.with.overflow.i64 additions, one per loaded value, and the ten
; overflow bits are ORed together. The combined bit is returned as a zeroext
; i1; the final sum (%add9) is not used.
; Expected assembly: after the brasl to foo@PLT, at least one addition takes
; its operand straight from the stack slot at 160(%r15) via alg.
; NOTE(review): presumably ten live values across the call exceed the
; available call-preserved registers, which is what forces the spill - confirm
; against the s390x register conventions.
189define zeroext i1 @f11(i64 *%ptr0) {
190; CHECK-LABEL: f11:
191; CHECK: brasl %r14, foo@PLT
192; CHECK: alg %r2, 160(%r15)
193; CHECK: br %r14
; Addresses of every second i64 element after %ptr0.
194  %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
195  %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
196  %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
197  %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
198  %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
199  %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
200  %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
201  %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
202  %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
203
; All loads happen before the call so that the values are live across it.
204  %val0 = load i64, i64 *%ptr0
205  %val1 = load i64, i64 *%ptr1
206  %val2 = load i64, i64 *%ptr2
207  %val3 = load i64, i64 *%ptr3
208  %val4 = load i64, i64 *%ptr4
209  %val5 = load i64, i64 *%ptr5
210  %val6 = load i64, i64 *%ptr6
211  %val7 = load i64, i64 *%ptr7
212  %val8 = load i64, i64 *%ptr8
213  %val9 = load i64, i64 *%ptr9
214
215  %ret = call i64 @foo()
216
; Addition chain: each step adds the next loaded value to the running sum and
; ORs its overflow bit into the running result.
217  %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %val0)
218  %add0 = extractvalue {i64, i1} %t0, 0
219  %obit0 = extractvalue {i64, i1} %t0, 1
220  %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %val1)
221  %add1 = extractvalue {i64, i1} %t1, 0
222  %obit1 = extractvalue {i64, i1} %t1, 1
223  %res1 = or i1 %obit0, %obit1
224  %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %val2)
225  %add2 = extractvalue {i64, i1} %t2, 0
226  %obit2 = extractvalue {i64, i1} %t2, 1
227  %res2 = or i1 %res1, %obit2
228  %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %val3)
229  %add3 = extractvalue {i64, i1} %t3, 0
230  %obit3 = extractvalue {i64, i1} %t3, 1
231  %res3 = or i1 %res2, %obit3
232  %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %val4)
233  %add4 = extractvalue {i64, i1} %t4, 0
234  %obit4 = extractvalue {i64, i1} %t4, 1
235  %res4 = or i1 %res3, %obit4
236  %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %val5)
237  %add5 = extractvalue {i64, i1} %t5, 0
238  %obit5 = extractvalue {i64, i1} %t5, 1
239  %res5 = or i1 %res4, %obit5
240  %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %val6)
241  %add6 = extractvalue {i64, i1} %t6, 0
242  %obit6 = extractvalue {i64, i1} %t6, 1
243  %res6 = or i1 %res5, %obit6
244  %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %val7)
245  %add7 = extractvalue {i64, i1} %t7, 0
246  %obit7 = extractvalue {i64, i1} %t7, 1
247  %res7 = or i1 %res6, %obit7
248  %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %val8)
249  %add8 = extractvalue {i64, i1} %t8, 0
250  %obit8 = extractvalue {i64, i1} %t8, 1
251  %res8 = or i1 %res7, %obit8
252  %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %val9)
253  %add9 = extractvalue {i64, i1} %t9, 0
254  %obit9 = extractvalue {i64, i1} %t9, 1
255  %res9 = or i1 %res8, %obit9
256
257  ret i1 %res9
258}
259
; Declaration of the intrinsic exercised by every test in this file. It takes
; two i64 operands and returns an {i64, i1} pair; the tests read element 0 as
; the sum and element 1 as the overflow bit.
260declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
261
262