1; Test 32-bit subtraction in which the second operand is a sign-extended
2; i16 memory value.
3;
4; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5
; External function with no arguments returning i32.  Within this file it is
; only used as the tail-call target of the branch-on-overflow tests (f11, f12).
declare i32 @foo()

; Check the low end of the SH range.
;
; Loads an i16 from %src, sign-extends it to i32 and subtracts it from %a with
; llvm.ssub.with.overflow.i32.  The difference is stored through %res and the
; overflow flag is returned as a zero-extended i1.  %dummy is never used; in
; the expected code %a is in %r3, %src in %r4 and %res in %r5.
;
; Expected code: the load and subtraction fold into a single SH with a
; displacement of 0, and the overflow flag is produced by the IPM / AFI /
; RISBG sequence into %r2.
; NOTE(review): 1342177280 is 0x50000000; presumably the addition carries into
; the top bit of the IPM result only when the condition code is 3 (overflow),
; and RISBG then isolates that bit -- confirm against the architecture manual.
define zeroext i1 @f1(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f1:
; CHECK: sh %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %half = load i16, i16 *%src
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SH range.
;
; Subtracts the sign-extended halfword at element 2047 of %src (byte offset
; 2047 * 2 = 4094) from %a, stores the difference through %res and returns the
; overflow flag.  %dummy is never used.
;
; Expected code: the offset is folded into SH as displacement 4094, followed
; by the IPM / AFI / RISBG sequence that materialises the overflow flag in %r2.
define zeroext i1 @f2(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f2:
; CHECK: sh %r3, 4094(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 2047
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next halfword up, which should use SHY instead of SH.
;
; Subtracts the sign-extended halfword at element 2048 of %src (byte offset
; 2048 * 2 = 4096) from %a, stores the difference through %res and returns the
; overflow flag.  %dummy is never used.
;
; Expected code: SHY with displacement 4096, followed by the IPM / AFI / RISBG
; sequence that materialises the overflow flag in %r2.
define zeroext i1 @f3(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f3:
; CHECK: shy %r3, 4096(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 2048
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SHY range.
;
; Subtracts the sign-extended halfword at element 262143 of %src (byte offset
; 262143 * 2 = 524286) from %a, stores the difference through %res and returns
; the overflow flag.  %dummy is never used.
;
; Expected code: SHY with displacement 524286, followed by the IPM / AFI /
; RISBG sequence that materialises the overflow flag in %r2.
define zeroext i1 @f4(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f4:
; CHECK: shy %r3, 524286(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 262143
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; Subtracts the sign-extended halfword at element 262144 of %src (byte offset
; 262144 * 2 = 524288) from %a, stores the difference through %res and returns
; the overflow flag.  %dummy is never used.
;
; Expected code: the offset is not folded into the subtraction; instead it is
; added to the address register with AGFI and SH is used with displacement 0.
; The overflow flag is then materialised in %r2 by IPM / AFI / RISBG.
define zeroext i1 @f5(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f5:
; CHECK: agfi %r4, 524288
; CHECK: sh %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 262144
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned SHY range.
;
; Subtracts the sign-extended halfword at element -1 of %src (byte offset
; -1 * 2 = -2) from %a, stores the difference through %res and returns the
; overflow flag.  %dummy is never used.
;
; Expected code: SHY with displacement -2, followed by the IPM / AFI / RISBG
; sequence that materialises the overflow flag in %r2.
define zeroext i1 @f6(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f6:
; CHECK: shy %r3, -2(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 -1
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the low end of the SHY range.
;
; Subtracts the sign-extended halfword at element -262144 of %src (byte offset
; -262144 * 2 = -524288) from %a, stores the difference through %res and
; returns the overflow flag.  %dummy is never used.
;
; Expected code: SHY with displacement -524288, followed by the IPM / AFI /
; RISBG sequence that materialises the overflow flag in %r2.
define zeroext i1 @f7(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f7:
; CHECK: shy %r3, -524288(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 -262144
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; Subtracts the sign-extended halfword at element -262145 of %src (byte offset
; -262145 * 2 = -524290) from %a, stores the difference through %res and
; returns the overflow flag.  %dummy is never used.
;
; Expected code: the offset is not folded into the subtraction; instead it is
; added to the address register with AGFI and SH is used with displacement 0.
; The overflow flag is then materialised in %r2 by IPM / AFI / RISBG.
define zeroext i1 @f8(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, -524290
; CHECK: sh %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%src, i64 -262145
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SH allows an index.
;
; The address is computed as %src + %index + 4094 on i64 values and then
; converted to an i16 pointer.  The sign-extended halfword loaded from it is
; subtracted from %a, the difference is stored through %res and the overflow
; flag is returned.
;
; Expected code: a single SH using %r2 and %r3 as base and index (either
; order is accepted) with displacement 4094, operating on %a in %r4, followed
; by the IPM / AFI / RISBG sequence that materialises the flag in %r2.
define zeroext i1 @f9(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f9:
; CHECK: sh %r4, 4094({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4094
  %ptr = inttoptr i64 %add2 to i16 *
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SHY allows an index.
;
; The address is computed as %src + %index + 4096 on i64 values and then
; converted to an i16 pointer.  The sign-extended halfword loaded from it is
; subtracted from %a, the difference is stored through %res and the overflow
; flag is returned.
;
; Expected code: a single SHY using %r2 and %r3 as base and index (either
; order is accepted) with displacement 4096, operating on %a in %r4, followed
; by the IPM / AFI / RISBG sequence that materialises the flag in %r2.
define zeroext i1 @f10(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f10:
; CHECK: shy %r4, 4096({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i16 *
  %half = load i16, i16 *%ptr
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch.
;
; Subtracts the sign-extended halfword loaded from %src from %a and stores the
; difference through %res.  Instead of returning the overflow flag, the
; function branches on it: @foo is tail-called when the subtraction
; overflowed, otherwise the function simply returns.  %dummy is never used.
;
; Expected code: SH and ST followed by a conditional jump on overflow (jgo)
; straight to foo@PLT, with a plain return on the fall-through path.
define void @f11(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f11:
; CHECK: sh %r3, 0(%r4)
; CHECK: st %r3, 0(%r5)
; CHECK: jgo foo@PLT
; CHECK: br %r14
  %half = load i16, i16 *%src
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ; Overflow takes the %call path.
  br i1 %obit, label %call, label %exit

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction.
;
; Subtracts the sign-extended halfword loaded from %src from %a and stores the
; difference through %res, then branches on the overflow flag: @foo is
; tail-called when the subtraction did NOT overflow, otherwise the function
; simply returns.  %dummy is never used.
;
; Expected code: SH and ST followed by a conditional jump on no-overflow
; (jgno) straight to foo@PLT, with a plain return on the fall-through path.
define void @f12(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f12:
; CHECK: sh %r3, 0(%r4)
; CHECK: st %r3, 0(%r5)
; CHECK: jgno foo@PLT
; CHECK: br %r14
  %half = load i16, i16 *%src
  %b = sext i16 %half to i32
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  ; Element 0 of the result is the difference, element 1 the overflow flag.
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ; Overflow skips the call and goes directly to %exit.
  br i1 %obit, label %exit, label %call

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}


; Signed 32-bit subtraction intrinsic.  It returns a pair whose element 0 is
; the i32 difference and whose element 1 is the i1 overflow flag; every test
; function in this file extracts both.
declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
