; Test 32-bit conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; External function; @f18 passes it the address of its stack slot.
declare void @foo(i32 *)

; Test the simple case, with the loaded value first.
; The select keeps the loaded value when %limit < 420 (unsigned), so memory
; only needs to change otherwise.  The expected code is a conditional return
; on "low" followed by an unconditional store of %alt, and %r2 must not be
; mentioned before the store (i.e. no separate load through the pointer).
define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...and with the loaded value second
; Here the loaded value is kept when %limit >= 420 (unsigned), so the
; conditional return is on "high or equal" and the store of %alt happens
; on the fall-through path.
define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %alt, i32 %orig
  store i32 %res, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
; The loaded i32 is sign-extended, selected against the i64 %alt and then
; truncated back to i32 before the store.  The expected code is the same
; conditional-return-plus-store sequence as for the plain i32 select.
define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; ...and with the loaded value second
; The sign-extended loaded value is now the "false" operand of the select,
; so the conditional return is on "high or equal".
define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
; The loaded i32 is zero-extended, selected against the i64 %alt and then
; truncated back to i32 before the store.  The expected code is a
; conditional return on "low" followed by an unconditional store.
define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; ...and with the loaded value second
; The zero-extended loaded value is now the "false" operand of the select,
; so the conditional return is on "high or equal".
define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Check the high end of the aligned ST range.
; Element 1023 of an i32 array is at byte offset 1023 * 4 = 4092, which the
; checks expect to be folded directly into the ST displacement.
define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: st %r3, 4092(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1023
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which should use STY instead of ST.
; Element 1024 is at byte offset 1024 * 4 = 4096, one word past the largest
; offset that the previous test stores to with ST.
define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r3, 4096(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1024
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the high end of the aligned STY range.
; Element 131071 is at byte offset 131071 * 4 = 524284, which the checks
; expect to be folded directly into the STY displacement.
define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r3, 524284(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131071
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 is at byte offset 131072 * 4 = 524288.  The checks expect
; that offset to be added to the base with AGFI on the store path only
; (after the conditional return), followed by a zero-displacement ST.
define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the low end of the STY range.
; Element -131072 is at byte offset -131072 * 4 = -524288, which the checks
; expect to be folded directly into the STY displacement.
define void @f11(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r3, -524288(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 is at byte offset -131073 * 4 = -524292.  The checks
; expect that offset to be added to the base with AGFI on the store path
; only (after the conditional return), followed by a zero-displacement ST.
define void @f12(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524292
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131073
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that STY allows an index.
; The address is built as %base + %index + 4096 and converted to a pointer.
; The checks expect all three components to be folded into one STY operand,
; with %alt arriving in %r4 because two i64 arguments precede it.
define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sty %r4, 4096(%r3,%r2)
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that volatile loads are not matched.
; The volatile load must stay as a real load, so the checks expect an
; explicit L first, then a branch (either sense is accepted) to a label,
; and a store after that label rather than a conditional-return sequence.
define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...likewise stores.  In this case we should have a conditional load into %r3.
; The volatile store must always execute, so the checks expect a branch on
; "high or equal" that skips the load of the original value into %r3,
; followed by an unconditional ST of %r3 after the label.
define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store volatile i32 %res, i32 *%ptr
  ret void
}

; Check that atomic loads are not matched.  The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; The checks below expect the unordered atomic load to be emitted as a plain
; L, followed by a branch (either sense is accepted) and a separate ST.
; CHECK-LABEL: f16:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i32 , i32 *%ptr unordered, align 4
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...likewise stores.
define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
; The checks below expect the unordered atomic store to be emitted as a plain
; ST that always executes, with the load of the original value into %r3
; skipped by a branch on "high or equal".
; CHECK-LABEL: f17:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store atomic i32 %res, i32 *%ptr unordered, align 4
  ret void
}

; Try a frame index base.
; The pointer is a stack slot whose address is passed to @foo before and
; after the conditional store.  The checks expect no mention of %r15 between
; the first call and the store (so no separate load from the slot), a branch
; on "low" that skips the store, and a store addressed directly off %r15.
define void @f18(i32 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i32
  call void @foo(i32 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  call void @foo(i32 *%ptr)
  ret void
}
