; Test 32-bit conditional stores that are presented as selects.
;
; Each function loads a 32-bit value, selects between that value and %alt
; based on an unsigned comparison of %limit against 420, and stores the
; result back to the address it was loaded from.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; External callee, used by the frame-index test to make its alloca escape.
declare void @foo(i32 *)

; Test the simple case, with the loaded value first.
; The select yields the loaded value when %limit is below 420 (unsigned), so
; the expected code branches over the store with JL and otherwise stores %r3
; to 0(%r2).  %r2 must not be mentioned before the store, i.e. there must be
; no separate load through the pointer.
define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Test the simple case with the loaded value as the second select operand.
; Here the loaded value is kept when %limit is not below 420 (unsigned), so
; the branch over the store is expected to be JHE instead of JL.
define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %alt, i32 %orig
  store i32 %res, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.  The select is done on i64 and the result is truncated
; back to i32 before the store; the expected code is the same branch over a
; 32-bit ST as in the plain i32 case.
define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Test the explicitly sign-extended case with the loaded value as the second
; select operand.  The branch over the store is expected to be JHE.
define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.  The select is done on i64 and the result is truncated
; back to i32 before the store; the expected code is the same branch over a
; 32-bit ST as in the plain i32 case.
define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Test the explicitly zero-extended case with the loaded value as the second
; select operand.  The branch over the store is expected to be JHE.
define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Check the high end of the aligned ST range.
; Element 1023 of an i32 array is at byte offset 4092, which is expected to
; be folded directly into the ST displacement.
define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 4092(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1023
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which should use STY instead of ST.
; Element 1024 of an i32 array is at byte offset 4096.
define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1024
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the high end of the aligned STY range.
; Element 131071 of an i32 array is at byte offset 524284, which is expected
; to be folded directly into the STY displacement.
define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, 524284(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131071
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 of an i32 array is at byte offset 524288; the expected code
; adds that to %r2 with AGFI after the branch and then uses a plain ST.
define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the low end of the STY range.
; Element -131072 of an i32 array is at byte offset -524288, which is
; expected to be folded directly into the STY displacement.
define void @f11(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 of an i32 array is at byte offset -524292; the expected
; code adds that to %r2 with AGFI after the branch and then uses a plain ST.
define void @f12(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524292
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131073
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that STY allows an index.
; The address is built as %base + %index + 4096 in integer arithmetic; the
; expected store uses both registers plus the 4096 displacement in a single
; STY, with %alt now arriving in %r4.
define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that volatile loads are not matched.
; The load from 0(%r2) is expected to stay in the output, followed by a
; branch in either sense, and the ST is expected after the branch target
; rather than being skipped by the branch.
define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that volatile stores are not matched either.  In this case we should
; have a conditional load into %r3: the branch skips the load, and the ST
; after the branch target is always executed.
define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store volatile i32 %res, i32 *%ptr
  ret void
}

; Check that atomic loads are not matched.  The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; NOTE(review): the expectations below already match a plain L followed by
; an ST after the branch target, so the FIXME above looks stale -- confirm
; against current llc output before removing it.
; CHECK-LABEL: f16:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i32, i32 *%ptr unordered, align 4
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that atomic stores are not matched either.  As with the atomic load
; case, the "unordered" ordering is used; the expected code is a conditional
; load into %r3 followed by an ST that is always executed.
define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; NOTE(review): the expectations below already match a plain ST after the
; branch target, so the FIXME above looks stale -- confirm against current
; llc output before removing it.
; CHECK-LABEL: f17:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store atomic i32 %res, i32 *%ptr unordered, align 4
  ret void
}

; Try a frame index base.
; The slot comes from an alloca that is passed to @foo before and after the
; conditional store.  Between the two calls the expected code branches over
; a single ST to a %r15-relative address, with no other mention of %r15
; before that store.
define void @f18(i32 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i32
  call void @foo(i32 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  call void @foo(i32 *%ptr)
  ret void
}
