; Test 16-bit conditional stores that are presented as selects.  The volatile
; tests require z10, where a branch is used instead of a LOCR.
3;
4; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
5
6declare void @foo(i16 *)
7
8; Test the simple case, with the loaded value first.
define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420              ; unsigned %limit < 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt   ; keeps the loaded value when %cond holds
  store i16 %res, i16 *%ptr                     ; storing %orig back is a no-op, so a
                                                ; branch (jl) skips a single STH of %alt
  ret void
}
23
24; ...and with the loaded value second
define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %alt, i16 %orig   ; operands swapped vs. f1: %alt is stored
                                                ; when %cond holds, hence the inverted
  store i16 %res, i16 *%ptr                     ; branch condition (jhe instead of jl)
  ret void
}
39
40; Test cases where the value is explicitly sign-extended to 32 bits, with the
41; loaded value first.
define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i32                  ; the sext/trunc pair should fold away:
  %res = select i1 %cond, i32 %ext, i32 %alt    ; only the low 16 bits reach memory
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
58
59; ...and with the loaded value second
define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i32                  ; as f3, but with the loaded value as the
  %res = select i1 %cond, i32 %alt, i32 %ext    ; second select operand (jhe, not jl)
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
76
77; Test cases where the value is explicitly zero-extended to 32 bits, with the
78; loaded value first.
define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i32                  ; zero extension instead of f3's sext;
  %res = select i1 %cond, i32 %ext, i32 %alt    ; still folds away under the trunc
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
95
96; ...and with the loaded value second
define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i32                  ; zext variant of f4: loaded value second,
  %res = select i1 %cond, i32 %alt, i32 %ext    ; so the branch condition is inverted
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
113
114; Test cases where the value is explicitly sign-extended to 64 bits, with the
115; loaded value first.
define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i64                  ; same as f3 but extended to 64 bits;
  %res = select i1 %cond, i64 %ext, i64 %alt    ; the extension still folds away
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
132
133; ...and with the loaded value second
define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i64                  ; 64-bit sext variant with the loaded
  %res = select i1 %cond, i64 %alt, i64 %ext    ; value second (inverted branch, jhe)
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
150
151; Test cases where the value is explicitly zero-extended to 64 bits, with the
152; loaded value first.
define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i64                  ; 64-bit zext variant with the loaded
  %res = select i1 %cond, i64 %ext, i64 %alt    ; value first
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
169
170; ...and with the loaded value second
define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i64                  ; 64-bit zext variant with the loaded
  %res = select i1 %cond, i64 %alt, i64 %ext    ; value second (inverted branch, jhe)
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
187
188; Check the high end of the aligned STH range.
define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 4094(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2047  ; byte offset 4094: the largest
                                                  ; halfword-aligned STH displacement
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
204
205; Check the next halfword up, which should use STHY instead of STH.
define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2048  ; byte offset 4096: one halfword past
                                                  ; the STH range, so STHY is needed
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
221
222; Check the high end of the aligned STHY range.
define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r3, 524286(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262143  ; byte offset 524286: the largest
                                                    ; halfword-aligned STHY displacement
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
238
239; Check the next halfword up, which needs separate address logic.
240; Other sequences besides this one would be OK.
define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262144  ; byte offset 524288: just past the
                                                    ; STHY range, so the base register
                                                    ; is adjusted first (agfi)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
257
258; Check the low end of the STHY range.
define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262144  ; byte offset -524288: the most
                                                     ; negative STHY displacement
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
274
275; Check the next halfword down, which needs separate address logic.
276; Other sequences besides this one would be OK.
define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f16:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262145  ; byte offset -524290: just below
                                                     ; the STHY range, so the base
                                                     ; register is adjusted first
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
293
294; Check that STHY allows an index.
define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
; CHECK-LABEL: f17:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %add1 = add i64 %base, %index                 ; base + index + 4096 should all fold
  %add2 = add i64 %add1, 4096                   ; into one indexed STHY address
  %ptr = inttoptr i64 %add2 to i16 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
312
313; Check that volatile loads are not matched.
define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: lh {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i16 , i16 *%ptr         ; volatile: the load must execute exactly
                                                ; once, so it cannot be folded into a
  %res = select i1 %cond, i16 %orig, i16 %alt   ; conditional store; an unconditional LH
  store i16 %res, i16 *%ptr                     ; and STH bracket the selection branch
  ret void
}
327
328; ...likewise stores.  In this case we should have a conditional load into %r3.
define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f19:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt   ; the load is conditional instead: LH
                                                ; overwrites %r3 (%alt) only when %cond
  store volatile i16 %res, i16 *%ptr            ; volatile: the store must execute
                                                ; exactly once, unconditionally
  ret void
}
342
343; Check that atomic loads are not matched.  The transformation is OK for
344; the "unordered" case tested here, but since we don't try to handle atomic
345; operations at all in this context, it seems better to assert that than
346; to restrict the test to a stronger ordering.
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK-LABEL: f20:
; CHECK: lh {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i16 , i16 *%ptr unordered, align 2  ; atomic, even unordered, is
                                                          ; not folded into a conditional
  %res = select i1 %cond, i16 %orig, i16 %alt             ; store; codegen matches the
  store i16 %res, i16 *%ptr                               ; volatile-load case (f18)
  ret void
}
361
362; ...likewise stores.
define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK-LABEL: f21:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt             ; atomic-store counterpart of
                                                          ; f19: conditional LH into %r3,
  store atomic i16 %res, i16 *%ptr unordered, align 2     ; then one unconditional STH
  ret void
}
377
378; Try a frame index base.
define void @f22(i16 %alt, i32 %limit) {
; CHECK-LABEL: f22:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i16                             ; stack slot: the conditional STH should
                                                ; address it directly off %r15
  call void @foo(i16 *%ptr)                     ; calls keep the slot live and prevent
                                                ; the load/store being optimized away
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  call void @foo(i16 *%ptr)
  ret void
}
398