; Test 16-bit conditional stores that are presented as selects.  The volatile
; tests require z10, which uses a branch instead of a LOCR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; External function; @f22 calls it with the address of its stack slot.
declare void @foo(i16 *)

; Test the simple case, with the loaded value first.
;
; The select keeps the loaded value when %limit is below 420 (unsigned) and
; takes %alt otherwise, so the store only changes memory in the second case.
; The expected code returns early with BLR and otherwise stores %alt (%r3)
; with STH, without touching %r2 in between.
define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
;
; Here the select takes %alt when %limit is below 420 (unsigned) and keeps
; the loaded value otherwise, so the expected early return uses the opposite
; condition (BHER) before the STH of %alt.
define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %alt, i16 %orig
  store i16 %res, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 32 bits, with the
; loaded value first.
;
; The loaded i16 is sign-extended, selected against the i32 %alt, and the
; result is truncated back to i16 before the store.  The expected code is
; still a conditional return (BLR) followed by a plain STH of %r3.
define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
;
; Sign-extension to 32 bits with the select operands swapped: %alt is chosen
; when %limit is below 420 (unsigned), so the expected early return is BHER.
define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 32 bits, with the
; loaded value first.
;
; The loaded i16 is zero-extended, selected against the i32 %alt, and the
; result is truncated back to i16 before the store.  The expected code is a
; conditional return (BLR) followed by a plain STH of %r3.
define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
;
; Zero-extension to 32 bits with the select operands swapped: %alt is chosen
; when %limit is below 420 (unsigned), so the expected early return is BHER.
define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
;
; The loaded i16 is sign-extended to i64, selected against the i64 %alt, and
; the result is truncated back to i16 before the store.  The expected code is
; a conditional return (BLR) followed by a plain STH of %r3.
define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
;
; Sign-extension to 64 bits with the select operands swapped: %alt is chosen
; when %limit is below 420 (unsigned), so the expected early return is BHER.
define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
;
; The loaded i16 is zero-extended to i64, selected against the i64 %alt, and
; the result is truncated back to i16 before the store.  The expected code is
; a conditional return (BLR) followed by a plain STH of %r3.
define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
;
; Zero-extension to 64 bits with the select operands swapped: %alt is chosen
; when %limit is below 420 (unsigned), so the expected early return is BHER.
define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Check the high end of the aligned STH range.
;
; Element 2047 of an i16 array is byte offset 4094, which the expected STH
; encodes directly as its displacement.
define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sth %r3, 4094(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2047
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which should use STHY instead of STH.
;
; Element 2048 of an i16 array is byte offset 4096; the expected store is an
; STHY with that displacement.
define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r3, 4096(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2048
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the high end of the aligned STHY range.
;
; Element 262143 of an i16 array is byte offset 524286, which the expected
; STHY encodes directly as its displacement.
define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r3, 524286(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262143
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; Element 262144 of an i16 array is byte offset 524288.  The expected code
; adds that offset to %r2 with AGFI and then stores with a zero-displacement
; STH.
define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the low end of the STHY range.
;
; Element -262144 of an i16 array is byte offset -524288, which the expected
; STHY encodes directly as its displacement.
define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r3, -524288(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; Element -262145 of an i16 array is byte offset -524290.  The expected code
; adds that offset to %r2 with AGFI and then stores with a zero-displacement
; STH.
define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f16:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262145
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check that STHY allows an index.
;
; The address is built as %base + %index + 4096 and converted to a pointer.
; The expected STHY folds all three parts into a single base + index +
; displacement operand and stores %alt from %r4.
define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
; CHECK-LABEL: f17:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: sthy %r4, 4096(%r3,%r2)
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i16 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check that volatile loads are not matched.
;
; Because the load is volatile, the expected code performs the LH
; unconditionally, then branches around the register selection to a label,
; and finally stores with an unconditional STH.
define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: lh {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores.  In this case we should have a conditional load into %r3.
;
; Because the store is volatile it must always be executed.  The expected
; code jumps over the LH into %r3 when %limit is not below 420 (JHE), so %r3
; holds either the loaded value or %alt when the unconditional STH runs.
define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f19:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store volatile i16 %res, i16 *%ptr
  ret void
}

; Check that atomic loads are not matched.  The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
;
; NOTE(review): a FIXME here used to say the load "should use a normal load
; instead of CS".  The expected output below already uses a plain LH, so that
; FIXME appears to be stale; confirm against current llc output.
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f20:
; CHECK: lh {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i16 , i16 *%ptr unordered, align 2
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores.
;
; The store is an unordered atomic store, which is not turned into a
; conditional store: the expected code conditionally loads into %r3 (skipped
; via JHE) and then always executes the STH.
;
; NOTE(review): a FIXME here used to say the store "should use a normal store
; instead of CS".  The expected output below already uses a plain STH, so
; that FIXME appears to be stale; confirm against current llc output.
define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f21:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store atomic i16 %res, i16 *%ptr unordered, align 2
  ret void
}

; Try a frame index base.
;
; The halfword lives in a stack slot whose address is passed to @foo before
; and after the conditional store.  The expected code branches over an STH
; that addresses the slot relative to %r15, with no other use of %r15
; between the first call and that store.
define void @f22(i16 %alt, i32 %limit) {
; CHECK-LABEL: f22:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i16
  call void @foo(i16 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16 , i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  call void @foo(i16 *%ptr)
  ret void
}
