1; Test 64-bit conditional stores that are presented as selects.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
4
5declare void @foo(i64 *)
6
; Test with the loaded value first.  The select should be turned into a
; conditional store: when %limit is unsigned-low against 420 the select keeps
; %orig, making the store a no-op, so codegen can return early (BLR) and only
; store %alt (%r3) through %ptr (%r2) on the fall-through path.
define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
21
; ...and with the loaded value second.  The select polarity is inverted
; relative to f1: %orig is kept when %limit is unsigned high-or-equal, so the
; early return uses BHER and the store of %alt happens on the low path.
define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: bher %r14
; CHECK-NOT: %r2
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %alt, i64 %orig
  store i64 %res, i64 *%ptr
  ret void
}
36
; Check the high end of the aligned STG range: 65535 doublewords = 524280
; bytes, the largest doubleword-aligned offset that still fits STG's signed
; 20-bit displacement, so no separate address arithmetic is needed.
define void @f3(i64 *%base, i64 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: stg %r3, 524280(%r2)
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i64 65535
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
52
; Check the next doubleword up, which needs separate address logic: 65536
; doublewords = 524288 bytes, just past the signed 20-bit displacement range,
; so the base must be adjusted with AGFI before a zero-displacement STG.
; Other sequences besides this one would be OK.
define void @f4(i64 *%base, i64 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i64 65536
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
70
; Check the low end of the STG range: -65536 doublewords = -524288 bytes,
; the minimum value of the signed 20-bit displacement.
define void @f5(i64 *%base, i64 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: stg %r3, -524288(%r2)
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i64 -65536
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
86
; Check the next doubleword down, which needs separate address logic:
; -65537 doublewords = -524296 bytes, just below the displacement range,
; so the base is adjusted with AGFI first.
; Other sequences besides this one would be OK.
define void @f6(i64 *%base, i64 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524296
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i64 -65537
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
104
; Check that STG allows an index register: base + index + 524287 should fold
; into a single stg D(X,B) form with the maximum in-range displacement.
define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: blr %r14
; CHECK-NOT: %r2
; CHECK: stg %r4, 524287(%r3,%r2)
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 524287
  %ptr = inttoptr i64 %add2 to i64 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
122
; Check that volatile loads are not matched: the load must still be emitted
; unconditionally, so the select is lowered as a branch around the
; alternative value rather than a conditional store.
define void @f8(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK: lg {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
137
; ...likewise stores.  The volatile store must execute unconditionally, so
; instead we expect a conditional load into %r3 (skipped via JHE when %alt
; is the selected value) followed by an unconditional STG.
define void @f9(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store volatile i64 %res, i64 *%ptr
  ret void
}
152
; Check that atomic loads are not matched.  The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CSG.
; CHECK-LABEL: f10:
; CHECK: lg {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i64, i64 *%ptr unordered, align 8
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  ret void
}
171
; ...likewise atomic stores: the unordered store prevents the conditional-
; store transformation, so a conditional load into %r3 is expected instead,
; as in f9.
define void @f11(i64 *%ptr, i64 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CSG.
; CHECK-LABEL: f11:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store atomic i64 %res, i64 *%ptr unordered, align 8
  ret void
}
187
; Try a frame index base: the conditional STG should address the alloca slot
; directly off the stack pointer (%r15).  The calls to @foo before and after
; make the alloca address escape so the slot cannot be optimized away.
define void @f12(i64 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: stg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i64
  call void @foo(i64 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i64, i64 *%ptr
  %res = select i1 %cond, i64 %orig, i64 %alt
  store i64 %res, i64 *%ptr
  call void @foo(i64 *%ptr)
  ret void
}
208