1; Test memory-to-memory ANDs.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4
5@g1src = global i8 1    ; i8 source operand loaded by @f22
6@g1dst = global i8 1    ; i8 destination loaded and stored by @f22
7@g2src = global i16 2   ; i16 source operand loaded by @f23
8@g2dst = global i16 2   ; i16 destination loaded and stored by @f23
9
10; Test the simple i8 case: the byte at %ptr1 + 1 is ANDed with the byte at %ptr1.
11define void @f1(i8 *%ptr1) {
12; CHECK-LABEL: f1:
13; CHECK: nc 1(1,%r2), 0(%r2)
14; CHECK: br %r14
15  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
16  %val = load i8 , i8 *%ptr1                  ; source operand
17  %old = load i8 , i8 *%ptr2                  ; current destination value
18  %and = and i8 %val, %old                    ; operand order: source, destination
19  store i8 %and, i8 *%ptr2                    ; result overwrites the location %old came from
20  ret void
21}
22
23; ...and again in reverse: the AND operands are swapped (%old, %val).
24define void @f2(i8 *%ptr1) {
25; CHECK-LABEL: f2:
26; CHECK: nc 1(1,%r2), 0(%r2)
27; CHECK: br %r14
28  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
29  %val = load i8 , i8 *%ptr1                  ; source operand
30  %old = load i8 , i8 *%ptr2                  ; current destination value
31  %and = and i8 %old, %val                    ; operand order: destination, source
32  store i8 %and, i8 *%ptr2                    ; result overwrites the location %old came from
33  ret void
34}
35
36; Test i8 cases where one value is zero-extended to 32 bits and the other
37; sign-extended.  Only the truncated low byte of the i32 AND is stored.
38define void @f3(i8 *%ptr1) {
39; CHECK-LABEL: f3:
40; CHECK: nc 1(1,%r2), 0(%r2)
41; CHECK: br %r14
42  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
43  %val = load i8 , i8 *%ptr1                  ; source operand
44  %extval = zext i8 %val to i32               ; source is zero-extended
45  %old = load i8 , i8 *%ptr2                  ; current destination value
46  %extold = sext i8 %old to i32               ; destination value is sign-extended
47  %and = and i32 %extval, %extold             ; AND performed at i32 width
48  %trunc = trunc i32 %and to i8               ; extension bits are discarded again
49  store i8 %trunc, i8 *%ptr2
50  ret void
51}
52
53; ...and again with the extension types reversed (sext source, zext destination).
54define void @f4(i8 *%ptr1) {
55; CHECK-LABEL: f4:
56; CHECK: nc 1(1,%r2), 0(%r2)
57; CHECK: br %r14
58  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
59  %val = load i8 , i8 *%ptr1                  ; source operand
60  %extval = sext i8 %val to i32               ; source is sign-extended
61  %old = load i8 , i8 *%ptr2                  ; current destination value
62  %extold = zext i8 %old to i32               ; destination value is zero-extended
63  %and = and i32 %extval, %extold             ; AND performed at i32 width
64  %trunc = trunc i32 %and to i8               ; extension bits are discarded again
65  store i8 %trunc, i8 *%ptr2
66  ret void
67}
68
69; ...and again with two sign extensions (both i8 values are sext'ed to i32).
70define void @f5(i8 *%ptr1) {
71; CHECK-LABEL: f5:
72; CHECK: nc 1(1,%r2), 0(%r2)
73; CHECK: br %r14
74  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
75  %val = load i8 , i8 *%ptr1                  ; source operand
76  %extval = sext i8 %val to i32               ; source is sign-extended
77  %old = load i8 , i8 *%ptr2                  ; current destination value
78  %extold = sext i8 %old to i32               ; destination value is sign-extended too
79  %and = and i32 %extval, %extold             ; AND performed at i32 width
80  %trunc = trunc i32 %and to i8               ; extension bits are discarded again
81  store i8 %trunc, i8 *%ptr2
82  ret void
83}
84
85; ...and again with two zero extensions (both i8 values are zext'ed to i32).
86define void @f6(i8 *%ptr1) {
87; CHECK-LABEL: f6:
88; CHECK: nc 1(1,%r2), 0(%r2)
89; CHECK: br %r14
90  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
91  %val = load i8 , i8 *%ptr1                  ; source operand
92  %extval = zext i8 %val to i32               ; source is zero-extended
93  %old = load i8 , i8 *%ptr2                  ; current destination value
94  %extold = zext i8 %old to i32               ; destination value is zero-extended too
95  %and = and i32 %extval, %extold             ; AND performed at i32 width
96  %trunc = trunc i32 %and to i8               ; extension bits are discarded again
97  store i8 %trunc, i8 *%ptr2
98  ret void
99}
100
101; Test i8 cases where the value is extended to 64 bits (just one case
102; this time): sext source, zext destination, AND at i64, trunc back to i8.
103define void @f7(i8 *%ptr1) {
104; CHECK-LABEL: f7:
105; CHECK: nc 1(1,%r2), 0(%r2)
106; CHECK: br %r14
107  %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination: one byte past %ptr1
108  %val = load i8 , i8 *%ptr1                  ; source operand
109  %extval = sext i8 %val to i64               ; source is sign-extended
110  %old = load i8 , i8 *%ptr2                  ; current destination value
111  %extold = zext i8 %old to i64               ; destination value is zero-extended
112  %and = and i64 %extval, %extold             ; AND performed at i64 width
113  %trunc = trunc i64 %and to i8               ; extension bits are discarded again
114  store i8 %trunc, i8 *%ptr2
115  ret void
116}
117
118; Test the simple i16 case: the i16 at %ptr1 + 2 bytes is ANDed with the i16 at %ptr1.
119define void @f8(i16 *%ptr1) {
120; CHECK-LABEL: f8:
121; CHECK: nc 2(2,%r2), 0(%r2)
122; CHECK: br %r14
123  %ptr2 = getelementptr i16, i16 *%ptr1, i64 1  ; destination: next i16 element
124  %val = load i16 , i16 *%ptr1                  ; source operand
125  %old = load i16 , i16 *%ptr2                  ; current destination value
126  %and = and i16 %val, %old
127  store i16 %and, i16 *%ptr2                    ; result overwrites the location %old came from
128  ret void
129}
130
131; Test i16 cases where the value is extended to 32 bits (zext source, sext destination).
132define void @f9(i16 *%ptr1) {
133; CHECK-LABEL: f9:
134; CHECK: nc 2(2,%r2), 0(%r2)
135; CHECK: br %r14
136  %ptr2 = getelementptr i16, i16 *%ptr1, i64 1  ; destination: next i16 element
137  %val = load i16 , i16 *%ptr1                  ; source operand
138  %extval = zext i16 %val to i32                ; source is zero-extended
139  %old = load i16 , i16 *%ptr2                  ; current destination value
140  %extold = sext i16 %old to i32                ; destination value is sign-extended
141  %and = and i32 %extval, %extold               ; AND performed at i32 width
142  %trunc = trunc i32 %and to i16                ; extension bits are discarded again
143  store i16 %trunc, i16 *%ptr2
144  ret void
145}
146
147; Test i16 cases where the value is extended to 64 bits (sext source, zext destination).
148define void @f10(i16 *%ptr1) {
149; CHECK-LABEL: f10:
150; CHECK: nc 2(2,%r2), 0(%r2)
151; CHECK: br %r14
152  %ptr2 = getelementptr i16, i16 *%ptr1, i64 1  ; destination: next i16 element
153  %val = load i16 , i16 *%ptr1                  ; source operand
154  %extval = sext i16 %val to i64                ; source is sign-extended
155  %old = load i16 , i16 *%ptr2                  ; current destination value
156  %extold = zext i16 %old to i64                ; destination value is zero-extended
157  %and = and i64 %extval, %extold               ; AND performed at i64 width
158  %trunc = trunc i64 %and to i16                ; extension bits are discarded again
159  store i16 %trunc, i16 *%ptr2
160  ret void
161}
162
163; Test the simple i32 case: the i32 at %ptr1 + 4 bytes is ANDed with the i32 at %ptr1.
164define void @f11(i32 *%ptr1) {
165; CHECK-LABEL: f11:
166; CHECK: nc 4(4,%r2), 0(%r2)
167; CHECK: br %r14
168  %ptr2 = getelementptr i32, i32 *%ptr1, i64 1  ; destination: next i32 element
169  %val = load i32 , i32 *%ptr1                  ; source operand
170  %old = load i32 , i32 *%ptr2                  ; current destination value
171  %and = and i32 %old, %val                     ; operand order: destination, source
172  store i32 %and, i32 *%ptr2                    ; result overwrites the location %old came from
173  ret void
174}
175
176; Test i32 cases where the value is extended to 64 bits (sext source, zext destination).
177define void @f12(i32 *%ptr1) {
178; CHECK-LABEL: f12:
179; CHECK: nc 4(4,%r2), 0(%r2)
180; CHECK: br %r14
181  %ptr2 = getelementptr i32, i32 *%ptr1, i64 1  ; destination: next i32 element
182  %val = load i32 , i32 *%ptr1                  ; source operand
183  %extval = sext i32 %val to i64                ; source is sign-extended
184  %old = load i32 , i32 *%ptr2                  ; current destination value
185  %extold = zext i32 %old to i64                ; destination value is zero-extended
186  %and = and i64 %extval, %extold               ; AND performed at i64 width
187  %trunc = trunc i64 %and to i32                ; extension bits are discarded again
188  store i32 %trunc, i32 *%ptr2
189  ret void
190}
191
192; Test the i64 case: the i64 at %ptr1 + 8 bytes is ANDed with the i64 at %ptr1.
193define void @f13(i64 *%ptr1) {
194; CHECK-LABEL: f13:
195; CHECK: nc 8(8,%r2), 0(%r2)
196; CHECK: br %r14
197  %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination: next i64 element
198  %val = load i64 , i64 *%ptr1                  ; source operand
199  %old = load i64 , i64 *%ptr2                  ; current destination value
200  %and = and i64 %old, %val                     ; operand order: destination, source
201  store i64 %and, i64 *%ptr2                    ; result overwrites the location %old came from
202  ret void
203}
204
205; Make sure that we don't use NC if the first load is volatile.
206define void @f14(i64 *%ptr1) {
207; CHECK-LABEL: f14:
208; CHECK-NOT: nc
209; CHECK: br %r14
210  %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination: next i64 element
211  %val = load volatile i64 , i64 *%ptr1         ; volatile source load: the case under test
212  %old = load i64 , i64 *%ptr2                  ; ordinary destination load
213  %and = and i64 %old, %val
214  store i64 %and, i64 *%ptr2                    ; ordinary store
215  ret void
216}
217
218; ...likewise if the second load (the one from the destination) is volatile.
219define void @f15(i64 *%ptr1) {
220; CHECK-LABEL: f15:
221; CHECK-NOT: nc
222; CHECK: br %r14
223  %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination: next i64 element
224  %val = load i64 , i64 *%ptr1                  ; ordinary source load
225  %old = load volatile i64 , i64 *%ptr2         ; volatile destination load: the case under test
226  %and = and i64 %old, %val
227  store i64 %and, i64 *%ptr2                    ; ordinary store
228  ret void
229}
230
231; ...likewise if the store is volatile.
232define void @f16(i64 *%ptr1) {
233; CHECK-LABEL: f16:
234; CHECK-NOT: nc
235; CHECK: br %r14
236  %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination: next i64 element
237  %val = load i64 , i64 *%ptr1                  ; ordinary source load
238  %old = load i64 , i64 *%ptr2                  ; ordinary destination load
239  %and = and i64 %old, %val
240  store volatile i64 %and, i64 *%ptr2           ; volatile store: the case under test
241  ret void
242}
243
244; Test that NC is not used for aligned loads and stores if there is
245; no way of telling whether they alias.  We don't want to use NC in
246; cases where the addresses could be equal.
247define void @f17(i64 *%ptr1, i64 *%ptr2) {
248; CHECK-LABEL: f17:
249; CHECK-NOT: nc
250; CHECK: br %r14
251  %val = load i64 , i64 *%ptr1   ; source: first pointer argument, no explicit alignment
252  %old = load i64 , i64 *%ptr2   ; destination: second, unrelated pointer argument
253  %and = and i64 %old, %val
254  store i64 %and, i64 *%ptr2     ; result overwrites the location %old came from
255  ret void
256}
257
258; ...likewise if one of the loads isn't aligned (align 2): we can't be sure they don't alias.
259define void @f18(i64 *%ptr1, i64 *%ptr2) {
260; CHECK-LABEL: f18:
261; CHECK-NOT: nc
262; CHECK: br %r14
263  %val = load i64 , i64 *%ptr1, align 2  ; source load with only 2-byte alignment
264  %old = load i64 , i64 *%ptr2           ; destination load, no explicit alignment
265  %and = and i64 %old, %val              ; operand order: destination, source
266  store i64 %and, i64 *%ptr2
267  ret void
268}
269
270; Repeat the previous test (align 2 source load) with the AND operands in the opposite order.
271define void @f19(i64 *%ptr1, i64 *%ptr2) {
272; CHECK-LABEL: f19:
273; CHECK-NOT: nc
274; CHECK: br %r14
275  %val = load i64 , i64 *%ptr1, align 2  ; source load with only 2-byte alignment
276  %old = load i64 , i64 *%ptr2           ; destination load, no explicit alignment
277  %and = and i64 %val, %old              ; operand order: source, destination
278  store i64 %and, i64 *%ptr2
279  ret void
280}
281
282; ...and again with the other operand (the destination) being unaligned.
283define void @f20(i64 *%ptr1, i64 *%ptr2) {
284; CHECK-LABEL: f20:
285; CHECK-NOT: nc
286; CHECK: br %r14
287  %val = load i64 , i64 *%ptr1            ; source load, no explicit alignment
288  %old = load i64 , i64 *%ptr2, align 2   ; destination load with only 2-byte alignment
289  %and = and i64 %val, %old               ; operand order: source, destination
290  store i64 %and, i64 *%ptr2, align 2     ; store carries the same 2-byte alignment
291  ret void
292}
293
294; Test a case where there is definite overlap: two 8-byte accesses whose addresses differ by 1.
295define void @f21(i64 %base) {
296; CHECK-LABEL: f21:
297; CHECK-NOT: nc
298; CHECK: br %r14
299  %add = add i64 %base, 1                   ; destination address = source address + 1
300  %ptr1 = inttoptr i64 %base to i64 *       ; source pointer
301  %ptr2 = inttoptr i64 %add to i64 *        ; destination pointer, 1 byte into the source
302  %val = load i64 , i64 *%ptr1
303  %old = load i64 , i64 *%ptr2, align 1     ; byte-aligned access
304  %and = and i64 %old, %val
305  store i64 %and, i64 *%ptr2, align 1       ; byte-aligned access
306  ret void
307}
308
309; Test that we can use NC for global addresses for i8.  The %ptr argument is unused.
310define void @f22(i8 *%ptr) {
311; CHECK-LABEL: f22:
312; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
313; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
314; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
315; CHECK: br %r14
316  %val = load i8 , i8 *@g1src   ; source operand comes from the global @g1src
317  %old = load i8 , i8 *@g1dst   ; current value of the global @g1dst
318  %and = and i8 %val, %old
319  store i8 %and, i8 *@g1dst     ; result is written back to @g1dst
320  ret void
321}
322
323; Test that we use NC even where LHRL and STHRL are available.  The %ptr argument is unused.
324define void @f23(i16 *%ptr) {
325; CHECK-LABEL: f23:
326; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src
327; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
328; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
329; CHECK: br %r14
330  %val = load i16 , i16 *@g2src   ; source operand comes from the global @g2src
331  %old = load i16 , i16 *@g2dst   ; current value of the global @g2dst
332  %and = and i16 %val, %old
333  store i16 %and, i16 *@g2dst     ; result is written back to @g2dst
334  ret void
335}
336
337; Test a case where offset disambiguation is enough: byte-aligned accesses 8 bytes apart.
338define void @f24(i64 *%ptr1) {
339; CHECK-LABEL: f24:
340; CHECK: nc 8(8,%r2), 0(%r2)
341; CHECK: br %r14
342  %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination: next i64 element of the same base
343  %val = load i64 , i64 *%ptr1, align 1         ; byte-aligned source load
344  %old = load i64 , i64 *%ptr2, align 1         ; byte-aligned destination load
345  %and = and i64 %old, %val
346  store i64 %and, i64 *%ptr2, align 1           ; byte-aligned store
347  ret void
348}
349
350; Test a case where TBAA tells us there is no alias: source uses tag !3, destination tag !4.
351define void @f25(i64 *%ptr1, i64 *%ptr2) {
352; CHECK-LABEL: f25:
353; CHECK: nc 0(8,%r3), 0(%r2)
354; CHECK: br %r14
355  %val = load i64 , i64 *%ptr1, align 2, !tbaa !3   ; source access, tagged !3 ("set1")
356  %old = load i64 , i64 *%ptr2, align 2, !tbaa !4   ; destination access, tagged !4 ("set2")
357  %and = and i64 %old, %val
358  store i64 %and, i64 *%ptr2, align 2, !tbaa !4     ; store uses the destination's tag
359  ret void
360}
361
362; Test a case where TBAA information is present but doesn't help: every access uses tag !3.
363define void @f26(i64 *%ptr1, i64 *%ptr2) {
364; CHECK-LABEL: f26:
365; CHECK-NOT: nc
366; CHECK: br %r14
367  %val = load i64 , i64 *%ptr1, align 2, !tbaa !3   ; source access, tagged !3 ("set1")
368  %old = load i64 , i64 *%ptr2, align 2, !tbaa !3   ; destination access, same tag as the source
369  %and = and i64 %old, %val
370  store i64 %and, i64 *%ptr2, align 2, !tbaa !3     ; store also tagged !3
371  ret void
372}
373
374!0 = !{ !"root" }        ; root node that both sets hang off
375!1 = !{ !"set1", !0 }    ; "set1", child of !0
376!2 = !{ !"set2", !0 }    ; "set2", child of !0
377!3 = !{ !1, !1, i64 0}   ; tag built from "set1"; used as !tbaa in @f25 and @f26
378!4 = !{ !2, !2, i64 0}   ; tag built from "set2"; used as !tbaa in @f25 only
379