; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s
;
; Exercises loop-idiom recognition on copy loops whose load and/or store is an
; unordered atomic: the positive tests expect a call to
; llvm.memcpy.element.unordered.atomic, the negative tests expect the loop's
; store to survive untouched.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

;; memcpy.atomic formation (atomic load & store)
;; Byte copy loop in which both the load and the store are unordered atomic.
;; The checks expect one element-atomic memcpy (element size 1) of %Size bytes
;; and no store between that call and the return.
define void @test1(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic store, normal load)
;; Only the store is atomic; the plain load carries an explicit align 1.
;; The checks still expect the element-atomic memcpy (element size 1).
define void @test2(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic store, normal load w/ no align)
;; Same as test2 except the plain i8 load has no explicit alignment; the
;; checks expect the memcpy to be emitted with align 1 on both pointers.
define void @test2b(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2b(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
;; The i32 load is only align 2, i.e. less than the 4-byte element size, so
;; the checks require that no element-atomic memcpy appears and the store
;; remains.
define void @test2c(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2c(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 2
  store atomic i32 %V, i32* %DestI unordered, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
;; Mirror of test2c: here the atomic i32 store is the under-aligned access
;; (align 2), and the checks again require the store to remain.
define void @test2d(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2d(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 4
  store atomic i32 %V, i32* %DestI unordered, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}


;; memcpy.atomic formation (normal store, atomic load)
;; Only the load is atomic; the plain store carries an explicit align 1.
;; The checks expect the element-atomic memcpy (element size 1).
define void @test3(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (normal store w/ no align, atomic load)
;; Same as test3 except the plain i8 store has no explicit alignment. This is
;; a positive test: the checks expect the memcpy with align 1 on both pointers
;; and no store between that call and the return.
define void @test3b(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3b(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store i8 %V, i8* %DestI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
;; The atomic i32 load is only align 2, i.e. less than the 4-byte element
;; size, so the checks require that no element-atomic memcpy appears and the
;; store remains.
define void @test3c(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3c(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load atomic i32, i32* %I.0.014 unordered, align 2
  store i32 %V, i32* %DestI, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
;; Mirror of test3c: here the plain i32 store is the under-aligned access
;; (align 2), and the checks again require the store to remain.
define void @test3d(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3d(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load atomic i32, i32* %I.0.014 unordered, align 4
  store i32 %V, i32* %DestI, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}


;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
;; The store uses monotonic ordering rather than unordered, so the checks
;; require that no element-atomic memcpy appears and the store remains.
define void @test4(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test4(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store atomic i8 %V, i8* %DestI monotonic, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
;; Mirror of test4: here the load is the monotonic access, and the checks
;; again require the store to remain.
define void @test5(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test5(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 monotonic, align 1
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 2
;; i16 elements: the checks expect the byte length to be %Size shifted left
;; by 1 and the memcpy to use align 2 / element size 2.
define void @test6(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 1
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i16, i32 10000
  %Dest = alloca i16, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
  %DestI = getelementptr i16, i16* %Dest, i64 %indvar
  %V = load atomic i16, i16* %I.0.014 unordered, align 2
  store atomic i16 %V, i16* %DestI unordered, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 4
;; i32 elements: the checks expect the byte length to be %Size shifted left
;; by 2 and the memcpy to use align 4 / element size 4.
define void @test7(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test7(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 2
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load atomic i32, i32* %I.0.014 unordered, align 4
  store atomic i32 %V, i32* %DestI unordered, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 8
;; i64 elements: the checks expect the byte length to be %Size shifted left
;; by 3 and the memcpy to use align 8 / element size 8.
define void @test8(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test8(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 3
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i64, i32 10000
  %Dest = alloca i64, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i64, i64* %Base, i64 %indvar
  %DestI = getelementptr i64, i64* %Dest, i64 %indvar
  %V = load atomic i64, i64* %I.0.014 unordered, align 8
  store atomic i64 %V, i64* %DestI unordered, align 8
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 16
;; i128 elements. This is a positive test: the checks expect the byte length
;; to be %Size shifted left by 4 and the memcpy to use align 16 / element
;; size 16.
define void @test9(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test9(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 4
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i128, i32 10000
  %Dest = alloca i128, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i128, i128* %Base, i64 %indvar
  %DestI = getelementptr i128, i128* %Dest, i64 %indvar
  %V = load atomic i128, i128* %I.0.014 unordered, align 16
  store atomic i128 %V, i128* %DestI unordered, align 16
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
;; i256 elements: unlike the 1/2/4/8/16-byte cases, the checks require that
;; no element-atomic memcpy appears and the store remains.
define void @test10(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test10(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i256, i32 10000
  %Dest = alloca i256, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i256, i256* %Base, i64 %indvar
  %DestI = getelementptr i256, i256* %Dest, i64 %indvar
  %V = load atomic i256, i256* %I.0.014 unordered, align 32
  store atomic i256 %V, i256* %DestI unordered, align 32
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}



; Make sure that atomic memset doesn't get recognized by mistake
; The loop stores the constant 0 to consecutive bytes with an unordered-atomic
; store; the checks require that no llvm.memset call appears and the store
; remains.
define void @test_nomemset(i8* %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test_nomemset(
; CHECK-NOT: call void @llvm.memset
; CHECK: store
; CHECK: ret void
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store atomic i8 0, i8* %I.0.014 unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Verify that unordered memset_pattern isn't recognized.
; This is a replica of test11_pattern from basic.ll
; The loop stores the i32 constant 1 to 10000 consecutive elements with an
; unordered-atomic store; the checks require that neither a bitcast nor a
; memset_pattern call is introduced and that the atomic store remains.
define void @test_nomemset_pattern(i32* nocapture %P) nounwind ssp {
; CHECK-LABEL: @test_nomemset_pattern(
; CHECK-NEXT: entry:
; CHECK-NOT: bitcast
; CHECK-NOT: memset_pattern
; CHECK: store atomic
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
  store atomic i32 1, i32* %arrayidx unordered, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}