; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s

; 32-bit little endian target.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"

;; Trivial RLE test.
define i32 @test0(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %A = load i32* %P
  ret i32 %A
; CHECK: @test0
; CHECK: ret i32 %V
}


;;===----------------------------------------------------------------------===;;
;; Tests for crashers
;;===----------------------------------------------------------------------===;;

;; PR5016
define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
  store {i32, i32} %A, {i32, i32}* %P
  %X = bitcast {i32, i32}* %P to i8*
  %Y = load i8* %X
  ret i8 %Y
}


;;===----------------------------------------------------------------------===;;
;; Store -> Load  and  Load -> Load forwarding where src and dst are different
;; types, but where the base pointer is a must alias.
;;===----------------------------------------------------------------------===;;

;; i32 -> f32 forwarding.
define float @coerce_mustalias1(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %P2 = bitcast i32* %P to float*

  %A = load float* %P2
  ret float %A
; CHECK: @coerce_mustalias1
; CHECK-NOT: load
; CHECK: ret float
}

;; i32* -> float forwarding.
define float @coerce_mustalias2(i32* %V, i32** %P) {
  store i32* %V, i32** %P

  %P2 = bitcast i32** %P to float*

  %A = load float* %P2
  ret float %A
; CHECK: @coerce_mustalias2
; CHECK-NOT: load
; CHECK: ret float
}

;; float -> i32* forwarding.
define i32* @coerce_mustalias3(float %V, float* %P) {
  store float %V, float* %P

  %P2 = bitcast float* %P to i32**

  %A = load i32** %P2
  ret i32* %A
; CHECK: @coerce_mustalias3
; CHECK-NOT: load
; CHECK: ret i32*
}

;; i32 -> f32 load forwarding.
define float @coerce_mustalias4(i32* %P, i1 %cond) {
  %A = load i32* %P

  %P2 = bitcast i32* %P to float*
  %B = load float* %P2
  br i1 %cond, label %T, label %F
T:
  ret float %B

F:
  %X = bitcast i32 %A to float
  ret float %X

; CHECK: @coerce_mustalias4
; CHECK: %A = load i32* %P
; CHECK-NOT: load
; CHECK: ret float
; CHECK: F:
}

;; i32 -> i8 forwarding.
define i8 @coerce_mustalias5(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %P2 = bitcast i32* %P to i8*

  %A = load i8* %P2
  ret i8 %A
; CHECK: @coerce_mustalias5
; CHECK-NOT: load
; CHECK: ret i8
}

;; i64 -> float forwarding.
define float @coerce_mustalias6(i64 %V, i64* %P) {
  store i64 %V, i64* %P

  %P2 = bitcast i64* %P to float*

  %A = load float* %P2
  ret float %A
; CHECK: @coerce_mustalias6
; CHECK-NOT: load
; CHECK: ret float
}

;; i64 -> i8* (32-bit) forwarding.
define i8* @coerce_mustalias7(i64 %V, i64* %P) {
  store i64 %V, i64* %P

  %P2 = bitcast i64* %P to i8**

  %A = load i8** %P2
  ret i8* %A
; CHECK: @coerce_mustalias7
; CHECK-NOT: load
; CHECK: ret i8*
}

; memset -> i16 forwarding.
define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
entry:
  %conv = bitcast i16* %A to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
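  ; Every byte of the memset region is 1, so the i16 at %A[42] is 0x0101 = 257.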
  %arrayidx = getelementptr inbounds i16* %A, i64 42
  %tmp2 = load i16* %arrayidx
  ret i16 %tmp2
; CHECK: @memset_to_i16_local
; CHECK-NOT: load
; CHECK: ret i16 257
}

; memset -> float forwarding.
define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
entry:
  %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
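  ; %Val is not a constant, so the forwarded value must be built by splatting the
  ; fill byte into an i32 and bitcasting it to float (see the CHECK lines below).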
  %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
  ret float %tmp2
; CHECK: @memset_to_float_local
; CHECK-NOT: load
; CHECK: zext
; CHECK-NEXT: shl
; CHECK-NEXT: or
; CHECK-NEXT: shl
; CHECK-NEXT: or
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret float
}

;; non-local memset -> i16 load forwarding.
define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
  %P3 = bitcast i16* %P to i8*
  br i1 %cond, label %T, label %F
T:
  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
  br label %Cont

F:
  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
  br label %Cont

Cont:
  %P2 = getelementptr i16* %P, i32 4
  %A = load i16* %P2
  ret i16 %A
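; A fill byte of 1 makes the loaded i16 0x0101 = 257; a fill byte of 2 makes it
; 0x0202 = 514, matching the phi in the CHECK lines below.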

; CHECK: @memset_to_i16_nonlocal0
; CHECK: Cont:
; CHECK-NEXT:   %A = phi i16 [ 514, %F ], [ 257, %T ]
; CHECK-NOT: load
; CHECK: ret i16 %A
}

@GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }

; memcpy -> float forwarding.
define float @memcpy_to_float_local(float* %A) nounwind ssp {
entry:
  %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
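  ; Bytes 4-7 of @GCst hold the float field 14.0, so %A[1] must forward to
  ; 1.400000e+01.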
  %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
  ret float %tmp2
; CHECK: @memcpy_to_float_local
; CHECK-NOT: load
; CHECK: ret float 1.400000e+01
}



;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
  %P2 = bitcast i32* %P to float*
  %P3 = bitcast i32* %P to i8*
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  store float 1.0, float* %P2
  br label %Cont

Cont:
  %A = load i8* %P3
  ret i8 %A

; CHECK: @coerce_mustalias_nonlocal0
; CHECK: Cont:
; CHECK:   %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}


;; non-local i32/float -> i8 load forwarding.  This also tests that the "P3"
;; bitcast equivalence can be properly phi translated.
define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
  %P2 = bitcast i32* %P to float*
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  store float 1.0, float* %P2
  br label %Cont

Cont:
  %P3 = bitcast i32* %P to i8*
  %A = load i8* %P3
  ret i8 %A

;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
;; bootstrap, see r82411
;
; HECK: @coerce_mustalias_nonlocal1
; HECK: Cont:
; HECK:   %A = phi i8 [
; HECK-NOT: load
; HECK: ret i8 %A
}


;; non-local i32 -> i8 partial redundancy load forwarding.
define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
  %P3 = bitcast i32* %P to i8*
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  br label %Cont

Cont:
  %A = load i8* %P3
  ret i8 %A

; CHECK: @coerce_mustalias_pre0
; CHECK: F:
; CHECK:   load i8* %P3
; CHECK: Cont:
; CHECK:   %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}

;;===----------------------------------------------------------------------===;;
;; Store -> Load  and  Load -> Load forwarding where src and dst are different
;; types, and the reload is an offset from the store pointer.
;;===----------------------------------------------------------------------===;;

;; i32 -> i8 forwarding.
;; PR4216
define i8 @coerce_offset0(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %P2 = bitcast i32* %P to i8*
  %P3 = getelementptr i8* %P2, i32 2
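  ; With the little-endian datalayout above, the i8 at offset 2 is bits 16-23
  ; of the stored %V.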

  %A = load i8* %P3
  ret i8 %A
; CHECK: @coerce_offset0
; CHECK-NOT: load
; CHECK: ret i8
}

;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
  %P2 = bitcast i32* %P to float*
  %P3 = bitcast i32* %P to i8*
  %P4 = getelementptr i8* %P3, i32 2
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  store float 1.0, float* %P2
  br label %Cont

Cont:
  %A = load i8* %P4
  ret i8 %A

; CHECK: @coerce_offset_nonlocal0
; CHECK: Cont:
; CHECK:   %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}


;; non-local i32 -> i8 partial redundancy load forwarding.
define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
  %P3 = bitcast i32* %P to i8*
  %P4 = getelementptr i8* %P3, i32 2
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  br label %Cont

Cont:
  %A = load i8* %P4
  ret i8 %A

; CHECK: @coerce_offset_pre0
; CHECK: F:
; CHECK:   load i8* %P4
; CHECK: Cont:
; CHECK:   %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}

define i32 @chained_load(i32** %p) {
block1:
  %A = alloca i32*

  %z = load i32** %p
  store i32* %z, i32** %A
  br i1 true, label %block2, label %block3

block2:
  %a = load i32** %p
  br label %block4

block3:
  %b = load i32** %p
  br label %block4

block4:
  %c = load i32** %p
  %d = load i32* %c
  ret i32 %d

; CHECK: @chained_load
; CHECK: %z = load i32** %p
; CHECK-NOT: load
; CHECK: %d = load i32* %z
; CHECK-NEXT: ret i32 %d
}


declare i1 @cond() readonly
declare i1 @cond2() readonly

define i32 @phi_trans2() {
; CHECK: @phi_trans2
entry:
  %P = alloca i32, i32 400
  br label %F1

F1:
  %A = phi i32 [1, %entry], [2, %F]
  %cond2 = call i1 @cond()
  br i1 %cond2, label %T1, label %TY

T1:
  %P2 = getelementptr i32* %P, i32 %A
  %x = load i32* %P2
  %cond = call i1 @cond2()
  br i1 %cond, label %TX, label %F

F:
  %P3 = getelementptr i32* %P, i32 2
  store i32 17, i32* %P3

  store i32 42, i32* %P2  ; Provides "P[A]".
  br label %F1

TX:
  ; This load should not be compiled to 'ret i32 42'.  An overly clever
  ; implementation of GVN would see that we're returning 17 if the loop
  ; executes once or 42 if it executes more than that, but we'd have to do
  ; loop restructuring to expose this, and GVN shouldn't do this sort of CFG
  ; transformation.

; CHECK: TX:
; CHECK: ret i32 %x
  ret i32 %x
TY:
  ret i32 0
}

define i32 @phi_trans3(i32* %p) {
; CHECK: @phi_trans3
block1:
  br i1 true, label %block2, label %block3

block2:
  store i32 87, i32* %p
  br label %block4

block3:
  %p2 = getelementptr i32* %p, i32 43
  store i32 97, i32* %p2
  br label %block4

block4:
  %A = phi i32 [-1, %block2], [42, %block3]
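  ; Phi translating the load address %C (= %p + %A + 1): coming from %block2 it
  ; is %p+0 (which holds 87), coming from %block3 it is %p+43 (which holds 97).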
  br i1 true, label %block5, label %exit

; CHECK: block4:
; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]
; CHECK-NOT: load

block5:
  %B = add i32 %A, 1
  br i1 true, label %block6, label %exit

block6:
  %C = getelementptr i32* %p, i32 %B
  br i1 true, label %block7, label %exit

block7:
  %D = load i32* %C
  ret i32 %D

; CHECK: block7:
; CHECK-NEXT: ret i32 %D

exit:
  ret i32 -1
}

define i8 @phi_trans4(i8* %p) {
; CHECK: @phi_trans4
entry:
  %X3 = getelementptr i8* %p, i32 192
  store i8 192, i8* %X3

  %X = getelementptr i8* %p, i32 4
  %Y = load i8* %X
  br label %loop

loop:
  %i = phi i32 [4, %entry], [192, %loop]
  %X2 = getelementptr i8* %p, i32 %i
  %Y2 = load i8* %X2
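  ; On the backedge %i is 192, so the phi-translated load reads the low byte of
  ; the i32 0 stored through %Z below, giving the [ 0, %loop ] incoming value.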

; CHECK: loop:
; CHECK-NEXT: %Y2 = phi i8 [ %Y, %entry ], [ 0, %loop ]
; CHECK-NOT: load i8

  %cond = call i1 @cond2()

  %Z = bitcast i8* %X3 to i32*
  store i32 0, i32* %Z
  br i1 %cond, label %loop, label %out

out:
  %R = add i8 %Y, %Y2
  ret i8 %R
}

define i8 @phi_trans5(i8* %p) {
; CHECK: @phi_trans5
entry:

  %X4 = getelementptr i8* %p, i32 2
  store i8 19, i8* %X4

  %X = getelementptr i8* %p, i32 4
  %Y = load i8* %X
  br label %loop

loop:
  %i = phi i32 [4, %entry], [3, %cont]
  %X2 = getelementptr i8* %p, i32 %i
  %Y2 = load i8* %X2  ; Ensure this load is not being incorrectly replaced.
  %cond = call i1 @cond2()
  br i1 %cond, label %cont, label %out

cont:
  %Z = getelementptr i8* %X2, i32 -1
  %Z2 = bitcast i8* %Z to i32*
  store i32 50462976, i32* %Z2  ;; (1 << 8) | (2 << 16) | (3 << 24)


; CHECK: store i32
; CHECK-NEXT: getelementptr i8* %p, i32 3
; CHECK-NEXT: load i8*
  br label %loop

out:
  %R = add i8 %Y, %Y2
  ret i8 %R
}


; PR6642
define i32 @memset_to_load() nounwind readnone {
entry:
  %x = alloca [256 x i32], align 4                ; <[256 x i32]*> [#uses=2]
  %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
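  ; The whole buffer is zero-filled, so the load of element 0 forwards to 0.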
  %arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
  %tmp1 = load i32* %arraydecay                   ; <i32> [#uses=1]
  ret i32 %tmp1
; CHECK: @memset_to_load
; CHECK: ret i32 0
}


;;===----------------------------------------------------------------------===;;
;; Load -> Load forwarding in partial alias case.
;;===----------------------------------------------------------------------===;;

define i32 @load_load_partial_alias(i8* %P) nounwind ssp {
entry:
  %0 = bitcast i8* %P to i32*
  %tmp2 = load i32* %0
  %add.ptr = getelementptr inbounds i8* %P, i64 1
  %tmp5 = load i8* %add.ptr
  %conv = zext i8 %tmp5 to i32
  %add = add nsw i32 %tmp2, %conv
  ret i32 %add

; TEMPORARILYDISABLED: @load_load_partial_alias
; TEMPORARILYDISABLED: load i32*
; TEMPORARILYDISABLED-NOT: load
; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
; TEMPORARILYDISABLED-NOT: load
; TEMPORARILYDISABLED: trunc i32 {{.*}} to i8
; TEMPORARILYDISABLED-NOT: load
; TEMPORARILYDISABLED: ret i32
}


; Cross block partial alias case.
define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {
entry:
  %xx = bitcast i8* %P to i32*
  %x1 = load i32* %xx, align 4
  %cmp = icmp eq i32 %x1, 127
  br i1 %cmp, label %land.lhs.true, label %if.end

land.lhs.true:                                    ; preds = %entry
  %arrayidx4 = getelementptr inbounds i8* %P, i64 1
  %tmp5 = load i8* %arrayidx4, align 1
  %conv6 = zext i8 %tmp5 to i32
  ret i32 %conv6

if.end:
  ret i32 52
; TEMPORARILY_DISABLED: @load_load_partial_alias_cross_block
; TEMPORARILY_DISABLED: land.lhs.true:
; TEMPORARILY_DISABLED-NOT: load i8
; TEMPORARILY_DISABLED: ret i32 %conv6
}


;;===----------------------------------------------------------------------===;;
;; Load Widening
;;===----------------------------------------------------------------------===;;

%widening1 = type { i32, i8, i8, i8, i8 }

@f = global %widening1 zeroinitializer, align 4

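;; Two adjacent i8 loads (fields 1 and 2 of @f) should be widened into a single
;; i16 load.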
define i32 @test_widening1(i8* %P) nounwind ssp noredzone {
entry:
  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
  %conv = zext i8 %tmp to i32
  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
  %conv2 = zext i8 %tmp1 to i32
  %add = add nsw i32 %conv, %conv2
  ret i32 %add
; CHECK: @test_widening1
; CHECK-NOT: load
; CHECK: load i16*
; CHECK-NOT: load
; CHECK: ret i32
}

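;; Four adjacent i8 loads (fields 1-4 of @f) should be widened into a single
;; i32 load.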
define i32 @test_widening2() nounwind ssp noredzone {
entry:
  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
  %conv = zext i8 %tmp to i32
  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
  %conv2 = zext i8 %tmp1 to i32
  %add = add nsw i32 %conv, %conv2

  %tmp2 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2
  %conv3 = zext i8 %tmp2 to i32
  %add2 = add nsw i32 %add, %conv3

  %tmp3 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1
  %conv4 = zext i8 %tmp3 to i32
  %add3 = add nsw i32 %add2, %conv4

  ret i32 %add3
; CHECK: @test_widening2
; CHECK-NOT: load
; CHECK: load i32*
; CHECK-NOT: load
; CHECK: ret i32
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind