1; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
2
; Data layout used by every function in this file. The pointer entries give
; 32-bit pointers for the default address space and for address spaces 3 and 5,
; and 64-bit pointers for address spaces 1, 2, 4 and 24. All memory accesses
; below go through addrspace(1) pointers, i.e. 64-bit ones.
3target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
4
; Intrinsic returning an i32; the first four functions call it and use the
; result as the source of their (extended) array index.
5declare i32 @llvm.amdgcn.workitem.id.x() #1
6
; Sign-extended index, adjacent elements reached by a second GEP.
; %a and %c are each addressed at sext(%id.x) and at the element right after
; it (a GEP of +1 on top of the first GEP). Both scalar loads come first, then
; both scalar stores of 0.0. The expectations below require a single
; <2 x float> load, a single <2 x float> store of zeroinitializer, and that
; the sext of %id.x is still present in the output.
7; CHECK-LABEL: @basic_merge_sext_index(
8; CHECK: sext i32 %id.x to i64
9; CHECK: load <2 x float>
10; CHECK: store <2 x float> zeroinitializer
11define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
12entry:
13  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  ; The i32 id is widened to i64 with a signed extension before indexing.
14  %sext.id.x = sext i32 %id.x to i64
  ; Element [id] of %a and %c, then element [id + 1] derived from those.
15  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x
16  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x
17  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
18  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1
19
  ; Pair of adjacent 4-byte-aligned loads from %c.
20  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
21  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
22
  ; Pair of adjacent 4-byte-aligned zero stores to %a.
23  store float 0.0, float addrspace(1)* %a.idx.x, align 4
24  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
25
  ; Both loaded values are consumed: their sum is written to %b.
26  %add = fadd float %ld.c, %ld.c.idx.1
27  store float %add, float addrspace(1)* %b, align 4
28  ret void
29}
30
; Zero-extended counterpart of @basic_merge_sext_index.
; %a and %c are each addressed at zext(%id.x) and at the element right after
; it (a GEP of +1 on top of the first GEP). The expectations below require a
; single <2 x float> load and a single <2 x float> store, with the zext of
; %id.x still present. Unlike the sext variant, the stored operand is not
; pinned by the store pattern.
31; CHECK-LABEL: @basic_merge_zext_index(
32; CHECK: zext i32 %id.x to i64
33; CHECK: load <2 x float>
34; CHECK: store <2 x float>
35define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
36entry:
37  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  ; The i32 id is widened to i64 with an unsigned extension before indexing.
38  %zext.id.x = zext i32 %id.x to i64
  ; Element [id] of %a and %c, then element [id + 1] derived from those.
39  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
40  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x
41  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
42  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1
43
  ; Two adjacent loads from %c followed by two adjacent zero stores to %a.
44  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
45  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
46  store float 0.0, float addrspace(1)* %a.idx.x, align 4
47  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
48
  ; Both loaded values are consumed: their sum is written to %b.
49  %add = fadd float %ld.c, %ld.c.idx.1
50  store float %add, float addrspace(1)* %b, align 4
51  ret void
52}
53
; Zero-extended index where the "+1" is applied BEFORE the extension.
; The base index is %id.x shifted left by 2; the second index is that value
; or'ed with 1. Each index is zero-extended separately and fed to its own GEP
; off the base pointer, so the two addresses share no common GEP. Because the
; shift leaves the low two bits clear, the or with 1 equals an add of 1, which
; makes the second element the neighbour of the first. All three pointer
; arguments are noalias, and the loads and stores are interleaved. The
; expectations below require one <2 x float> load and one <2 x float> store.
54; CHECK-LABEL: @merge_op_zext_index(
55; CHECK: load <2 x float>
56; CHECK: store <2 x float>
57define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
58entry:
59  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  ; First index: id * 4 computed in i32, then zero-extended to i64.
60  %shl = shl i32 %id.x, 2
61  %zext.id.x = zext i32 %shl to i64
62  %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
63  %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x
64
  ; Second index: bit 0 set on the shifted value in i32, then zero-extended.
65  %id.x.1 = or i32 %shl, 1
66  %id.x.1.ext = zext i32 %id.x.1 to i64
67
68  %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext
69  %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext
70
  ; Interleaved order: load[0], store[0], load[1], store[1].
71  %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
72  store float 0.0, float addrspace(1)* %a.0, align 4
73  %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
74  store float 0.0, float addrspace(1)* %a.1, align 4
75
  ; Both loaded values are consumed: their sum is written to %b.
76  %add = fadd float %ld.c.0, %ld.c.1
77  store float %add, float addrspace(1)* %b, align 4
78  ret void
79}
80
; Sign-extended index where the "+1" is applied BEFORE the extension.
; The base index is %id.x shifted left by 2; the second index is that value
; or'ed with 1. Each index is sign-extended separately and fed to its own GEP
; off the base pointer, so the two addresses share no common GEP. Because the
; shift leaves the low two bits clear, the or with 1 equals an add of 1 that
; cannot overflow, which makes the second element the neighbour of the first.
; All three pointer arguments are noalias, and the loads and stores are
; interleaved. The expectations below require one <2 x float> load and one
; <2 x float> store.
81; CHECK-LABEL: @merge_op_sext_index(
82; CHECK: load <2 x float>
83; CHECK: store <2 x float>
84define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
85entry:
86  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  ; First index: id * 4 computed in i32, then sign-extended to i64.
87  %shl = shl i32 %id.x, 2
88  %sext.id.x = sext i32 %shl to i64
89  %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x
90  %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x
91
  ; Second index: bit 0 set on the shifted value in i32, then sign-extended.
92  %id.x.1 = or i32 %shl, 1
93  %id.x.1.ext = sext i32 %id.x.1 to i64
94
95  %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext
96  %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext
97
  ; Interleaved order: load[0], store[0], load[1], store[1].
98  %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
99  store float 0.0, float addrspace(1)* %a.0, align 4
100  %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
101  store float 0.0, float addrspace(1)* %a.1, align 4
102
  ; Both loaded values are consumed: their sum is written to %b.
103  %add = fadd float %ld.c.0, %ld.c.1
104  store float %add, float addrspace(1)* %b, align 4
105  ret void
106}
107
108; This case fails to vectorize if not using the extra extension
109; handling in isConsecutiveAccess.
;
; Shape of the input: a loop whose i64 induction variable is truncated to i32,
; shifted left by 4, and zero-extended back to i64 to index %c and %a. The
; second index is the shifted i32 value or'ed with 1 and zero-extended
; separately. Each iteration copies two i32 elements from %c to %a with the
; loads and stores interleaved. The expectations below require, after the
; loop label, one <2 x i32> load and one <2 x i32> store.
; The arguments %b, %arst and %aoeu are not referenced in the body.
110
111; CHECK-LABEL: @zext_trunc_phi_1(
112; CHECK: loop:
113; CHECK: load <2 x i32>
114; CHECK: store <2 x i32>
115define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 {
116entry:
  ; Skip the loop entirely when %n is zero.
117  %cmp0 = icmp eq i32 %n, 0
118  br i1 %cmp0, label %exit, label %loop
119
120loop:
  ; 64-bit induction variable starting at 0, narrowed to i32 and scaled by 16.
121  %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
122  %trunc.iv = trunc i64 %indvars.iv to i32
123  %idx = shl i32 %trunc.iv, 4
124
  ; First element: the scaled index zero-extended back to i64.
125  %idx.ext = zext i32 %idx to i64
126  %c.0 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.ext
127  %a.0 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
128
  ; Second element: bit 0 set in i32 (clear after the shift), then its own zext.
129  %idx.1 = or i32 %idx, 1
130  %idx.1.ext = zext i32 %idx.1 to i64
131  %c.1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.1.ext
132  %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.1.ext
133
  ; Interleaved copy: load[0], store[0], load[1], store[1].
134  %ld.c.0 = load i32, i32 addrspace(1)* %c.0, align 4
135  store i32 %ld.c.0, i32 addrspace(1)* %a.0, align 4
136  %ld.c.1 = load i32, i32 addrspace(1)* %c.1, align 4
137  store i32 %ld.c.1, i32 addrspace(1)* %a.1, align 4
138
  ; Advance the i64 counter; the exit test compares its i32 truncation to %n.
139  %indvars.iv.next = add i64 %indvars.iv, 1
140  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
141
142  %exitcond = icmp eq i32 %lftr.wideiv, %n
143  br i1 %exitcond, label %exit, label %loop
144
145exit:
146  ret void
147}
148
; Attribute group #0 is attached to every function defined in this file.
149attributes #0 = { nounwind }
; Attribute group #1 is attached to the workitem-id intrinsic declaration.
150attributes #1 = { nounwind readnone }
151