1; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
2; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll
3
; Module-level test data, both declared in addrspace(3):
;   @scalar - a single float, initialised to 0.0
;   @array  - ten floats, zero-initialised
; The functions below reach these through pointers cast to the default
; address space.
4@scalar = internal addrspace(3) global float 0.0, align 4
5@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
6
; @load_store_lds_f32(i32 %i, float %v)
; Loads from and stores to the addrspace(3) globals @scalar and @array through
; default-address-space pointers, using five different ways of forming the
; pointer. Every loaded value is passed to @use, and each case is followed by
; a call to @llvm.amdgcn.s.barrier.
; The expectations below require every load and store to use an addrspace(3)
; pointer in the output.
7; CHECK-LABEL: @load_store_lds_f32(
8; CHECK: %tmp = load float, float addrspace(3)* @scalar, align 4
9; CHECK: call void @use(float %tmp)
10; CHECK: store float %v, float addrspace(3)* @scalar, align 4
11; CHECK: call void @llvm.amdgcn.s.barrier()
12; CHECK: %tmp2 = load float, float addrspace(3)* @scalar, align 4
13; CHECK: call void @use(float %tmp2)
14; CHECK: store float %v, float addrspace(3)* @scalar, align 4
15; CHECK: call void @llvm.amdgcn.s.barrier()
16; CHECK: %tmp3 = load float, float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5), align 4
17; CHECK: call void @use(float %tmp3)
18; CHECK: store float %v, float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5), align 4
19; CHECK: call void @llvm.amdgcn.s.barrier()
20; CHECK: %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5
21; CHECK: %tmp5 = load float, float addrspace(3)* %tmp4, align 4
22; CHECK: call void @use(float %tmp5)
23; CHECK: store float %v, float addrspace(3)* %tmp4, align 4
24; CHECK: call void @llvm.amdgcn.s.barrier()
25; CHECK: %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 %i
26; CHECK: %tmp8 = load float, float addrspace(3)* %tmp7, align 4
27; CHECK: call void @use(float %tmp8)
28; CHECK: store float %v, float addrspace(3)* %tmp7, align 4
29; CHECK: call void @llvm.amdgcn.s.barrier()
30; CHECK: ret void
31define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
32bb:
  ; Case 1: addrspacecast of @scalar written as a constant expression directly
  ; in the load/store operand.
33  %tmp = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
34  call void @use(float %tmp)
35  store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
36  call void @llvm.amdgcn.s.barrier()
  ; Case 2: addrspacecast of @scalar as a separate instruction (%tmp1).
37  %tmp1 = addrspacecast float addrspace(3)* @scalar to float*
38  %tmp2 = load float, float* %tmp1, align 4
39  call void @use(float %tmp2)
40  store float %v, float* %tmp1, align 4
41  call void @llvm.amdgcn.s.barrier()
  ; Case 3: constant-expression GEP (element 5) over a constant-expression
  ; addrspacecast of @array.
42  %tmp3 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
43  call void @use(float %tmp3)
44  store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
45  call void @llvm.amdgcn.s.barrier()
  ; Case 4: GEP instruction (element 5) whose base is a constant-expression
  ; addrspacecast of @array.
46  %tmp4 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
47  %tmp5 = load float, float* %tmp4, align 4
48  call void @use(float %tmp5)
49  store float %v, float* %tmp4, align 4
50  call void @llvm.amdgcn.s.barrier()
  ; Case 5: addrspacecast instruction (%tmp6) followed by a GEP indexed by the
  ; runtime argument %i. The expected output has no %tmp6; the GEP is based on
  ; @array directly.
51  %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
52  %tmp7 = getelementptr inbounds [10 x float], [10 x float]* %tmp6, i32 0, i32 %i
53  %tmp8 = load float, float* %tmp7, align 4
54  call void @use(float %tmp8)
55  store float %v, float* %tmp7, align 4
56  call void @llvm.amdgcn.s.barrier()
57  ret void
58}
59
; @constexpr_load_int_from_float_lds() -> i32
; Loads an i32 through a constant expression that first bitcasts @scalar
; (float in addrspace(3)) to an i32 addrspace(3) pointer and then
; addrspacecasts that to i32*.
; The expectation is that the load uses the addrspace(3) bitcast expression
; directly, with no addrspacecast left in the operand.
60; CHECK-LABEL: @constexpr_load_int_from_float_lds(
61; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*), align 4
62define i32 @constexpr_load_int_from_float_lds() #0 {
63bb:
64  %tmp = load i32, i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32*), align 4
65  ret i32 %tmp
66}
67
; @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) -> i32
; Casts the addrspace(1) argument to the default address space, applies two
; GEPs (by %i, then by %j), bitcasts the result to i32* and loads an i32.
; The expectation is that both GEPs, the bitcast and the load all operate on
; addrspace(1) pointers, with the first GEP based on %input itself.
68; CHECK-LABEL: @load_int_from_global_float(
69; CHECK: %tmp1 = getelementptr float, float addrspace(1)* %input, i32 %i
70; CHECK: %tmp2 = getelementptr float, float addrspace(1)* %tmp1, i32 %j
71; CHECK: %tmp3 = bitcast float addrspace(1)* %tmp2 to i32 addrspace(1)*
72; CHECK: %tmp4 = load i32, i32 addrspace(1)* %tmp3
73; CHECK: ret i32 %tmp4
74define i32 @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) #0 {
75bb:
76  %tmp = addrspacecast float addrspace(1)* %input to float*
77  %tmp1 = getelementptr float, float* %tmp, i32 %i
78  %tmp2 = getelementptr float, float* %tmp1, i32 %j
79  %tmp3 = bitcast float* %tmp2 to i32*
80  %tmp4 = load i32, i32* %tmp3
81  ret i32 %tmp4
82}
83
; @nested_const_expr()
; Stores i32 1 through a three-level constant expression:
; bitcast( getelementptr( addrspacecast(@array), 0, 1 ) ).
; The expectation is the same nesting rebuilt on addrspace(3) pointers, with
; the addrspacecast gone.
; NOTE(review): the expected GEP is `inbounds` while the input GEP is not;
; presumably added when the constant expression is rebuilt - confirm.
84; CHECK-LABEL: @nested_const_expr(
85; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4
86define amdgpu_kernel void @nested_const_expr() #0 {
87  store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
88  ret void
89}
90
; @rauw(float addrspace(1)* %input)
; Casts the addrspace(1) argument to the default address space, forms the
; address of element 10, loads a float from it and stores that same value
; back to the same address.
; The expectation is that the GEP, load, store and ret appear on consecutive
; output lines, all using addrspace(1) pointers based on %input.
91; CHECK-LABEL: @rauw(
92; CHECK: %addr = getelementptr float, float addrspace(1)* %input, i64 10
93; CHECK-NEXT: %v = load float, float addrspace(1)* %addr
94; CHECK-NEXT: store float %v, float addrspace(1)* %addr
95; CHECK-NEXT: ret void
96define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 {
97bb:
98  %generic_input = addrspacecast float addrspace(1)* %input to float*
99  %addr = getelementptr float, float* %generic_input, i64 10
100  %v = load float, float* %addr
101  store float %v, float* %addr
102  ret void
103}
104
; @loop()
; Walks @array with a pointer induction variable: %p is @array cast to float*,
; %end is %p advanced by 10 elements, and each iteration loads one float,
; passes it to @use, and advances the pointer by one element until it equals
; %end.
; The expectation is that the phi, load, GEPs and icmp all use addrspace(3)
; pointers, with %p becoming a bitcast of @array.
105; FIXME: Should be able to eliminate the cast inside the loop
; NOTE(review): the expectations for @loop below contain no cast inside the
; loop body; a cast does remain inside the loop of @loop_with_generic_bound.
; The FIXME above may be stale or meant for that function - confirm.
106; CHECK-LABEL: @loop(
107
108; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
109; CHECK: %end = getelementptr float, float addrspace(3)* %p, i64 10
110; CHECK: br label %loop
111
112; CHECK: loop:                                             ; preds = %loop, %entry
113; CHECK: %i = phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
114; CHECK: %v = load float, float addrspace(3)* %i
115; CHECK: call void @use(float %v)
116; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
117; CHECK: %exit_cond = icmp eq float addrspace(3)* %i2, %end
118
119; CHECK: br i1 %exit_cond, label %exit, label %loop
120define amdgpu_kernel void @loop() #0 {
121entry:
122  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
123  %end = getelementptr float, float* %p, i64 10
124  br label %loop
125
126loop:                                             ; preds = %loop, %entry
127  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
128  %v = load float, float* %i
129  call void @use(float %v)
130  %i2 = getelementptr float, float* %i, i64 1
131  %exit_cond = icmp eq float* %i2, %end
132  br i1 %exit_cond, label %exit, label %loop
133
134exit:                                             ; preds = %loop
135  ret void
136}
137
; External addrspace(1) global that holds a default-address-space float
; pointer. @loop_with_generic_bound loads it to obtain its loop end pointer.
138@generic_end = external addrspace(1) global float*
139
; @loop_with_generic_bound()
; Same pointer-induction loop over @array as @loop, except that the end
; pointer %end is a default-address-space float* loaded from @generic_end
; rather than derived from @array.
; The expectation is that the phi, load and GEP use addrspace(3) pointers,
; while %end stays a float*. The advanced pointer %i2 is therefore cast back
; to float* (%0) inside the loop before the icmp against %end.
140; CHECK-LABEL: @loop_with_generic_bound(
141; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
142; CHECK: %end = load float*, float* addrspace(1)* @generic_end
143; CHECK: br label %loop
144
145; CHECK: loop:
146; CHECK: %i = phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
147; CHECK: %v = load float, float addrspace(3)* %i
148; CHECK: call void @use(float %v)
149; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
150; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float*
151; CHECK: %exit_cond = icmp eq float* %0, %end
152; CHECK: br i1 %exit_cond, label %exit, label %loop
153define amdgpu_kernel void @loop_with_generic_bound() #0 {
154entry:
155  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
156  %end = load float*, float* addrspace(1)* @generic_end
157  br label %loop
158
159loop:                                             ; preds = %loop, %entry
160  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
161  %v = load float, float* %i
162  call void @use(float %v)
163  %i2 = getelementptr float, float* %i, i64 1
164  %exit_cond = icmp eq float* %i2, %end
165  br i1 %exit_cond, label %exit, label %loop
166
167exit:                                             ; preds = %loop
168  ret void
169}
170
; @select_bug()
; A GEP on an undef i64* whose index is a `select` constant expression (the
; condition compares an inttoptr constant against null), followed by an icmp
; of undef against the GEP result. The function ends in `unreachable`.
; The expectation is that both instructions appear in the output exactly as
; written in the input.
; NOTE(review): the name suggests a regression test for a past failure on
; select constant expressions - confirm against the commit history.
171; CHECK-LABEL: @select_bug(
172; CHECK: %add.ptr157 = getelementptr inbounds i64, i64* undef, i64 select (i1 icmp ne (i32* inttoptr (i64 4873 to i32*), i32* null), i64 73, i64 93)
173; CHECK: %cmp169 = icmp uge i64* undef, %add.ptr157
174define void @select_bug() #0 {
175  %add.ptr157 = getelementptr inbounds i64, i64* undef, i64 select (i1 icmp ne (i32* inttoptr (i64 4873 to i32*), i32* null), i64 73, i64 93)
176  %cmp169 = icmp uge i64* undef, %add.ptr157
177  unreachable
178}
179
; External declarations used by the test functions:
;   @llvm.amdgcn.s.barrier - called after each access case in
;                            @load_store_lds_f32
;   @use                   - receives each loaded float
180declare void @llvm.amdgcn.s.barrier() #1
181declare void @use(float) #0
182
; Attribute groups: #0 is applied to every function defined in this file and
; to @use; #1 (which adds `convergent`) is applied only to the barrier
; declaration.
183attributes #0 = { nounwind }
184attributes #1 = { convergent nounwind }
185