1; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
2
3; Instcombine pulls the addrspacecast out of the select; make sure
4;  the pass under test doesn't do something insane on non-canonical IR.
5
; Select between two addrspace(3) pointers that were each cast to i32*, with
; the selected i32* returned from the function. The expected output is the
; same four instructions as the input, in order and unchanged.
6; CHECK-LABEL: @return_select_group_flat(
7; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
8; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
9; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1
10; CHECK-NEXT: ret i32* %select
11define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
12  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
13  %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
14  %select = select i1 %c, i32* %cast0, i32* %cast1
15  ret i32* %select
16}
17
; Both select operands are addrspace(3) pointers cast to i32*, and the result
; is only used by a store. The expected output selects directly between the
; addrspace(3) arguments and stores through the addrspace(3) result.
18; CHECK-LABEL: @store_select_group_flat(
19; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
20; CHECK: store i32 -1, i32 addrspace(3)* %select
21define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
22  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
23  %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
24  %select = select i1 %c, i32* %cast0, i32* %cast1
25  store i32 -1, i32* %select
26  ret void
27}
28
29; Make sure metadata is preserved
; The input select carries !prof !0; the expected addrspace(3) select must
; still carry !prof !0, and the load must read through the addrspace(3) result.
30; CHECK-LABEL: @load_select_group_flat_md(
31; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0
32; CHECK: %load = load i32, i32 addrspace(3)* %select
33define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
34  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
35  %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
36  %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0
37  %load = load i32, i32* %select
38  ret i32 %load
39}
40
; The select operands come from different address spaces: one is cast from
; addrspace(3) and the other from addrspace(5). The expected output keeps both
; casts, the i32* select and the i32* store exactly as written.
41; CHECK-LABEL: @store_select_mismatch_group_private_flat(
42; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
43; CHECK: %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
44; CHECK: %select = select i1 %c, i32* %cast0, i32* %cast1
45; CHECK: store i32 -1, i32* %select
46define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
47  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
48  %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
49  %select = select i1 %c, i32* %cast0, i32* %cast1
50  store i32 -1, i32* %select
51  ret void
52}
53
; Internal addrspace(3) globals referenced by the constant-expression tests in
; this file.
54@lds0 = internal addrspace(3) global i32 123, align 4
55@lds1 = internal addrspace(3) global i32 456, align 4
56
; The load address is a constant-expression select between @lds0 and @lds1,
; each cast from addrspace(3) to i32*. The expected output loads through an
; addrspace(3) constant select that names the two globals directly.
57; CHECK-LABEL: @constexpr_select_group_flat(
58; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1)
59define i32 @constexpr_select_group_flat() #0 {
60bb:
61  %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*))
62  ret i32 %tmp
63}
64
; Constant-expression select whose operands are casts from different address
; spaces: addrspace(3) @lds0 and addrspace(1) @global0. The expected load is
; identical to the input.
65; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch(
66; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
67define i32 @constexpr_select_group_global_flat_mismatch() #0 {
68bb:
69  %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
70  ret i32 %tmp
71}
72
; Select between a pointer cast from addrspace(3) and an i32* null constant.
; The expected select is in addrspace(3), with the null operand written as an
; addrspacecast of the i32* null to addrspace(3).
73; CHECK-LABEL: @store_select_group_flat_null(
74; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
75; CHECK: store i32 -1, i32 addrspace(3)* %select
76define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
77  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
78  %select = select i1 %c, i32* %cast0, i32* null
79  store i32 -1, i32* %select
80  ret void
81}
82
; Same as @store_select_group_flat_null but with the select operands swapped:
; the i32* null constant is the true value and the cast pointer is the false
; value. The expected addrspace(3) select keeps that operand order.
83; CHECK-LABEL: @store_select_group_flat_null_swap(
84; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
85; CHECK: store i32 -1, i32 addrspace(3)* %select
86define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
87  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
88  %select = select i1 %c, i32* null, i32* %cast0
89  store i32 -1, i32* %select
90  ret void
91}
92
; Select between a pointer cast from addrspace(3) and i32* undef. The expected
; select is in addrspace(3), with the undef operand written as a plain
; addrspace(3) undef.
93; CHECK-LABEL: @store_select_group_flat_undef(
94; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
95; CHECK: store i32 -1, i32 addrspace(3)* %select
96define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
97  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
98  %select = select i1 %c, i32* %cast0, i32* undef
99  store i32 -1, i32* %select
100  ret void
101}
102
; Same as @store_select_group_flat_undef but with the select operands swapped:
; undef is the true value and the cast pointer is the false value. The
; expected addrspace(3) select keeps that operand order.
103; CHECK-LABEL: @store_select_group_flat_undef_swap(
104; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
105; CHECK: store i32 -1, i32 addrspace(3)* %select
106define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
107  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
108  %select = select i1 %c, i32* undef, i32* %cast0
109  store i32 -1, i32* %select
110  ret void
111}
112
; The select result (cast addrspace(3) pointer vs. i32* null) is first indexed
; by a getelementptr and only then stored through. The expected output has the
; select, the getelementptr and the store all in addrspace(3).
113; CHECK-LABEL: @store_select_gep_group_flat_null(
114; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
115; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
116; CHECK: store i32 -1, i32 addrspace(3)* %gep
117define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
118  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
119  %select = select i1 %c, i32* %cast0, i32* null
120  %gep = getelementptr i32, i32* %select, i64 16
121  store i32 -1, i32* %gep
122  ret void
123}
124
; Internal addrspace(1) global used as the second address space in the
; "mismatch" tests in this file.
125@global0 = internal addrspace(1) global i32 123, align 4
126
; Select between a cast addrspace(3) argument and a constant addrspacecast of
; addrspace(3) @lds1. The expected addrspace(3) select uses @lds1 directly,
; with no cast.
127; CHECK-LABEL: @store_select_group_flat_constexpr(
128; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
129; CHECK: store i32 7, i32 addrspace(3)* %select
130define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
131  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
132  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)
133  store i32 7, i32* %select
134  ret void
135}
136
; The second select operand is an inttoptr constant that directly produces an
; i32*. The expected select is in addrspace(3), with that inttoptr wrapped in
; a constant addrspacecast to addrspace(3).
137; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
138; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*)
139; CHECK: store i32 7, i32 addrspace(3)* %select
140define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
141  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
142  %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*)
143  store i32 7, i32* %select
144  ret void
145}
146
; The second select operand is an addrspace(3) inttoptr constant cast to i32*.
; The expected addrspace(3) select uses the inttoptr constant directly, and
; the store must be the very next line after the select.
147; CHECK-LABEL: @store_select_group_flat_inttoptr_group(
148; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
149; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
150define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
151  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
152  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*)
153  store i32 7, i32* %select
154  ret void
155}
156
; Select between a cast addrspace(3) argument and a constant cast of
; addrspace(1) @global0. The expected cast, select and store are identical to
; the input and stay on i32*.
157; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
158; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
159; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
160; CHECK: store i32 7, i32* %select
161define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
162  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
163  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
164  store i32 7, i32* %select
165  ret void
166}
167
; Same as @store_select_group_global_mismatch_flat_constexpr but with the
; select operands swapped: the addrspace(1) constant cast is the true value.
; The expected output is again identical to the input.
168; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
169; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
170; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
171; CHECK: store i32 7, i32* %select
172define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
173  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
174  %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
175  store i32 7, i32* %select
176  ret void
177}
178
; Both select operands are constant casts of null, one from addrspace(3) and
; one from addrspace(1). The expected select and store are identical to the
; input.
179; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
180; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
181; CHECK: store i32 7, i32* %select
182define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
183  %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
184  store i32 7, i32* %select
185  ret void
186}
187
; Constant-expression form of the null/null mismatch: the store address is a
; constant select between casts of addrspace(3) null and addrspace(1) null.
; The expected store is identical to the input.
188; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
189; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
190define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
191  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
192  ret void
193}
194
; Constant select between a cast of addrspace(3) @lds0 and a cast of
; addrspace(1) null. The expected store is identical to the input.
195; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
196; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
197define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
198  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
199  ret void
200}
201
; Constant select between a cast of addrspace(3) null and a cast of
; addrspace(1) @global0. The expected store is identical to the input.
202; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
203; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
204define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
205  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
206  ret void
207}
208
; Constant select between a cast of an addrspace(3) inttoptr constant and a
; cast of addrspace(1) null. The expected store is identical to the input.
209; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
210; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
211define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
212  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
213  ret void
214}
215
; Constant select between an inttoptr constant that directly produces an i32*
; and a cast of addrspace(1) null. Unlike the neighbouring "mismatch" tests,
; the expected store is rewritten to addrspace(1): the inttoptr is wrapped in
; an addrspacecast to addrspace(1) and the null becomes a plain addrspace(1)
; null.
216; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
217; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
218define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
219  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
220  ret void
221}
222
; Constant select between a cast of addrspace(3) null and a cast of
; addrspace(1) undef. The expected store stays on i32*; its second operand
; appears as a plain i32* undef rather than as the cast written in the input.
223; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
224; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* undef), align 4
225define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
226  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4
227  ret void
228}
229
; External addrspace(3) array whose address is fed through ptrtoint in
; @store_select_group_constexpr_ptrtoint.
230@lds2 = external addrspace(3) global [1024 x i32], align 4
231
; The second select operand is an addrspace(1) pointer built by inttoptr from
; (ptrtoint of addrspace(3) @lds2) + 124, then cast to i32*. The expected
; cast, select and store are identical to the input and stay on i32*.
232; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
233; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
234; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
235; CHECK: store i32 7, i32* %select
236define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
237  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
238  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
239  store i32 7, i32* %select
240  ret void
241}
242
; Vector-of-pointers variant: two <2 x i32 addrspace(3)*> values are cast to
; <2 x i32*>, selected between, and each extracted element is stored through.
; The expected output is identical to the input, with every instruction still
; on i32*.
243; CHECK-LABEL: @store_select_group_flat_vector(
244; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
245; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
246; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
247; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0
248; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1
249; CHECK: store i32 -1, i32* %extract0
250; CHECK: store i32 -2, i32* %extract1
251define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
252  %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
253  %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
254  %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
255  %extract0 = extractelement <2 x i32*> %select, i32 0
256  %extract1 = extractelement <2 x i32*> %select, i32 1
257  store i32 -1, i32* %extract0
258  store i32 -2, i32* %extract1
259  ret void
260}
261
; Attribute group #0 is attached to every function in this file.
262attributes #0 = { nounwind }
263
; Branch-weight node referenced as !prof !0 by @load_select_group_flat_md.
264!0 = !{!"branch_weights", i32 2, i32 10}
265