; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; and(%y, select(%c, 0, -1)): the checks expect the mask to be absorbed into a
; scalar select whose result is stored, with no v_and_b32 emitted in between.
; GCN-LABEL: {{^}}select_and1:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = and i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; Same as select_and1 but with the 'and' operands commuted (select on the
; left); the expected output is the same: a scalar select and no v_and_b32.
; GCN-LABEL: {{^}}select_and2:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = and i32 %s, %y
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; Variant with the select arms swapped (-1 when %c is true, 0 otherwise);
; the 'and' is still expected to fold into a scalar select with no v_and_b32.
; GCN-LABEL: {{^}}select_and3:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 -1, i32 0
  %a = and i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> version of the and/select fold: one v_cndmask_b32 with a 0 operand
; is expected per element, and no v_and_b32 before the store.
; The destination registers are not referenced later, so they are matched
; anonymously instead of being captured.
; GCN-LABEL: {{^}}select_and_v4:
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN:     store_dword
define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %s, %y
  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; or(%y, select(%c, 0, -1)): the checks expect the 'or' to be absorbed into a
; scalar select whose result is stored, with no v_or_b32 emitted in between.
; GCN-LABEL: {{^}}select_or1:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = or i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; Same as select_or1 but with the 'or' operands commuted (select on the left);
; the expected output is the same: a scalar select and no v_or_b32.
; GCN-LABEL: {{^}}select_or2:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = or i32 %s, %y
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; Variant with the select arms swapped (-1 when %c is true, 0 otherwise);
; the 'or' is still expected to fold into a scalar select with no v_or_b32.
; GCN-LABEL: {{^}}select_or3:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 -1, i32 0
  %a = or i32 %y, %s
  store i32 %a, i32 addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> version of the or/select fold: one v_cndmask_b32 with a -1 operand
; is expected per element, and no v_or_b32 before the store.
; The destination registers are not referenced later, so they are matched
; anonymously instead of being captured.
; GCN-LABEL: {{^}}select_or_v4:
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN:     store_dword
define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %s, %y
  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; sub with a constant LHS and a select-of-constants RHS should fold to a select
; of the precomputed results: 5 - (-4) = 9 when %cond is true, 5 - 3 = 2 otherwise.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 -4, i32 3
  %bo = sub i32 5, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; i16 version of the sub fold: the folded values are 9 (%cond true) and
; 2 (%cond false); the check expects them as operands of a single v_cndmask_b32.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i16 -4, i16 3
  %bo = sub i16 5, %sel
  store i16 %bo, i16 addrspace(1)* %p, align 2
  ret void
}

; i16 sub fold with negative results: 1 - 4 = -3 (%cond true) and
; 1 - 3000 = -2999 (%cond false). The check expects -2999 to be materialized
; into a VGPR as 0xfffff449 (its 32-bit two's-complement form) before the select.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i16 4, i16 3000
  %bo = sub i16 1, %sel
  store i16 %bo, i16 addrspace(1)* %p, align 2
  ret void
}

; <2 x i16> sub fold. Folded vectors: <9, 5> when %cond is true and <2, 6>
; otherwise; packed with element 0 in the low half they are 0x50009 and 0x60002.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: s_mov_b32 [[T:s[0-9]+]], 0x50009
; GCN:     s_cselect_b32 s{{[0-9]+}}, [[T]], 0x60002
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %bo = sub <2 x i16> <i16 5, i16 7>, %sel
  store <2 x i16> %bo, <2 x i16> addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> sub fold. Folded vectors: <9, 5, 6, 7> when %cond is true and
; <2, 6, 10, 14> otherwise; one v_cndmask_b32 per element is expected, in any
; order.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %bo = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %sel
  store <4 x i32> %bo, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; i64 sdiv of a constant by a select of constants: 120 / 121 = 0 (%cond true),
; 120 / 23 = 5 (%cond false). The check matches a 32-bit scalar select of 0 and 5.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @sdiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 121, i64 23
  %bo = sdiv i64 120, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; i32 sdiv of a constant by a select of constants: 184 / 7 = 26 (%cond true),
; 184 / 23 = 8 (%cond false).
; The store uses the natural i32 alignment, like the other i32 tests in this file.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i32:
; GCN: s_cselect_b32 s{{[0-9]+}}, 26, 8
define amdgpu_kernel void @sdiv_constant_sel_constants_i32(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 7, i32 23
  %bo = sdiv i32 184, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; i64 udiv of a constant by a select of constants. -4 is a very large value
; when treated as unsigned, so 120 / -4 = 0 (%cond true); 120 / 23 = 5
; (%cond false). The check matches a 32-bit scalar select of 0 and 5.
; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 -4, i64 23
  %bo = udiv i64 120, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; i64 srem of a constant by a select of constants: 33 srem 34 = 33
; (%cond true), 33 srem 15 = 3 (%cond false).
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 34, i64 15
  %bo = srem i64 33, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; i64 urem of a constant by a select of constants: 33 urem 34 = 33
; (%cond true), 33 urem 15 = 3 (%cond false).
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i64 34, i64 15
  %bo = urem i64 33, %sel
  store i64 %bo, i64 addrspace(1)* %p, align 8
  ret void
}

; shl of a constant by a select of constant shift amounts: 1 << 2 = 4
; (%cond true), 1 << 3 = 8 (%cond false).
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 4, 8
define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = shl i32 1, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; lshr of a constant by a select of constant shift amounts: 64 >> 2 = 16
; (%cond true), 64 >> 3 = 8 (%cond false).
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 16, 8
define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = lshr i32 64, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; ashr of a constant by a select of constant shift amounts: 128 >> 2 = 32
; (%cond true), 128 >> 3 = 16 (%cond false).
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 32, 16
define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = ashr i32 128, %sel
  store i32 %bo, i32 addrspace(1)* %p, align 4
  ret void
}

; fsub of a constant and a select of constants: -1.0 - (-2.0) = 1.0
; (%cond true), -1.0 - 3.0 = -4.0 (%cond false); a single v_cndmask_b32 of the
; two folded values is expected.
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, float -2.0, float 3.0
  %bo = fsub float -1.0, %sel
  store float %bo, float addrspace(1)* %p, align 4
  ret void
}

; half version of the fsub fold. Folded values are 1.0 (0x3c00 as a half bit
; pattern, %cond true) and -4.0 (0xc400, %cond false); the checks currently
; expect both to be moved into VGPRs before the select.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: it should be possible to fold constants with OpSel
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, half -2.0, half 3.0
  %bo = fsub half -1.0, %sel
  store half %bo, half addrspace(1)* %p, align 2
  ret void
}

; <2 x half> fsub fold. Folded vectors: <1.0, 5.0> when %cond is true, which
; packs to 0x45003c00, and <0.0, -2.0> otherwise, which packs to 0xc0000000.
; NOTE(review): 0xc0000000 is also the f32 bit pattern of -2.0, which is
; presumably why the check spells the second operand as -2.0.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN:     s_cselect_b32 s{{[0-9]+}}, 0x45003c00, -2.0
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %bo = fsub <2 x half> <half -1.0, half 2.0>, %sel
  store <2 x half> %bo, <2 x half> addrspace(1)* %p, align 4
  ret void
}

; <4 x float> fsub fold. Folded vectors: <1.0, 5.0, 9.0, 13.0> when %cond is
; true and <0.0, 2.0, 4.0, 6.0> otherwise. The checks expect 5.0 (0x40a00000),
; 9.0 (0x41100000), 13.0 (0x41500000) and 6.0 (0x40c00000) to be moved into
; VGPRs, while 0, 1.0, 2.0 and 4.0 appear directly as select operands.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %bo = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %sel
  store <4 x float> %bo, <4 x float> addrspace(1)* %p, align 32
  ret void
}

; fdiv of a constant by a select of constants: 8.0 / -4.0 = -2.0 (%cond true),
; 8.0 / 2.0 = 4.0 (%cond false); a single v_cndmask_b32 of the two folded
; values is expected.
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, float -4.0, float 2.0
  %bo = fdiv float 8.0, %sel
  store float %bo, float addrspace(1)* %p, align 4
  ret void
}

; frem of a constant by a select of constants: 5.0 frem -4.0 = 1.0
; (%cond true; the result takes the sign of the dividend), 5.0 frem 3.0 = 2.0
; (%cond false); a single v_cndmask_b32 of the two folded values is expected.
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %sel = select i1 %cond, float -4.0, float 3.0
  %bo = frem float 5.0, %sel
  store float %bo, float addrspace(1)* %p, align 4
  ret void
}
