1; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Both select arms are fabs of a loaded value, and the select feeds an fadd.
; The checks expect the select (v_cndmask) to be done on the raw loaded
; values and a single |...| source modifier to be applied on the add.
4; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
5; GCN: buffer_load_dword [[X:v[0-9]+]]
6; GCN: buffer_load_dword [[Y:v[0-9]+]]
7; GCN: buffer_load_dword [[Z:v[0-9]+]]
8
9; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
10; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
11define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
12  %x = load volatile float, float addrspace(1)* undef
13  %y = load volatile float, float addrspace(1)* undef
14  %z = load volatile float, float addrspace(1)* undef
15  %cmp = icmp eq i32 %c, 0
16  %fabs.x = call float @llvm.fabs.f32(float %x)
17  %fabs.y = call float @llvm.fabs.f32(float %y)
18  %select = select i1 %cmp, float %fabs.x, float %fabs.y
19  %add = fadd float %select, %z
  ; NOTE(review): this is the only store in the visible file that is not
  ; marked volatile - confirm whether that is intentional.
20  store float %add, float addrspace(1)* undef
21  ret void
22}
23
; Same fabs/fabs select as above, but fabs(x) (the true arm) has a second
; user: another fadd. The checks expect the select to still operate on the
; raw values, with the extra use expressed as an |X| source modifier on the
; second add.
24; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
25; GCN: buffer_load_dword [[X:v[0-9]+]]
26; GCN: buffer_load_dword [[Y:v[0-9]+]]
27; GCN: buffer_load_dword [[Z:v[0-9]+]]
28; GCN: buffer_load_dword [[W:v[0-9]+]]
29
30; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
31; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
32; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
33define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
34  %x = load volatile float, float addrspace(1)* undef
35  %y = load volatile float, float addrspace(1)* undef
36  %z = load volatile float, float addrspace(1)* undef
37  %w = load volatile float, float addrspace(1)* undef
38  %cmp = icmp eq i32 %c, 0
39  %fabs.x = call float @llvm.fabs.f32(float %x)
40  %fabs.y = call float @llvm.fabs.f32(float %y)
41  %select = select i1 %cmp, float %fabs.x, float %fabs.y
42  %add0 = fadd float %select, %z
  ; Second use of %fabs.x, outside the select.
43  %add1 = fadd float %fabs.x, %w
44  store volatile float %add0, float addrspace(1)* undef
45  store volatile float %add1, float addrspace(1)* undef
46  ret void
47}
48
; fabs(x) is used by the select and is also stored directly, so its extra
; user cannot take a source modifier. The checks expect the select to still
; use the raw values, plus a separate v_and_b32 with 0x7fffffff that
; produces the stored fabs(x) value.
49; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
50; GCN: buffer_load_dword [[X:v[0-9]+]]
51; GCN: buffer_load_dword [[Y:v[0-9]+]]
52; GCN: buffer_load_dword [[Z:v[0-9]+]]
53
54; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
55; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
56; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
57
58; GCN: buffer_store_dword [[ADD]]
59; GCN: buffer_store_dword [[X_ABS]]
60define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
61  %x = load volatile float, float addrspace(1)* undef
62  %y = load volatile float, float addrspace(1)* undef
63  %z = load volatile float, float addrspace(1)* undef
64  %cmp = icmp eq i32 %c, 0
65  %fabs.x = call float @llvm.fabs.f32(float %x)
66  %fabs.y = call float @llvm.fabs.f32(float %y)
67  %select = select i1 %cmp, float %fabs.x, float %fabs.y
68  %add0 = fadd float %select, %z
69  store volatile float %add0, float addrspace(1)* undef
  ; Direct store of %fabs.x: the second, non-arithmetic use.
70  store volatile float %fabs.x, float addrspace(1)* undef
71  ret void
72}
73
; Mirror of the multi_use_lhs test: here fabs(y) (the false arm) has the
; second fadd user. The checks expect the select on raw values and an |Y|
; source modifier on the second add.
74; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
75; GCN: buffer_load_dword [[X:v[0-9]+]]
76; GCN: buffer_load_dword [[Y:v[0-9]+]]
77; GCN: buffer_load_dword [[Z:v[0-9]+]]
78; GCN: buffer_load_dword [[W:v[0-9]+]]
79
80; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
81; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
82; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
83define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
84  %x = load volatile float, float addrspace(1)* undef
85  %y = load volatile float, float addrspace(1)* undef
86  %z = load volatile float, float addrspace(1)* undef
87  %w = load volatile float, float addrspace(1)* undef
88  %cmp = icmp eq i32 %c, 0
89  %fabs.x = call float @llvm.fabs.f32(float %x)
90  %fabs.y = call float @llvm.fabs.f32(float %y)
91  %select = select i1 %cmp, float %fabs.x, float %fabs.y
92  %add0 = fadd float %select, %z
  ; Second use of %fabs.y, outside the select.
93  %add1 = fadd float %fabs.y, %w
94  store volatile float %add0, float addrspace(1)* undef
95  store volatile float %add1, float addrspace(1)* undef
96  ret void
97}
98
; Only the true arm is a fabs; the false arm is a plain loaded value. The
; checks expect the |X| modifier to stay on the v_cndmask_b32_e64 operand
; and the add to consume the select result without modifiers.
99; GCN-LABEL: {{^}}add_select_fabs_var_f32:
100; GCN: buffer_load_dword [[X:v[0-9]+]]
101; GCN: buffer_load_dword [[Y:v[0-9]+]]
102; GCN: buffer_load_dword [[Z:v[0-9]+]]
103
104; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
105; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
106define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
107  %x = load volatile float, float addrspace(1)* undef
108  %y = load volatile float, float addrspace(1)* undef
109  %z = load volatile float, float addrspace(1)* undef
110  %cmp = icmp eq i32 %c, 0
111  %fabs.x = call float @llvm.fabs.f32(float %x)
112  %select = select i1 %cmp, float %fabs.x, float %y
113  %add = fadd float %select, %z
114  store volatile float %add, float addrspace(1)* undef
115  ret void
116}
117
; Select between fabs(x) and the negative constant -1.0. The checks expect
; the |X| modifier to remain on the select operand, with -1.0 as the other
; operand, followed by an unmodified add.
118; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
119; GCN: buffer_load_dword [[X:v[0-9]+]]
120; GCN: buffer_load_dword [[Y:v[0-9]+]]
121
122; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
123; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
124define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
125  %x = load volatile float, float addrspace(1)* undef
126  %y = load volatile float, float addrspace(1)* undef
127  %cmp = icmp eq i32 %c, 0
128  %fabs = call float @llvm.fabs.f32(float %x)
129  %select = select i1 %cmp, float %fabs, float -1.0
130  %add = fadd float %select, %y
131  store volatile float %add, float addrspace(1)* undef
132  ret void
133}
134
135; FIXME: fabs should fold away
; fabs is applied to the result of a select between two negative constants
; (-2.0 / -1.0). The checks record the current output: a select of the two
; constants followed by an add that applies |...| to the select result.
136; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
137; GCN: buffer_load_dword [[X:v[0-9]+]]
138
139; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
140; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
141define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
142  %x = load volatile float, float addrspace(1)* undef
143  %cmp = icmp eq i32 %c, 0
144  %select = select i1 %cmp, float -2.0, float -1.0
  ; fabs of the select result, not of the individual arms.
145  %fabs = call float @llvm.fabs.f32(float %select)
146  %add = fadd float %fabs, %x
147  store volatile float %add, float addrspace(1)* undef
148  ret void
149}
150
; Baseline with no fabs or fneg: select between the constants 2.0 and 1.0,
; then add. The checks expect a constant/constant select and an add without
; source modifiers.
151; GCN-LABEL: {{^}}add_select_posk_posk_f32:
152; GCN: buffer_load_dword [[X:v[0-9]+]]
153
154; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
155; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
156define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
157  %x = load volatile float, float addrspace(1)* undef
158  %cmp = icmp eq i32 %c, 0
159  %select = select i1 %cmp, float 2.0, float 1.0
160  %add = fadd float %select, %x
161  store volatile float %add, float addrspace(1)* undef
162  ret void
163}
164
; Same as add_select_fabs_negk_f32 with the select arms swapped: -1.0 is
; the true arm and fabs(x) the false arm. The checks expect a v_cmp_ne (the
; IR compare is eq) feeding a select that keeps the |X| modifier.
165; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
166; GCN: buffer_load_dword [[X:v[0-9]+]]
167; GCN: buffer_load_dword [[Y:v[0-9]+]]
168
169; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
170; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
171; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
172define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
173  %x = load volatile float, float addrspace(1)* undef
174  %y = load volatile float, float addrspace(1)* undef
175  %cmp = icmp eq i32 %c, 0
176  %fabs = call float @llvm.fabs.f32(float %x)
177  %select = select i1 %cmp, float -1.0, float %fabs
178  %add = fadd float %select, %y
179  store volatile float %add, float addrspace(1)* undef
180  ret void
181}
182
; Variant of add_select_negk_fabs_f32 using -1024.0 as the constant arm.
; The checks expect that constant to be materialized with a v_mov_b32 of
; 0xc4800000 and the select to keep the |X| modifier.
183; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
184; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
185; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
186; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
187
188; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
189; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
190; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
191define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
192  %x = load volatile float, float addrspace(1)* undef
193  %y = load volatile float, float addrspace(1)* undef
194  %cmp = icmp eq i32 %c, 0
195  %fabs = call float @llvm.fabs.f32(float %x)
196  %select = select i1 %cmp, float -1024.0, float %fabs
197  %add = fadd float %select, %y
198  store volatile float %add, float addrspace(1)* undef
199  ret void
200}
201
; Select between fabs(x) and the positive constant 1.0. The checks expect
; the fabs to be pulled out of the select: the select uses raw X and 1.0,
; and the add applies |...| to the select result.
202; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
203; GCN: buffer_load_dword [[X:v[0-9]+]]
204; GCN: buffer_load_dword [[Y:v[0-9]+]]
205
206; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
207; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
208define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
209  %x = load volatile float, float addrspace(1)* undef
210  %y = load volatile float, float addrspace(1)* undef
211
212  %cmp = icmp eq i32 %c, 0
213  %fabs = call float @llvm.fabs.f32(float %x)
214  %select = select i1 %cmp, float %fabs, float 1.0
215  %add = fadd float %select, %y
216  store volatile float %add, float addrspace(1)* undef
217  ret void
218}
219
; Arm-swapped version of add_select_fabs_posk_f32: 1.0 is the true arm and
; fabs(x) the false arm. The checks expect a v_cmp_ne into vcc, a select on
; raw X and 1.0, and |...| applied on the add.
220; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
221; GCN: buffer_load_dword [[X:v[0-9]+]]
222; GCN: buffer_load_dword [[Y:v[0-9]+]]
223
224; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
225; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
226; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
227define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
228  %x = load volatile float, float addrspace(1)* undef
229  %y = load volatile float, float addrspace(1)* undef
230  %cmp = icmp eq i32 %c, 0
231  %fabs = call float @llvm.fabs.f32(float %x)
232  %select = select i1 %cmp, float 1.0, float %fabs
233  %add = fadd float %select, %y
234  store volatile float %add, float addrspace(1)* undef
235  ret void
236}
237
; Both select arms are negations (written as fsub -0.0, v). The checks
; expect the select on the raw loaded values and the negation absorbed by
; turning the add into a subtract (Z - SELECT).
238; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
239; GCN: buffer_load_dword [[X:v[0-9]+]]
240; GCN: buffer_load_dword [[Y:v[0-9]+]]
241; GCN: buffer_load_dword [[Z:v[0-9]+]]
242
243; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
244; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
245define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
246  %x = load volatile float, float addrspace(1)* undef
247  %y = load volatile float, float addrspace(1)* undef
248  %z = load volatile float, float addrspace(1)* undef
249  %cmp = icmp eq i32 %c, 0
250  %fneg.x = fsub float -0.0, %x
251  %fneg.y = fsub float -0.0, %y
252  %select = select i1 %cmp, float %fneg.x, float %fneg.y
253  %add = fadd float %select, %z
254  store volatile float %add, float addrspace(1)* undef
255  ret void
256}
257
; fneg/fneg select where the negated x (true arm) also feeds a second fadd.
; The checks expect the select on raw values and both adds emitted as
; subtracts: Z - SELECT and W - X.
258; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
259; GCN: buffer_load_dword [[X:v[0-9]+]]
260; GCN: buffer_load_dword [[Y:v[0-9]+]]
261; GCN: buffer_load_dword [[Z:v[0-9]+]]
262; GCN: buffer_load_dword [[W:v[0-9]+]]
263
264; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
265; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
266; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
267define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
268  %x = load volatile float, float addrspace(1)* undef
269  %y = load volatile float, float addrspace(1)* undef
270  %z = load volatile float, float addrspace(1)* undef
271  %w = load volatile float, float addrspace(1)* undef
272  %cmp = icmp eq i32 %c, 0
273  %fneg.x = fsub float -0.0, %x
274  %fneg.y = fsub float -0.0, %y
275  %select = select i1 %cmp, float %fneg.x, float %fneg.y
276  %add0 = fadd float %select, %z
  ; Second use of %fneg.x, outside the select.
277  %add1 = fadd float %fneg.x, %w
278  store volatile float %add0, float addrspace(1)* undef
279  store volatile float %add1, float addrspace(1)* undef
280  ret void
281}
282
; fneg/fneg select where the negated x is also stored directly. The checks
; expect a v_xor_b32 with 0x80000000 producing the stored negated value,
; while the select still uses raw values and the add becomes Z - SELECT.
283; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
284; GCN: buffer_load_dword [[X:v[0-9]+]]
285; GCN: buffer_load_dword [[Y:v[0-9]+]]
286; GCN: buffer_load_dword [[Z:v[0-9]+]]
287
288; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
289; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
290; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]
291
292; GCN: buffer_store_dword [[ADD]]
293; GCN: buffer_store_dword [[NEG_X]]
294define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
295  %x = load volatile float, float addrspace(1)* undef
296  %y = load volatile float, float addrspace(1)* undef
297  %z = load volatile float, float addrspace(1)* undef
298  %cmp = icmp eq i32 %c, 0
299  %fneg.x = fsub float -0.0, %x
300  %fneg.y = fsub float -0.0, %y
301  %select = select i1 %cmp, float %fneg.x, float %fneg.y
302  %add0 = fadd float %select, %z
303  store volatile float %add0, float addrspace(1)* undef
  ; Direct store of %fneg.x: the second, non-arithmetic use.
304  store volatile float %fneg.x, float addrspace(1)* undef
305  ret void
306}
307
; Mirror of the multi_use_lhs fneg test: the negated y (false arm) has the
; second fadd user. The checks expect the select on raw values and two
; subtracts: Z - SELECT and W - Y.
308; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
309; GCN: buffer_load_dword [[X:v[0-9]+]]
310; GCN: buffer_load_dword [[Y:v[0-9]+]]
311; GCN: buffer_load_dword [[Z:v[0-9]+]]
312; GCN: buffer_load_dword [[W:v[0-9]+]]
313
314; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
315; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
316; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
317define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
318  %x = load volatile float, float addrspace(1)* undef
319  %y = load volatile float, float addrspace(1)* undef
320  %z = load volatile float, float addrspace(1)* undef
321  %w = load volatile float, float addrspace(1)* undef
322  %cmp = icmp eq i32 %c, 0
323  %fneg.x = fsub float -0.0, %x
324  %fneg.y = fsub float -0.0, %y
325  %select = select i1 %cmp, float %fneg.x, float %fneg.y
326  %add0 = fadd float %select, %z
  ; Second use of %fneg.y, outside the select.
327  %add1 = fadd float %fneg.y, %w
328  store volatile float %add0, float addrspace(1)* undef
329  store volatile float %add1, float addrspace(1)* undef
330  ret void
331}
332
; Only the true arm is negated; the false arm is a plain loaded value. The
; checks expect the -X modifier to stay on the v_cndmask_b32_e64 operand and
; the add to remain an unmodified add.
333; GCN-LABEL: {{^}}add_select_fneg_var_f32:
334; GCN: buffer_load_dword [[X:v[0-9]+]]
335; GCN: buffer_load_dword [[Y:v[0-9]+]]
336; GCN: buffer_load_dword [[Z:v[0-9]+]]
337
338; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
339; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
340define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
341  %x = load volatile float, float addrspace(1)* undef
342  %y = load volatile float, float addrspace(1)* undef
343  %z = load volatile float, float addrspace(1)* undef
344  %cmp = icmp eq i32 %c, 0
345  %fneg.x = fsub float -0.0, %x
346  %select = select i1 %cmp, float %fneg.x, float %y
347  %add = fadd float %select, %z
348  store volatile float %add, float addrspace(1)* undef
349  ret void
350}
351
; Select between negated x and the constant -1.0. The checks expect the
; negation to be pulled out of the select: the select uses raw X and +1.0
; (the negated constant), and the add becomes Y - SELECT.
352; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
353; GCN: buffer_load_dword [[X:v[0-9]+]]
354; GCN: buffer_load_dword [[Y:v[0-9]+]]
355
356; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
357; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
358define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
359  %x = load volatile float, float addrspace(1)* undef
360  %y = load volatile float, float addrspace(1)* undef
361  %cmp = icmp eq i32 %c, 0
362  %fneg.x = fsub float -0.0, %x
363  %select = select i1 %cmp, float %fneg.x, float -1.0
364  %add = fadd float %select, %y
365  store volatile float %add, float addrspace(1)* undef
366  ret void
367}
368
; Select between negated x and the positive constant 0x3FC45F3060000000
; (the function name indicates 1/(2*pi)). The checks expect, for both run
; lines, the negated constant 0xbe22f983 to be materialized with v_mov_b32,
; a select on raw X, and the add emitted as Y - SELECT.
369; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
370; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
371; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
372; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
373
374; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
375; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
376define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
377  %x = load volatile float, float addrspace(1)* undef
378  %y = load volatile float, float addrspace(1)* undef
379  %cmp = icmp eq i32 %c, 0
380  %fneg.x = fsub float -0.0, %x
381  %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
382  %add = fadd float %select, %y
383  store volatile float %add, float addrspace(1)* undef
384  ret void
385}
386
; Same as add_select_fneg_inv2pi_f32 but with the negative constant
; 0xBFC45F3060000000. After the negation is pulled out, the select needs the
; positive value. The two run lines are checked differently: the tahiti run
; expects it in a register via v_mov_b32 of 0x3e22f983, while the fiji run
; expects it directly as the operand 0.15915494.
387; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
388; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
389; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
390; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
391
392; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
393; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
394
395; GCN: v_sub_f32_e32 v{{[0-9]+}},  [[Y]], [[SELECT]]
396define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
397  %x = load volatile float, float addrspace(1)* undef
398  %y = load volatile float, float addrspace(1)* undef
399  %cmp = icmp eq i32 %c, 0
400  %fneg.x = fsub float -0.0, %x
401  %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
402  %add = fadd float %select, %y
403  store volatile float %add, float addrspace(1)* undef
404  ret void
405}
406
; Select between two negative constants (-2.0 / -1.0) with no fneg or fabs
; in the IR. The checks expect the constants to be used unchanged in the
; select and a plain add.
407; GCN-LABEL: {{^}}add_select_negk_negk_f32:
408; GCN: buffer_load_dword [[X:v[0-9]+]]
409
410; GCN: v_cmp_eq_u32_e64
411; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
412; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
413define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
414  %x = load volatile float, float addrspace(1)* undef
415  %cmp = icmp eq i32 %c, 0
416  %select = select i1 %cmp, float -2.0, float -1.0
417  %add = fadd float %select, %x
418  store volatile float %add, float addrspace(1)* undef
419  ret void
420}
421
; Like add_select_negk_negk_f32 but with -2048.0 and -4096.0. The checks
; expect both constants to be materialized unchanged with v_mov_b32
; (0xc5000000 and 0xc5800000), then a register/register select and a plain
; add.
422; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
423; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
424; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
425; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
426
427; GCN: v_cmp_eq_u32_e64
428; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
429; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
430define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
431  %x = load volatile float, float addrspace(1)* undef
432  %cmp = icmp eq i32 %c, 0
433  %select = select i1 %cmp, float -2048.0, float -4096.0
434  %add = fadd float %select, %x
435  store volatile float %add, float addrspace(1)* undef
436  ret void
437}
438
; Negation applied to the result of a select between -2.0 and -1.0. The
; checks expect the negation to be folded into the constants, giving a
; select between 1.0 and 2.0 followed by a plain add.
439; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
440; GCN: buffer_load_dword [[X:v[0-9]+]]
441
442; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
443; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
444define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
445  %x = load volatile float, float addrspace(1)* undef
446  %cmp = icmp eq i32 %c, 0
447  %select = select i1 %cmp, float -2.0, float -1.0
  ; Despite its name, %fneg.x negates the select result, not %x.
448  %fneg.x = fsub float -0.0, %select
449  %add = fadd float %fneg.x, %x
450  store volatile float %add, float addrspace(1)* undef
451  ret void
452}
453
; Arm-swapped version of add_select_fneg_negk_f32: -1.0 is the true arm and
; negated x the false arm. The checks expect a v_cmp_ne into vcc, a select
; on raw X and +1.0, and the add emitted as Y - SELECT.
454; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
455; GCN: buffer_load_dword [[X:v[0-9]+]]
456; GCN: buffer_load_dword [[Y:v[0-9]+]]
457
458; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
459; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
460; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
461define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
462  %x = load volatile float, float addrspace(1)* undef
463  %y = load volatile float, float addrspace(1)* undef
464  %cmp = icmp eq i32 %c, 0
465  %fneg.x = fsub float -0.0, %x
466  %select = select i1 %cmp, float -1.0, float %fneg.x
467  %add = fadd float %select, %y
468  store volatile float %add, float addrspace(1)* undef
469  ret void
470}
471
; Select between negated x and the positive constant 1.0. The checks expect
; the negation to be pulled out of the select: the select uses raw X and
; -1.0 (the negated constant), and the add becomes Y - SELECT.
472; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
473; GCN: buffer_load_dword [[X:v[0-9]+]]
474; GCN: buffer_load_dword [[Y:v[0-9]+]]
475
476; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
477; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
478define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
479  %x = load volatile float, float addrspace(1)* undef
480  %y = load volatile float, float addrspace(1)* undef
481  %cmp = icmp eq i32 %c, 0
482  %fneg.x = fsub float -0.0, %x
483  %select = select i1 %cmp, float %fneg.x, float 1.0
484  %add = fadd float %select, %y
485  store volatile float %add, float addrspace(1)* undef
486  ret void
487}
488
; Arm-swapped version of add_select_fneg_posk_f32: 1.0 is the true arm and
; negated x the false arm. The checks expect a v_cmp_ne into vcc, a select
; on raw X and -1.0, and the add emitted as Y - SELECT.
489; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
490; GCN: buffer_load_dword [[X:v[0-9]+]]
491; GCN: buffer_load_dword [[Y:v[0-9]+]]
492
493; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
494; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
495; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
496define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
497  %x = load volatile float, float addrspace(1)* undef
498  %y = load volatile float, float addrspace(1)* undef
499  %cmp = icmp eq i32 %c, 0
500  %fneg.x = fsub float -0.0, %x
501  %select = select i1 %cmp, float 1.0, float %fneg.x
502  %add = fadd float %select, %y
503  store volatile float %add, float addrspace(1)* undef
504  ret void
505}
506
; Mixed modifiers: the true arm is -fabs(x), the false arm is fabs(y). The
; checks expect both modifiers to stay on the select operands (|Y| and
; -|X|) and the add to be unmodified.
507; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
508; GCN: buffer_load_dword [[X:v[0-9]+]]
509; GCN: buffer_load_dword [[Y:v[0-9]+]]
510; GCN: buffer_load_dword [[Z:v[0-9]+]]
511
512; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
513; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
514define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
515  %x = load volatile float, float addrspace(1)* undef
516  %y = load volatile float, float addrspace(1)* undef
517  %z = load volatile float, float addrspace(1)* undef
518  %cmp = icmp eq i32 %c, 0
519  %fabs.x = call float @llvm.fabs.f32(float %x)
520  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
521  %fabs.y = call float @llvm.fabs.f32(float %y)
522  %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
523  %add = fadd float %select, %z
524  store volatile float %add, float addrspace(1)* undef
525  ret void
526}
527
; Mixed modifiers, mirrored: the true arm is fabs(x), the false arm is
; -fabs(y). The checks expect both modifiers to stay on the select operands
; (-|Y| and |X|) and the add to be unmodified.
528; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
529; GCN: buffer_load_dword [[X:v[0-9]+]]
530; GCN: buffer_load_dword [[Y:v[0-9]+]]
531; GCN: buffer_load_dword [[Z:v[0-9]+]]
532
533; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
534; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
535define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
536  %x = load volatile float, float addrspace(1)* undef
537  %y = load volatile float, float addrspace(1)* undef
538  %z = load volatile float, float addrspace(1)* undef
539  %cmp = icmp eq i32 %c, 0
540  %fabs.x = call float @llvm.fabs.f32(float %x)
541  %fabs.y = call float @llvm.fabs.f32(float %y)
542  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
543  %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
544  %add = fadd float %select, %z
545  store volatile float %add, float addrspace(1)* undef
546  ret void
547}
548
; Mixed modifiers: the true arm is negated x, the false arm is fabs(y). The
; checks expect both modifiers to stay on the select operands (|Y| and -X)
; and the add to be unmodified.
549; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
550; GCN: buffer_load_dword [[X:v[0-9]+]]
551; GCN: buffer_load_dword [[Y:v[0-9]+]]
552; GCN: buffer_load_dword [[Z:v[0-9]+]]
553
554; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
555; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
556define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
557  %x = load volatile float, float addrspace(1)* undef
558  %y = load volatile float, float addrspace(1)* undef
559  %z = load volatile float, float addrspace(1)* undef
560  %cmp = icmp eq i32 %c, 0
561  %fneg.x = fsub float -0.000000e+00, %x
562  %fabs.y = call float @llvm.fabs.f32(float %y)
563  %select = select i1 %cmp, float %fneg.x, float %fabs.y
564  %add = fadd float %select, %z
565  store volatile float %add, float addrspace(1)* undef
566  ret void
567}
568
; Mixed modifiers, mirrored: the true arm is fabs(x), the false arm is
; negated y. The checks expect both modifiers to stay on the select
; operands (-Y and |X|) and the add to be unmodified.
569; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
570; GCN: buffer_load_dword [[X:v[0-9]+]]
571; GCN: buffer_load_dword [[Y:v[0-9]+]]
572; GCN: buffer_load_dword [[Z:v[0-9]+]]
573
574; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
575; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
576define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
577  %x = load volatile float, float addrspace(1)* undef
578  %y = load volatile float, float addrspace(1)* undef
579  %z = load volatile float, float addrspace(1)* undef
580  %cmp = icmp eq i32 %c, 0
581  %fabs.x = call float @llvm.fabs.f32(float %x)
582  %fneg.y = fsub float -0.000000e+00, %y
583  %select = select i1 %cmp, float %fabs.x, float %fneg.y
584  %add = fadd float %select, %z
585  store volatile float %add, float addrspace(1)* undef
586  ret void
587}
588
; Both arms carry a negation: the true arm is -x, the false arm is
; -fabs(y). The checks expect the common negation to be pulled out (the add
; becomes Z - SELECT), leaving only the |Y| modifier on the select.
589; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
590; GCN: buffer_load_dword [[X:v[0-9]+]]
591; GCN: buffer_load_dword [[Y:v[0-9]+]]
592; GCN: buffer_load_dword [[Z:v[0-9]+]]
593
594; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
595; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
596define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
597  %x = load volatile float, float addrspace(1)* undef
598  %y = load volatile float, float addrspace(1)* undef
599  %z = load volatile float, float addrspace(1)* undef
600  %cmp = icmp eq i32 %c, 0
601  %fneg.x = fsub float -0.000000e+00, %x
602  %fabs.y = call float @llvm.fabs.f32(float %y)
603  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
604  %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
605  %add = fadd float %select, %z
606  store volatile float %add, float addrspace(1)* undef
607  ret void
608}
609
; Both arms carry a negation: the true arm is -y, the false arm is
; -fabs(x). The checks expect the common negation to be pulled out (the add
; becomes Z - SELECT), leaving only the |X| modifier on the select.
610; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
611; GCN: buffer_load_dword [[X:v[0-9]+]]
612; GCN: buffer_load_dword [[Y:v[0-9]+]]
613; GCN: buffer_load_dword [[Z:v[0-9]+]]
614
615; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
616; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
617define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
618  %x = load volatile float, float addrspace(1)* undef
619  %y = load volatile float, float addrspace(1)* undef
620  %z = load volatile float, float addrspace(1)* undef
621  %cmp = icmp eq i32 %c, 0
622  %fabs.x = call float @llvm.fabs.f32(float %x)
623  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
624  %fneg.y = fsub float -0.000000e+00, %y
  ; Note the arm order: -y is the true arm, -fabs(x) the false arm.
625  %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
626  %add = fadd float %select, %z
627  store volatile float %add, float addrspace(1)* undef
628  ret void
629}
630
; fmul user: select between -fabs(x) and the positive constant 4.0. The
; checks expect the negation to move onto the multiply (-SELECT) while the
; select keeps |X| and uses the negated constant -4.0.
631; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
632; GCN: buffer_load_dword [[X:v[0-9]+]]
633; GCN: buffer_load_dword [[Y:v[0-9]+]]
634
635; GCN-DAG: v_cmp_eq_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
636; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
637; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
638define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
639  %x = load volatile float, float addrspace(1)* undef
640  %y = load volatile float, float addrspace(1)* undef
641  %cmp = icmp eq i32 %c, 0
642  %fabs.x = call float @llvm.fabs.f32(float %x)
643  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
644  %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
  ; The value is named %add but this is an fmul.
645  %add = fmul float %select, %y
646  store volatile float %add, float addrspace(1)* undef
647  ret void
648}
649
; Arm-swapped version of mul_select_negfabs_posk_f32: 4.0 is the true arm
; and -fabs(x) the false arm. The checks expect a v_cmp_ne, a select of
; -4.0 and |X|, and the negation applied on the multiply (-SELECT).
650; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
651; GCN: buffer_load_dword [[X:v[0-9]+]]
652; GCN: buffer_load_dword [[Y:v[0-9]+]]
653
654; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
655; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
656; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
657define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
658  %x = load volatile float, float addrspace(1)* undef
659  %y = load volatile float, float addrspace(1)* undef
660  %cmp = icmp eq i32 %c, 0
661  %fabs.x = call float @llvm.fabs.f32(float %x)
662  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
663  %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
  ; The value is named %add but this is an fmul.
664  %add = fmul float %select, %y
665  store volatile float %add, float addrspace(1)* undef
666  ret void
667}
668
; fmul user: select between -fabs(x) and the negative constant -4.0. The
; checks expect both modifiers to be pulled out of the select: it uses raw
; X and +4.0, and the multiply applies -|SELECT|.
669; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
670; GCN: buffer_load_dword [[X:v[0-9]+]]
671; GCN: buffer_load_dword [[Y:v[0-9]+]]
672
673; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
674; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
675define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
676  %x = load volatile float, float addrspace(1)* undef
677  %y = load volatile float, float addrspace(1)* undef
678  %cmp = icmp eq i32 %c, 0
679  %fabs.x = call float @llvm.fabs.f32(float %x)
680  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
681  %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
  ; The value is named %add but this is an fmul.
682  %add = fmul float %select, %y
683  store volatile float %add, float addrspace(1)* undef
684  ret void
685}
686
; Arm-swapped version of mul_select_negfabs_negk_f32: -4.0 is the true arm
; and -fabs(x) the false arm. The checks expect a v_cmp_ne into vcc, a
; select on raw X and +4.0, and -|SELECT| applied on the multiply.
687; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
688; GCN: buffer_load_dword [[X:v[0-9]+]]
689; GCN: buffer_load_dword [[Y:v[0-9]+]]
690
691; GCN: v_cmp_ne_u32_e64 vcc
692; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
693; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
694define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
695  %x = load volatile float, float addrspace(1)* undef
696  %y = load volatile float, float addrspace(1)* undef
697  %cmp = icmp eq i32 %c, 0
698  %fabs.x = call float @llvm.fabs.f32(float %x)
699  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
700  %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
  ; The value is named %add but this is an fmul.
701  %add = fmul float %select, %y
702  store volatile float %add, float addrspace(1)* undef
703  ret void
704}
705
706; --------------------------------------------------------------------------------
707; Don't fold if fneg can fold into the source
708; --------------------------------------------------------------------------------
709
; The negated value is the result of (x + 4.0) and the select result is
; stored directly. The checks expect the negation to be folded into the
; source instruction, giving -4.0 - X, with the select keeping the constant
; 2.0 unchanged and feeding the store on the very next instruction.
710; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
711; GCN: buffer_load_dword [[X:v[0-9]+]]
712; GCN: buffer_load_dword [[Y:v[0-9]+]]
713
714; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
715; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
716; GCN-NEXT: buffer_store_dword [[SELECT]]
717define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
718  %x = load volatile float, float addrspace(1)* undef
  ; %y is loaded (and its load is checked above) but has no other use here.
719  %y = load volatile float, float addrspace(1)* undef
720  %cmp = icmp eq i32 %c, 0
721  %add = fadd float %x, 4.0
722  %fneg = fsub float -0.0, %add
723  %select = select i1 %cmp, float %fneg, float 2.0
724  store volatile float %select, float addrspace(1)* undef
725  ret void
726}
727
; The negated value is the result of (x - 4.0). The checks expect the
; negation to be folded into the source instruction by swapping the
; subtract operands (4.0 - X), with the select keeping 2.0 unchanged and
; feeding the store on the very next instruction.
728; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
729; GCN: buffer_load_dword [[X:v[0-9]+]]
730
731; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
732; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
733; GCN-NEXT: buffer_store_dword [[SELECT]]
734define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
735  %x = load volatile float, float addrspace(1)* undef
736  %cmp = icmp eq i32 %c, 0
  ; The value is named %add but this is an fsub.
737  %add = fsub float %x, 4.0
738  %fneg = fsub float -0.0, %add
739  %select = select i1 %cmp, float %fneg, float 2.0
740  store volatile float %select, float addrspace(1)* undef
741  ret void
742}
743
; The negated value is the result of (x * 4.0). The checks expect the
; negation to be folded into the multiply's constant (-4.0 * X), with the
; select keeping 2.0 unchanged and feeding the store on the very next
; instruction.
744; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
745; GCN: buffer_load_dword [[X:v[0-9]+]]
746
747; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
748; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
749; GCN-NEXT: buffer_store_dword [[SELECT]]
750define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
751  %x = load volatile float, float addrspace(1)* undef
752  %cmp = icmp eq i32 %c, 0
753  %mul = fmul float %x, 4.0
754  %fneg = fsub float -0.0, %mul
755  %select = select i1 %cmp, float %fneg, float 2.0
756  store volatile float %select, float addrspace(1)* undef
757  ret void
758}
759
; The negated value is the result of llvm.fma(x, 4.0, z). The checks expect
; the negation to be folded into the fma operands (X * -4.0 + -Z), with the
; select keeping 2.0 unchanged and feeding the store on the very next
; instruction.
760; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
761; GCN: buffer_load_dword [[X:v[0-9]+]]
762; GCN: buffer_load_dword [[Z:v[0-9]+]]
763
764; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
765; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
766; GCN-NEXT: buffer_store_dword [[SELECT]]
767define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
768  %x = load volatile float, float addrspace(1)* undef
769  %z = load volatile float, float addrspace(1)* undef
770  %cmp = icmp eq i32 %c, 0
771  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
772  %fneg = fsub float -0.0, %fma
773  %select = select i1 %cmp, float %fneg, float 2.0
774  store volatile float %select, float addrspace(1)* undef
775  ret void
776}
777
; The negated value is the result of llvm.fmuladd(x, 4.0, z). As in the
; sibling fma test, the negation is expected to be folded into the
; multiply-add operands (X * -4.0 + -Z) rather than into the select, so the
; checks must pin the multiply-add itself and feed its result - not the raw
; load of X - into the select. The select keeps 2.0 unchanged and must be
; immediately followed by the store.
778; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
779; GCN: buffer_load_dword [[X:v[0-9]+]]
780; GCN: buffer_load_dword [[Z:v[0-9]+]]
781
; GCN: v_mad_f32 [[MAD:v[0-9]+]], [[X]], -4.0, -[[Z]]
782; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MAD]], vcc
783; GCN-NEXT: buffer_store_dword [[SELECT]]
784define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
785  %x = load volatile float, float addrspace(1)* undef
786  %z = load volatile float, float addrspace(1)* undef
787  %cmp = icmp eq i32 %c, 0
788  %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
789  %fneg = fsub float -0.0, %fmad
790  %select = select i1 %cmp, float %fneg, float 2.0
791  store volatile float %select, float addrspace(1)* undef
792  ret void
793}
794
795; FIXME: This one should fold to rcp
; The negated value is the result of llvm.amdgcn.rcp(x). The checks record
; the current output, in which the negation is NOT folded into the rcp:
; the select uses the plain rcp result and the negated constant -2.0, and a
; separate v_xor_b32 with 0x80000000 negates the select result before the
; store.
796; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
797; GCN: buffer_load_dword [[X:v[0-9]+]]
798
799; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
800; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
801; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
802; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
803define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
804  %x = load volatile float, float addrspace(1)* undef
  ; %y is loaded but has no other use in this function.
805  %y = load volatile float, float addrspace(1)* undef
806  %cmp = icmp eq i32 %c, 0
807  %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
808  %fneg = fsub float -0.0, %rcp
809  %select = select i1 %cmp, float %fneg, float 2.0
810  store volatile float %select, float addrspace(1)* undef
811  ret void
812}
813
; Intrinsic declarations for the tests above. All use attribute group #1.
814declare float @llvm.fabs.f32(float) #1
815declare float @llvm.fma.f32(float, float, float) #1
816declare float @llvm.fmuladd.f32(float, float, float) #1
817declare float @llvm.amdgcn.rcp.f32(float) #1
; NOTE(review): the two legacy intrinsics below are declared but have no
; callers in the visible portion of this file - confirm whether they are
; still needed.
818declare float @llvm.amdgcn.rcp.legacy(float) #1
819declare float @llvm.amdgcn.fmul.legacy(float, float) #1
820
; #0 is attached to every kernel definition; #1 to the intrinsic declarations.
821attributes #0 = { nounwind }
822attributes #1 = { nounwind readnone }
823