; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s

; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine.
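;
; For example (illustrative only, not a FileCheck pattern), a compare of a
; negated value against a positive immediate such as
;   %neg = fsub float -0.0, %x
;   %cmp = fcmp olt float %neg, 4.0
; is expected to fold to the equivalent
;   %cmp = fcmp ogt float %x, -4.0
; so no separate fneg has to be materialized before the compare.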

; GCN-LABEL: {{^}}multi_use_fneg_src:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg_src() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}multi_foldable_use_fneg_src:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[A]]
; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %use1 = fmul float %mul, %neg.mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y

  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}multi_use_fneg:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
; GCN-NOT: xor
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %neg.mul, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}multi_foldable_use_fneg:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
; GCN: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %z = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  %use1 = fmul float %neg.mul, %mul
  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp oeq float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ogt float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp oge float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp olt float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ole float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp one float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ueq float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ugt float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp uge float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ult float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ule float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp une float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

attributes #0 = { nounwind }