; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s

; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s

; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
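
; The RUN lines compile for SI (default amdgcn) and VI (fiji) twice each:
; once with default FP semantics (the *-SAFE prefixes) and once with
; -enable-no-nans-fp-math and -enable-no-signed-zeros-fp-math (the *-NONAN
; prefixes). The r600/redwood run is only checked against the EG prefix.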

declare i32 @llvm.amdgcn.workitem.id.x() #1

; The two inputs to the instruction are different SGPRs from the same
; super-register, so we can't fold both SGPR operands (a VALU instruction
; may only read one SGPR) even though they belong to the same
; super-register.
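;
; Note that v_min_legacy_f32 only exists on SI/CI; on VI the legacy min is
; expanded to a v_cmp followed by a v_cndmask, which is why the VI-SAFE
; checks throughout this file differ from the SI-SAFE ones.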

; FUNC-LABEL: {{^}}s_test_fmin_legacy_subreg_inputs_f32:
; EG: MIN *
; SI-SAFE: v_min_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; SI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; VI-SAFE: v_cmp_nlt_f32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}

; VI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(1)* %out, <4 x float> %reg0) #0 {
  %r0 = extractelement <4 x float> %reg0, i32 0
  %r1 = extractelement <4 x float> %reg0, i32 1
  %r2 = fcmp uge float %r0, %r1
  %r3 = select i1 %r2, float %r1, float %r0
  store float %r3, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}

; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]

; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]

; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]

; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[VB]], [[VA]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, s[[A]], [[VB]]
define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; The nsz flag is also needed to fold this to a plain min; the nnan flags
; on the sources alone are not sufficient.
; FIXME: These should be separate tests.
; GCN-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}

; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[A]], 1.0
; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[B]], 2.0

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]

; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
; VI-SAFE: v_cndmask_b32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]], vcc

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(float addrspace(1)* %out, float %a, float %b) #0 {
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  %cmp = fcmp ule float %a.nnan, %b.nnan
  %val = select i1 %cmp, float %a.nnan, float %b.nnan
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

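; Note the operand order in the SI-SAFE v_min_legacy checks: the unordered
; compares (ule/ult) swap the operands relative to the ordered compares
; below (ole/olt). v_min_legacy_f32 is not commutative when an input is
; NaN, so the swap is what preserves the select semantics.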
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_le_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp olt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ult float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1

  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <1 x float> %a, %b
  %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, <1 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
; GCN: {{buffer|flat}}_load_dwordx2
; GCN: {{buffer|flat}}_load_dwordx2
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32

; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1

  %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
  %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <2 x float> %a, %b
  %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
  store <2 x float> %val, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE-NOT: v_min_

; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE-NOT: v_cmp
; VI-SAFE-NOT: v_cndmask

; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN-NOT: v_min_
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}

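; When the compare result has a second use (the i1 store below), the
; compare cannot be folded away into a min, so the cmp + cndmask sequence
; is kept on all targets and no v_min is formed.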
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-NOT: v_min
; GCN: v_cmp_le_f32
; GCN-NEXT: v_cndmask_b32
; GCN-NOT: v_min
; GCN: s_endpgm
define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val0 = select i1 %cmp, float %a, float %b
  store float %val0, float addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }