1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s
2; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s
3
4; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s
5; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s
6
7; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
8
; Intrinsic called at the top of every kernel in this file; its i32 result is
; used as the element index into the %in pointer.
declare i32 @llvm.amdgcn.workitem.id.x() #1
10
11; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
12; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
13; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
14
15; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
16
17; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
18; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
19
20; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
21
22; EG: MAX
; Unordered ">=" select. Reads the floats at %in[id] and %in[id + 1] with
; volatile loads, keeps the first when `fcmp uge` holds and the second
; otherwise, then stores the chosen value to %out.
define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr float, float addrspace(1)* %lhs.ptr, i32 1
  ; Volatile keeps the two loads separate and in this order.
  %lhs = load volatile float, float addrspace(1)* %lhs.ptr, align 4
  %rhs = load volatile float, float addrspace(1)* %rhs.ptr, align 4
  ; uge also yields true when either operand is NaN, so %lhs is kept then.
  %lhs.uge.rhs = fcmp uge float %lhs, %rhs
  %picked = select i1 %lhs.uge.rhs, float %lhs, float %rhs
  store float %picked, float addrspace(1)* %out, align 4
  ret void
}
36
37; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src:
38; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
39; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
40; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
41; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]
42
43; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
44
45; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
46; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
47
48; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
49
50; EG: MAX
; Same unordered ">=" select as the plain uge kernel, but both compared values
; come from `fadd nnan` instructions (+1.0 on the first load, +2.0 on the
; second) instead of directly from memory.
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr float, float addrspace(1)* %lhs.ptr, i32 1
  ; Volatile keeps the two loads separate and in this order.
  %lhs.raw = load volatile float, float addrspace(1)* %lhs.ptr, align 4
  %rhs.raw = load volatile float, float addrspace(1)* %rhs.ptr, align 4
  ; The nnan flag sits on the adds only; the compare below carries no flags.
  %lhs = fadd nnan float %lhs.raw, 1.0
  %rhs = fadd nnan float %rhs.raw, 2.0
  %lhs.uge.rhs = fcmp uge float %lhs, %rhs
  %picked = select i1 %lhs.uge.rhs, float %lhs, float %rhs
  store float %picked, float addrspace(1)* %out, align 4
  ret void
}
66
67; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
68; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
69; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
70
71; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
72
73; VI-SAFE: v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
74; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
75
76; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
77; EG: MAX
; Ordered ">=" select. Reads the floats at %in[id] and %in[id + 1] with
; volatile loads, keeps the first when `fcmp oge` holds and the second
; otherwise, then stores the chosen value to %out.
define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr float, float addrspace(1)* %lhs.ptr, i32 1
  ; Volatile keeps the two loads separate and in this order.
  %lhs = load volatile float, float addrspace(1)* %lhs.ptr, align 4
  %rhs = load volatile float, float addrspace(1)* %rhs.ptr, align 4
  ; oge is false when either operand is NaN, so %rhs is kept in that case.
  %lhs.oge.rhs = fcmp oge float %lhs, %rhs
  %picked = select i1 %lhs.oge.rhs, float %lhs, float %rhs
  store float %picked, float addrspace(1)* %out, align 4
  ret void
}
91
92; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
93; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
94; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
95
96; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
97
98; VI-SAFE: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
99; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
100
101
102; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
103; EG: MAX
; Unordered ">" select. Reads the floats at %in[id] and %in[id + 1] with
; volatile loads, keeps the first when `fcmp ugt` holds and the second
; otherwise, then stores the chosen value to %out.
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr float, float addrspace(1)* %lhs.ptr, i32 1
  ; Volatile keeps the two loads separate and in this order.
  %lhs = load volatile float, float addrspace(1)* %lhs.ptr, align 4
  %rhs = load volatile float, float addrspace(1)* %rhs.ptr, align 4
  ; ugt also yields true when either operand is NaN, so %lhs is kept then.
  %lhs.ugt.rhs = fcmp ugt float %lhs, %rhs
  %picked = select i1 %lhs.ugt.rhs, float %lhs, float %rhs
  store float %picked, float addrspace(1)* %out, align 4
  ret void
}
117
118; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
119; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
120; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
121
122; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
123
124; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
125; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
126
127; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
128; EG: MAX
; Ordered ">" select. Reads the floats at %in[id] and %in[id + 1] with
; volatile loads, keeps the first when `fcmp ogt` holds and the second
; otherwise, then stores the chosen value to %out.
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr float, float addrspace(1)* %lhs.ptr, i32 1
  ; Volatile keeps the two loads separate and in this order.
  %lhs = load volatile float, float addrspace(1)* %lhs.ptr, align 4
  %rhs = load volatile float, float addrspace(1)* %rhs.ptr, align 4
  ; ogt is false when either operand is NaN, so %rhs is kept in that case.
  %lhs.ogt.rhs = fcmp ogt float %lhs, %rhs
  %picked = select i1 %lhs.ogt.rhs, float %lhs, float %rhs
  store float %picked, float addrspace(1)* %out, align 4
  ret void
}
142
143; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
144; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
145; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
146
147; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
148
149; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
150; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
151
152
153; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
154; EG: MAX
; Ordered ">" select on single-element vectors. Reads the <1 x float> values
; at %in[id] and %in[id + 1], keeps the first where `fcmp ogt` holds and the
; second otherwise, then stores the result to %out. Unlike the scalar kernels,
; these loads are neither volatile nor explicitly aligned.
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr <1 x float>, <1 x float> addrspace(1)* %lhs.ptr, i32 1
  %lhs = load <1 x float>, <1 x float> addrspace(1)* %lhs.ptr
  %rhs = load <1 x float>, <1 x float> addrspace(1)* %rhs.ptr
  %lhs.ogt.rhs = fcmp ogt <1 x float> %lhs, %rhs
  %picked = select <1 x i1> %lhs.ogt.rhs, <1 x float> %lhs, <1 x float> %rhs
  store <1 x float> %picked, <1 x float> addrspace(1)* %out
  ret void
}
168
169; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
170; SI-SAFE: v_max_legacy_f32_e32
171; SI-SAFE: v_max_legacy_f32_e32
172; SI-SAFE: v_max_legacy_f32_e32
173
174; VI-SAFE: v_cmp_gt_f32_e32
175; VI-SAFE: v_cndmask_b32_e32
176; VI-SAFE: v_cmp_gt_f32_e32
177; VI-SAFE: v_cndmask_b32_e32
178; VI-SAFE: v_cmp_gt_f32_e32
179; VI-SAFE: v_cndmask_b32_e32
180; VI-SAFE-NOT: v_cmp
181; VI-SAFE-NOT: v_cndmask
182
183; GCN-NONAN: v_max_f32_e32
184; GCN-NONAN: v_max_f32_e32
185; GCN-NONAN: v_max_f32_e32
186
187; GCN-NOT: v_max
; Ordered ">" select on three-element vectors. Reads the <3 x float> values
; at %in[id] and %in[id + 1], picks per lane the first where `fcmp ogt` holds
; and the second otherwise, then stores the result to %out. These loads are
; neither volatile nor explicitly aligned.
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lhs.ptr = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %lane
  %rhs.ptr = getelementptr <3 x float>, <3 x float> addrspace(1)* %lhs.ptr, i32 1
  %lhs = load <3 x float>, <3 x float> addrspace(1)* %lhs.ptr
  %rhs = load <3 x float>, <3 x float> addrspace(1)* %rhs.ptr
  %lhs.ogt.rhs = fcmp ogt <3 x float> %lhs, %rhs
  %picked = select <3 x i1> %lhs.ogt.rhs, <3 x float> %lhs, <3 x float> %rhs
  store <3 x float> %picked, <3 x float> addrspace(1)* %out
  ret void
}
201
202; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
203; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
204; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
205; GCN-NOT: v_max_
206; GCN: v_cmp_gt_f32
207; GCN-NEXT: v_cndmask_b32
208; GCN-NOT: v_max_
209
210; EG: MAX
; Ordered ">" select whose compare result has a second use. Reads the floats
; at %in[id] and %in[id + 1] with volatile loads, stores the selected value to
; %out0, and additionally stores the i1 compare result itself to %out1.
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; Volatile keeps the two loads separate and in this order.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out0, align 4
  ; Second use of %cmp: the compare result is written out on its own.
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}
225
; Attribute groups referenced throughout this file.
; #0 is attached to every kernel definition.
attributes #0 = { nounwind }
; #1 is attached to the workitem-id intrinsic declaration and its call sites.
attributes #1 = { nounwind readnone }
228