1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3
; Scalar case: minnum of two f32 kernel arguments, result stored to %out.
; Exactly one min instruction is expected; the trailing negative check rejects
; any additional v_min_f32 (in either encoding) inside this function.
; GCN-LABEL: {{^}}test_fmin_f32:
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %val = call float @llvm.minnum.f32(float %a, float %b)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
11
; <2 x float> minnum: the checks expect one scalar v_min_f32 per lane (two in
; total). The trailing negative check pins the count so a redundant third min
; cannot go unnoticed.
; GCN-LABEL: {{^}}test_fmin_v2f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
  %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}
20
; <4 x float> minnum: the checks expect one scalar v_min_f32 per lane (four in
; total). The trailing negative check pins the count.
; GCN-LABEL: {{^}}test_fmin_v4f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
  %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}
31
; <8 x float> minnum: the checks expect one scalar v_min_f32 per lane (eight in
; total). The trailing negative check pins the count.
; GCN-LABEL: {{^}}test_fmin_v8f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
  %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}
46
; <16 x float> minnum: the checks expect one scalar v_min_f32 per lane (sixteen
; in total). The trailing negative check pins the count.
; GCN-LABEL: {{^}}test_fmin_v16f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
  %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
  ret void
}
69
; minnum(1.0, 2.0) with both operands constant must fold at compile time: the
; stored value is materialized directly as 1.0 and no min instruction is
; emitted. The negative check matches the bare mnemonic so that neither the
; e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32:
; GCN-NOT: v_min_f32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
79
; minnum(NaN, NaN) must fold to a NaN constant: the checks expect the f32
; quiet-NaN bit pattern 0x7fc00000 to be moved and stored, with no min
; instruction. The negative check matches the bare mnemonic so that neither
; the e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_nan:
; GCN-NOT: v_min_f32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
89
; minnum(1.0, NaN) must fold to the non-NaN operand: the checks expect 1.0 to
; be moved and stored, with no min instruction. The negative check matches the
; bare mnemonic so that neither the e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_val_nan:
; GCN-NOT: v_min_f32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
99
; minnum(NaN, 1.0) -- the operand-swapped twin of the val_nan case -- must also
; fold to the non-NaN operand: the checks expect 1.0 to be moved and stored,
; with no min instruction. The negative check matches the bare mnemonic so
; that neither the e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_val:
; GCN-NOT: v_min_f32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
109
; minnum(+0.0, +0.0) must fold to +0.0: the checks expect a move of the
; immediate 0 (all bits clear) followed by the store, with no min instruction.
; The negative check matches the bare mnemonic so that neither the e32 nor the
; e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_p0:
; GCN-NOT: v_min_f32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
119
; minnum(+0.0, -0.0) with mixed-sign zeros: the checks record that the fold
; yields the first operand, +0.0 (a move of the immediate 0), with no min
; instruction. The negative check matches the bare mnemonic so that neither
; the e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
; GCN-NOT: v_min_f32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
129
; minnum(-0.0, +0.0) with mixed-sign zeros: the checks record that the fold
; yields the first operand, -0.0. That constant is materialized with
; v_bfrev_b32 of 1 (bit-reversing 1 sets only the sign bit, 0x80000000), and no
; min instruction is emitted. The negative check matches the bare mnemonic so
; that neither the e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_p0:
; GCN-NOT: v_min_f32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
139
; minnum(-0.0, -0.0) must fold to -0.0, materialized with v_bfrev_b32 of 1
; (bit-reversing 1 sets only the sign bit, 0x80000000); no min instruction is
; emitted. The negative check matches the bare mnemonic so that neither the
; e32 nor the e64 encoding is accepted.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_n0:
; GCN-NOT: v_min_f32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
149
; minnum(%a, 2.0): the checks expect the constant 2.0 to be encoded directly as
; the last source operand of an e64-form min whose other source is a scalar
; register. The trailing negative check ensures this is the only min emitted.
; GCN-LABEL: {{^}}fmin_var_immediate_f32:
; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
; GCN-NOT: v_min_f32
define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float %a, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
157
; minnum(2.0, %a): same as the var/immediate case but with the IR operands
; swapped. The expected instruction is identical, i.e. the constant still ends
; up as the last source operand. The trailing negative check ensures this is
; the only min emitted.
; GCN-LABEL: {{^}}fmin_immediate_var_f32:
; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
; GCN-NOT: v_min_f32
define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float 2.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
165
; minnum(%a, 99.0): the checks expect 99.0 (bit pattern 0x42c60000) to be moved
; into a vector register first and then used as the last source of an e32-form
; min whose other source is a scalar register. The trailing negative check
; ensures this is the only min emitted.
; GCN-LABEL: {{^}}fmin_var_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; GCN-NOT: v_min_f32
define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float %a, float 99.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
174
; minnum(99.0, %a): same as the var/literal case but with the IR operands
; swapped. The expected code is identical: 99.0 (0x42c60000) is moved into a
; vector register and used as the last source of an e32-form min. The trailing
; negative check ensures this is the only min emitted.
; GCN-LABEL: {{^}}fmin_literal_var_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; GCN-NOT: v_min_f32
define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float 99.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
183
; Ordinary (non-kernel) function taking and returning <3 x float>. The checks
; expect exactly three scalar mins -- one per lane -- and the negative check
; rejects a fourth, so the odd-sized vector must not be widened into an extra
; min.
; GCN-LABEL: {{^}}test_func_fmin_v3f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define <3 x float> @test_func_fmin_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
  %min = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  ret <3 x float> %min
}
193
; Declarations of the llvm.minnum overloads exercised by the tests in this
; file: the scalar f32 form and the 2-, 3-, 4-, 8- and 16-lane vector forms.
declare float @llvm.minnum.f32(float, float) #1
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1

; #0 is attached to the test kernels (and to the call in the v3f32 test).
; #1 is attached to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
203