; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Code generation tests for the llvm.minnum intrinsic on f32 scalars and
; vectors. The file is compiled twice (default amdgcn subtarget and tonga with
; flat-for-global disabled); both invocations share one set of checks.

; Kernel with plain minnum on two scalar arguments. The expected output first
; multiplies each operand by 1.0 (captured as QUIET0/QUIET1 -- presumably to
; quiet signaling NaNs, going by the capture names), feeds those into the min,
; and stores the min result without touching it again in between.
; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
; GCN: v_mul_f32_e64 [[QUIET1:v[0-9]+]], 1.0, s{{[0-9]+}}
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_fmin_f32_ieee_mode_on(float addrspace(1)* %out, float %a, float %b) #0 {
  %val = call float @llvm.minnum.f32(float %a, float %b) #1
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Default-calling-convention function with an nnan minnum call. The min must
; directly follow the wait and directly precede the return jump, i.e. no extra
; instruction is applied to the operands.
; GCN-LABEL: {{^}}test_fmin_nnan_f32_ieee_mode_on:
; GCN: s_waitcnt
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64
define float @test_fmin_nnan_f32_ieee_mode_on(float %a, float %b) #0 {
  %val = call nnan float @llvm.minnum.f32(float %a, float %b) #1
  ret float %val
}

; amdgpu_ps function with an nnan minnum call. Neither v0 nor v1 may be
; mentioned before the min, and the return marker follows immediately.
; GCN-LABEL: {{^}}test_fmin_nnan_f32_ieee_mode_off:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: ; return
define amdgpu_ps float @test_fmin_nnan_f32_ieee_mode_off(float %a, float %b) #0 {
  %val = call nnan float @llvm.minnum.f32(float %a, float %b) #1
  ret float %val
}

; amdgpu_ps function with a plain (no nnan) minnum call. The min operates
; directly on the argument registers and is followed by the return marker.
; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_off:
; GCN: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: ; return
define amdgpu_ps float @test_fmin_f32_ieee_mode_off(float %a, float %b) #0 {
  %val = call float @llvm.minnum.f32(float %a, float %b) #1
  ret float %val
}

; Vector kernels: the checks expect one scalar v_min_f32_e32 per element.

; <2 x float>: two min instructions.
; GCN-LABEL: {{^}}test_fmin_v2f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
  %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; <4 x float>: four min instructions.
; GCN-LABEL: {{^}}test_fmin_v4f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
  %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <8 x float>: eight min instructions.
; GCN-LABEL: {{^}}test_fmin_v8f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
  %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; <16 x float>: sixteen min instructions.
; GCN-LABEL: {{^}}test_fmin_v16f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
  %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; Constant-folding kernels: both operands are literals, so no v_min_f32_e32 may
; appear before the folded value is materialized and stored.

; minnum(1.0, 2.0) is expected to fold to 1.0.
; GCN-LABEL: {{^}}constant_fold_fmin_f32:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; minnum(NaN, NaN) is expected to fold to the bit pattern 0x7fc00000.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_nan:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; minnum(1.0, NaN) is expected to fold to the non-NaN operand, 1.0.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_val_nan:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; minnum(NaN, 1.0) is expected to fold to the non-NaN operand, 1.0.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_val:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Signed-zero folds. In the two mixed-sign cases below the expected constant
; corresponds to the first operand of the call.

; minnum(+0.0, +0.0) is expected to fold to 0.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_p0:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; minnum(+0.0, -0.0) is expected to fold to 0.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; minnum(-0.0, +0.0) is expected to be materialized with v_bfrev_b32 of 1
; (presumably the sign-bit-only pattern, i.e. -0.0 -- confirm against the ISA).
; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_p0:
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; minnum(-0.0, -0.0) is expected to be materialized with v_bfrev_b32 of 1,
; the same constant as the previous test.
; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_n0:
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant-operand tests in amdgpu_ps functions: one operand is a variable and
; the other a floating-point constant, in both argument orders.

; Variable in a vector register, constant 2.0 second: 2.0 is expected directly
; as an operand of the e32 min.
; GCN-LABEL: {{^}}fmin_var_immediate_f32_no_ieee:
; GCN: v_min_f32_e32 v0, 2.0, v0
define amdgpu_ps float @fmin_var_immediate_f32_no_ieee(float %a) #0 {
  %val = call float @llvm.minnum.f32(float %a, float 2.0) #1
  ret float %val
}

; inreg variable, constant 2.0 first: the e64 min is expected with a scalar
; register and 2.0 as its sources.
; GCN-LABEL: {{^}}fmin_immediate_var_f32_no_ieee:
; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_ps float @fmin_immediate_var_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.minnum.f32(float 2.0, float %a) #1
  ret float %val
}

; inreg variable, constant 99.0 second: the constant is expected to be moved
; into a vector register as 0x42c60000 before the min uses it.
; GCN-LABEL: {{^}}fmin_var_literal_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_ps float @fmin_var_literal_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.minnum.f32(float %a, float 99.0) #1
  ret float %val
}

; Same as above with the operands swapped; the expected instructions are
; identical.
; GCN-LABEL: {{^}}fmin_literal_var_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_ps float @fmin_literal_var_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.minnum.f32(float 99.0, float %a) #1
  ret float %val
}

; <3 x float> in a default-calling-convention function: three min
; instructions are expected and no further v_min_f32 after the third.
; GCN-LABEL: {{^}}test_func_fmin_v3f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define <3 x float> @test_func_fmin_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
  %val = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  ret <3 x float> %val
}

; Intrinsic declarations for every vector width used above.
declare float @llvm.minnum.f32(float, float) #1
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1

; #0: attributes of the test functions (f32 denormal mode set to preserve-sign).
; #1: attributes of the intrinsic declarations and most call sites.
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }