; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Checks AMDGPU instruction selection for the llvm.minnum intrinsic on f32
; and on vectors of f32. Both invocations above share the single GCN check
; prefix, so every expectation below must hold for the default amdgcn target
; and for tonga.
;
; Layout of this file:
;   * test_fmin_*           - one v_min_f32 is expected per vector element.
;   * constant_fold_fmin_*  - both operands are constants; no v_min_f32 may be
;                             emitted and the folded value is stored directly.
;   * fmin_*_immediate/literal_* - one variable operand and one constant.
;   * test_func_fmin_v3f32  - non-kernel function with a 3-element vector.

;------------------------------------------------------------------------------
; Scalar and power-of-two vector widths: one v_min_f32 per element.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}test_fmin_f32:
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %val = call float @llvm.minnum.f32(float %a, float %b)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fmin_v2f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
  %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}test_fmin_v4f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
  %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; GCN-LABEL: {{^}}test_fmin_v8f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
  %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}test_fmin_v16f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
  %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
  ret void
}

;------------------------------------------------------------------------------
; Constant folding: both operands are constants, so the call must disappear
; and the folded value must be materialized and stored.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}constant_fold_fmin_f32:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; NaN operands. 0x7FF8000000000000 is how the IR spells a quiet NaN float
; constant; the expected stored pattern 0x7fc00000 is the f32 quiet NaN.
; With exactly one NaN operand the checks expect the other operand (1.0).

; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_nan:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmin_f32_val_nan:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_val:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Signed zeros. The checks below expect the folded result to carry the sign of
; the first operand: +0.0 is stored as "v_mov_b32 0", and -0.0 (bit pattern
; 0x80000000) as "v_bfrev_b32 1". The zero checks are anchored at end of line,
; like the bfrev ones, so an operand that merely starts with "0" (for example
; a hex literal) cannot satisfy them.

; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_p0:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_p0:
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_n0:
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; One variable operand, one constant operand.
; For 2.0 the checks expect the constant to appear directly as an operand of
; the e64 form. For 99.0 (0x42c60000) they expect it to be moved into a VGPR
; first. In both cases the same operand order is expected regardless of which
; side of the call the constant is written on.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}fmin_var_immediate_f32:
; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float %a, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}fmin_immediate_var_f32:
; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float 2.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}fmin_var_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float %a, float 99.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}fmin_literal_var_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.minnum.f32(float 99.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; Non-kernel function taking and returning <3 x float>: exactly three
; v_min_f32 are expected, and the trailing NOT check rejects a fourth.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}test_func_fmin_v3f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define <3 x float> @test_func_fmin_v3f32(<3 x float> %a, <3 x float> %b) #0 {
  %val = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b)
  ret <3 x float> %val
}

declare float @llvm.minnum.f32(float, float) #1
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }