; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Codegen checks for the llvm.maxnum intrinsic on 32-bit floats (scalar and
; vector) when compiling for amdgcn. Both invocations above share one check
; prefix, so every expectation below must hold for the default subtarget and
; for tonga alike.

; Kernel entry point: the expected output multiplies each scalar input by 1.0
; before feeding the max (the captures are named QUIET*, so this is presumably
; the signaling-NaN quieting step), then stores the max result.
; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
; GCN: v_mul_f32_e64 [[QUIET1:v[0-9]+]], 1.0, s{{[0-9]+}}
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(float addrspace(1)* %out, float %a, float %b) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float %b) #1
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Pixel-shader calling convention: the expected output is a single max on the
; incoming registers, immediately followed by the return marker, with no extra
; multiply in between.
; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_off:
; GCN: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: ; return
define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float %b) #1
  ret float %val
}

; Vector cases: each N-element maxnum is expected to produce N scalar max
; instructions.
; GCN-LABEL: {{^}}test_fmax_v2f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
  %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; Three elements must yield exactly three max instructions; the trailing
; negative check rejects a fourth (e.g. from widening to four elements).
; Note that this function carries a bare nounwind instead of attribute
; group #0, unlike its neighbours.
; GCN-LABEL: {{^}}test_fmax_v3f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) nounwind {
  %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  store <3 x float> %val, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; GCN-LABEL: {{^}}test_fmax_v4f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
  %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; GCN-LABEL: {{^}}test_fmax_v8f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
  %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}test_fmax_v16f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
  %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; Constant-folding cases: with two constant operands no max instruction may
; be emitted; the folded value is expected to be materialized directly and
; stored.

; maxnum(1.0, 2.0) is expected to fold to 2.0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Both operands NaN: the expected result is 0x7fc00000, the f32 quiet-NaN
; bit pattern.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_nan:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; One NaN operand (in either position): the expected result is the non-NaN
; operand, 1.0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_val_nan:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_val:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Signed-zero cases. The four checks below pin the folded value to the FIRST
; operand whenever the operands are zeros of either sign: +0.0 is a move of 0,
; while -0.0 is expected as a bit-reverse of 1 (i.e. 0x80000000).
; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_p0:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_n0:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_n0:
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Variable-versus-constant cases in a pixel shader with an inreg (scalar
; register) argument. For 2.0 the constant is expected to appear directly as
; an operand of the max, in the same operand position whichever side of the
; intrinsic call it was written on.
; GCN-LABEL: {{^}}fmax_var_immediate_f32_no_ieee:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_ps float @fmax_var_immediate_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
  ret float %val
}

; GCN-LABEL: {{^}}fmax_immediate_var_f32_no_ieee:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_ps float @fmax_immediate_var_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
  ret float %val
}

; For 99.0 (0x42c60000 as f32) the constant is expected to be moved into a
; vector register first and then used as the second source of the max, again
; independent of the operand order in the IR.
; GCN-LABEL: {{^}}fmax_var_literal_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_ps float @fmax_var_literal_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
  ret float %val
}

; GCN-LABEL: {{^}}fmax_literal_var_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 {
  %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
  ret float %val
}

; Same three-element case as test_fmax_v3f32, but as an ordinary function
; (no explicit calling convention) that takes and returns the vectors by
; value: exactly three max instructions are expected.
; GCN-LABEL: {{^}}test_func_fmax_v3f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) #0 {
  %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  ret <3 x float> %val
}

; Intrinsic declarations. The f64 overload is declared but is not called by
; any function in this file.
declare float @llvm.maxnum.f32(float, float) #1
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
declare double @llvm.maxnum.f64(double, double)

; #0 pins the f32 denormal mode to preserve-sign for both outputs and inputs.
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }