; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Codegen checks for the llvm.AMDGPU.div.scale.f32 / .f64 intrinsics.
;
; Writing a = first intrinsic argument and b = second intrinsic argument, the
; checks below expect the v_div_scale source operands to be
;   (b, b, a) when the i1 argument is false, and
;   (a, b, a) when the i1 argument is true.
;
; Every function is anchored with a LABEL directive on its assembly label so
; that a failure is reported against the function that actually regressed.

declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; f32, a = in[tid], b = in[tid + 1], i1 false.
; SI-LABEL: {{^}}test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a = in[tid], b = in[tid + 1], i1 true.
; SI-LABEL: {{^}}test_div_scale_f32_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f64, a = in[tid], b = in[tid + 1], i1 false. %aptr is unused.
; SI-LABEL: {{^}}test_div_scale_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64, a = in[tid], b = in[tid + 1], i1 true. %aptr is unused.
; SI-LABEL: {{^}}test_div_scale_f64_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f32, a is a function argument (expected in an s register), b = in[tid],
; i1 false.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float, float addrspace(1)* %in, i32 %tid

  %b = load float, float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a is a function argument (expected in an s register), b = in[tid],
; i1 true.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float, float addrspace(1)* %in, i32 %tid

  %b = load float, float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a = in[tid], b is a function argument (expected in an s register),
; i1 false.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float, float addrspace(1)* %in, i32 %tid

  %a = load float, float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a = in[tid], b is a function argument (expected in an s register),
; i1 true.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float, float addrspace(1)* %in, i32 %tid

  %a = load float, float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f64, a is a function argument (expected in an s register pair), b = in[tid],
; i1 false.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid

  %b = load double, double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64, a is a function argument (expected in an s register pair), b = in[tid],
; i1 true.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid

  %b = load double, double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64, a = in[tid], b is a function argument (expected in an s register pair),
; i1 false.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid

  %a = load double, double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64, a = in[tid], b is a function argument (expected in an s register pair),
; i1 true.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid

  %a = load double, double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f32, both a and b are function arguments, i1 false. The checks expect a to
; be copied into a v register with v_mov_b32 before the v_div_scale.
; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, both a and b are function arguments, i1 true. The checks expect b to
; be copied into a v register with v_mov_b32 before the v_div_scale.
; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f64, both a and b are function arguments, i1 false. The checks expect both
; halves of a to be copied into v registers before the v_div_scale.
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64, both a and b are function arguments, i1 true. The checks expect both
; halves of b to be copied into v registers before the v_div_scale.
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f32, a is the constant 1.0 and b = in[tid], i1 false. The constant is
; expected to appear directly as an instruction operand.
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %a = load float, float addrspace(1)* %gep.0, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a = in[tid] and b is the constant 2.0, i1 false. The constant is
; expected to appear directly as an instruction operand.
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %a = load float, float addrspace(1)* %gep.0, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a = fabs(in[tid]), b = in[tid + 1], i1 false. The fabs is expected to
; show up as |...| on the matching v_div_scale operand.
; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32, a = in[tid], b = fabs(in[tid + 1]), i1 false. The fabs is expected to
; show up as |...| on both v_div_scale operands that carry b.
; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}