; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s

; Exercises GlobalISel selection of the llvm.amdgcn.rsq.clamp intrinsic for
; float and double operands. The tahiti run expects a single v_rsq_clamp
; instruction. The tonga run expects a plain v_rsq followed by v_min/v_max
; against the largest-magnitude finite values of the type
; (0x7f7fffff / 0xff7fffff for f32, 0x7fefffff_ffffffff / 0xffefffff_ffffffff
; for f64).

; Plain f32 operand.
define float @v_rsq_clamp_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; f32 operand wrapped in fabs: the checks expect the absolute value to be
; folded into the rsq instruction as a |v0| source modifier (e64 encoding).
define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e64 v0, |v0|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e64 v0, |v0|
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
  ret float %rsq_clamp
}

; Plain f64 operand. On tonga the clamp bounds are materialized in s[4:5];
; only the high half (s5) is rewritten between the min and the max.
define double @v_rsq_clamp_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; f64 operand wrapped in fabs: same source-modifier folding as the f32 case.
define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e64 v[0:1], |v[0:1]|
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
  ret double %rsq_clamp
}

; undef f32 operand: the checks record the instruction reading s4 as its
; source, with the same clamp sequence as the defined-operand case.
define float @v_rsq_clamp_undef_f32() #0 {
; SI-LABEL: v_rsq_clamp_undef_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, s4
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, s4
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
  ret float %rsq_clamp
}

; undef f64 operand: the checks record the instruction reading s[4:5].
define double @v_rsq_clamp_undef_f64() #0 {
; SI-LABEL: v_rsq_clamp_undef_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], s[4:5]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
  ret double %rsq_clamp
}

; Same as v_rsq_clamp_f32 but compiled with "amdgpu-ieee"="false" (attribute
; group #2); the expected output matches the default-attribute variant.
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; Same as v_rsq_clamp_f64 but compiled with "amdgpu-ieee"="false" (attribute
; group #2); the expected output matches the default-attribute variant.
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1

; #0: default test functions, #1: intrinsic declarations,
; #2: functions compiled with the "amdgpu-ieee" attribute set to "false".
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-ieee"="false" }