; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s

; Codegen test for the llvm.powi intrinsic on AMDGPU.
;
; The file is compiled twice, once with -mcpu=hawaii and once with -mcpu=fiji.
; Assertions that are identical for both runs use the shared GCN prefix; where
; the two runs produce different instruction sequences, the hawaii run is
; checked with the GFX7 prefix and the fiji run with the GFX8 prefix.
;
; The assertion lines inside each function are generated by the script named
; on the first line; regenerate them with that script instead of editing them
; by hand.

; Half-precision powi with a variable exponent. The half value is passed and
; returned as an i16 and bitcast on either side of the intrinsic call.
; The expected code converts the input to f32, converts the integer exponent
; to f32, and then emits v_log_f32 / v_mul_legacy_f32 / v_exp_f32 before
; converting the result back to f16.
define i16 @v_powi_f16(i16 %l, i32 %r) {
; GCN-LABEL: v_powi_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %l.cast = bitcast i16 %l to half
  %res = call half @llvm.powi.f16(half %l.cast, i32 %r)
  %res.cast = bitcast half %res to i16
  ret i16 %res.cast
}

; Single-precision powi with a variable exponent. The expected code is the
; same v_log_f32 / v_mul_legacy_f32 / v_exp_f32 sequence as the f16 case,
; without the f16 <-> f32 conversions.
define float @v_powi_f32(float %l, i32 %r) {
; GCN-LABEL: v_powi_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 %r)
  ret float %res
}

; Constant exponent 0: the expected code just materializes the constant 1.0.
define float @v_powi_0_f32(float %l) {
; GCN-LABEL: v_powi_0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 1.0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 0)
  ret float %res
}

; Constant exponent 1: the expected code contains no arithmetic at all, only
; the wait and the return, so the input register is returned unchanged.
define float @v_powi_1_f32(float %l) {
; GCN-LABEL: v_powi_1_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 1)
  ret float %res
}

; Constant exponent -1: the expected code is a division of 1.0 by the input
; (v_div_scale_f32, v_rcp_f32, a chain of v_fma_f32, v_div_fmas_f32,
; v_div_fixup_f32). The two runs are checked separately because they order the
; second v_div_scale_f32 differently and therefore assign registers
; differently.
define float @v_powi_neg1_f32(float %l) {
; GFX7-LABEL: v_powi_neg1_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg1_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 -1)
  ret float %res
}

; Constant exponent 2: the expected code is a single multiply of the input by
; itself.
define float @v_powi_2_f32(float %l) {
; GCN-LABEL: v_powi_2_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 2)
  ret float %res
}

; Constant exponent -2: the expected code squares the input and then divides
; 1.0 by the square, using the same division sequence as the exponent -1 case.
define float @v_powi_neg2_f32(float %l) {
; GFX7-LABEL: v_powi_neg2_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg2_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 -2)
  ret float %res
}

; Constant exponent 4: the expected code is two successive squarings.
define float @v_powi_4_f32(float %l) {
; GCN-LABEL: v_powi_4_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 4)
  ret float %res
}

; Constant exponent 8: the expected code is three successive squarings.
define float @v_powi_8_f32(float %l) {
; GCN-LABEL: v_powi_8_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 8)
  ret float %res
}

; Constant exponent 16: the expected code is four successive squarings.
define float @v_powi_16_f32(float %l) {
; GCN-LABEL: v_powi_16_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 16)
  ret float %res
}

; Constant exponent 128: the expected code is seven successive squarings.
define float @v_powi_128_f32(float %l) {
; GCN-LABEL: v_powi_128_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 128)
  ret float %res
}

; Constant exponent -128: the expected code is seven successive squarings
; followed by a division of 1.0 by the result, using the same division
; sequence as the exponent -1 case.
define float @v_powi_neg128_f32(float %l) {
; GFX7-LABEL: v_powi_neg128_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg128_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32(float %l, i32 -128)
  ret float %res
}

; The double-precision case below is intentionally left commented out; the
; FIXME records that it does not work yet. The matching f64 declaration further
; down is kept so the test can be re-enabled by uncommenting it.
; FIXME: f64 broken
; define double @v_powi_f64(double %l, i32 %r) {
;   %res = call double @llvm.powi.f64(double %l, i32 %r)
;   ret double %res
; }

; Intrinsic declarations, one per floating-point type; all share attribute
; group #0.
declare half @llvm.powi.f16(half, i32) #0
declare float @llvm.powi.f32(float, i32) #0
declare double @llvm.powi.f64(double, i32) #0

attributes #0 = { nounwind readnone speculatable willreturn }