; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Checks the ISA emitted for the llvm.pow intrinsic on tahiti, fiji and gfx900.
; In every expected sequence below, pow(x, y) shows up as
;   v_exp_f32(v_mul_legacy_f32(y, v_log_f32(x)))
; The assertion lines are autogenerated (see the NOTE on the first line), so
; regenerate them with the script instead of editing them by hand.

; Scalar f32 pow with both operands in VGPRs.
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; <2 x float> pow; the checks expect one log/mul/exp chain per element.
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_log_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_log_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %pow
}

; half pow. All three targets are expected to do the math in f32. The fiji and
; gfx900 sequences end with a v_cvt_f16_f32 of the result, while the tahiti
; sequence converts both inputs up front and ends at v_exp_f32.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %pow = call half @llvm.pow.f16(half %x, half %y)
  ret half %pow
}

; <2 x half> pow. The fiji and gfx900 sequences read the high half through SDWA
; WORD_1 conversions and pack the two f16 results back into v0 (v_or_b32 on
; fiji, v_and_b32 + v_lshl_or_b32 on gfx900).
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %pow
}

; fneg applied to the base of a <2 x half> pow. On fiji and gfx900 the negation
; is expected as a source modifier (-v0) on the f16-to-f32 conversions; the
; tahiti sequence instead packs the halves and applies v_xor_b32 with 0x80008000.
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v4, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
  ret <2 x half> %pow
}

; fneg applied to the exponent of a <2 x half> pow. Same expectations as the
; fneg-on-base test, with the negation on the second operand (-v1 on fiji and
; gfx900, v_xor_b32 on the packed exponent for tahiti).
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; fneg applied to both operands of a <2 x half> pow. The tahiti sequence loads
; the 0x80008000 mask into s4 once and reuses it for both v_xor_b32 operations.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; FIXME: the f64 variant below is disabled. The reason is not recorded in this
; file; presumably f64 pow is not handled yet, confirm before re-enabling.
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; fabs applied to the base of an f32 pow. The checks expect an explicit
; v_and_b32 with 0x7fffffff ahead of v_log_f32 rather than a source modifier.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}

; fabs applied to the exponent of an f32 pow. The checks expect the v_and_b32
; mask on v1 between v_log_f32 and v_mul_legacy_f32.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}

; fabs applied to both operands of an f32 pow. The checks expect the mask to be
; set up once in s4 (s_brev_b32 s4, -2) and used by both v_and_b32 operations.
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_brev_b32 s4, -2
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_brev_b32 s4, -2
; GFX8-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_brev_b32 s4, -2
; GFX9-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}

; amdgpu_ps variant with the base passed inreg. The checks expect s0 to be used
; directly as the v_log_f32 source.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with the exponent passed inreg. The checks expect s0 to be
; used directly as the first v_mul_legacy_f32 source.
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with both operands passed inreg. The checks expect s0 as the
; v_log_f32 source and s1 as the first v_mul_legacy_f32 source.
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; Intrinsic declarations. The f64 pow declaration is only referenced by the
; disabled f64 test, and the f16 fabs declaration is not referenced by any test
; in this file.
declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)