; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
;
; Checks the GlobalISel code generated for the llvm.pow intrinsic on three
; amdgcn CPUs (tahiti, fiji, gfx900). In every expected sequence below the call
; becomes a log instruction, a v_mul_legacy_f32 multiply and an exp instruction.
; The check lines are owned by the update script named above, so regenerate
; them with that script instead of editing them by hand.

; Scalar f32 pow with both operands in VGPRs (v0, v1).
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; <2 x float> pow. The expected code handles each element separately with its
; own log / multiply / exp instructions.
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_exp_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_log_f32_e32 v1, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_exp_f32_e32 v1, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_log_f32_e32 v1, v1
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f32_e32 v1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %pow
}

; Scalar f16 pow. The tahiti checks convert both inputs to f32, use the f32
; log/exp instructions and convert the result back. The fiji and gfx900 checks
; use v_log_f16 / v_exp_f16 and convert to f32 only around the multiply.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f16_e32 v0, v0
; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %pow = call half @llvm.pow.f16(half %x, half %y)
  ret half %pow
}

; <2 x half> pow. In the tahiti checks the two elements arrive in separate
; registers and are computed in f32. In the fiji and gfx900 checks the high
; element is accessed with SDWA WORD_1 selects and the two f16 results are
; combined into v0 at the end.
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_exp_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f16_e32 v2, v0
; GFX8-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX8-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v1, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f16_e32 v2, v0
; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: v_exp_f16_e32 v1, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with fneg applied to the base. In all three check sets the
; negation is an explicit v_xor_b32 with 0x80008000 on the packed value; the
; tahiti checks first pack the two halves with shift/and/or.
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v1, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3
; GFX6-NEXT: v_exp_f32_e32 v2, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_log_f16_e32 v2, v0
; GFX8-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX8-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v1, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT: v_log_f16_e32 v2, v0
; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: v_exp_f16_e32 v1, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with fneg applied to the exponent. The negation is again an
; explicit v_xor_b32 with 0x80008000, this time on the exponent register(s).
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
; GFX6-NEXT: v_exp_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f16_e32 v2, v0
; GFX8-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX8-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v1, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f16_e32 v2, v0
; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v1, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; <2 x half> pow with fneg applied to both operands. Each operand gets its own
; v_xor_b32; the 0x80008000 mask is loaded once into s4 and shared.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, v0, v4
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: s_mov_b32 s4, 0x80008000
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_and_b32_e32 v2, v2, v4
; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_log_f32_e32 v2, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3
; GFX6-NEXT: v_exp_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_log_f16_e32 v2, v0
; GFX8-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX8-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v1, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0x80008000
; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX9-NEXT: v_log_f16_e32 v2, v0
; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v1, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; FIXME: The f64 variant below is disabled and has no check lines.
; NOTE(review): presumably f64 pow is not handled by this lowering yet; this
; file does not say why, so confirm before enabling.
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; f32 pow with fabs on the base. The checks expect the absolute value as a
; |v0| source modifier on v_log_f32_e64, with no separate instruction.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e64 v0, |v0|
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e64 v0, |v0|
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e64 v0, |v0|
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}

; f32 pow with fabs on the exponent. The checks expect the absolute value as a
; |v1| source modifier on v_mul_legacy_f32_e64.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}

; f32 pow with fabs on both operands. Both absolute values appear as source
; modifiers (|v0| on the log, |v1| on the multiply).
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e64 v0, |v0|
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e64 v0, |v0|
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e64 v0, |v0|
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}

; amdgpu_ps variant with the base marked inreg. The checks read the base from
; s0 and the exponent from v0.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_log_f32_e32 v1, s0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_log_f32_e32 v1, s0
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_log_f32_e32 v1, s0
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with the exponent marked inreg. The checks read the base
; from v0 and use s0 directly as a multiply operand.
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with both operands marked inreg. The checks read the base
; from s0 and the exponent from s1.
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_log_f32_e32 v0, s0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_log_f32_e32 v0, s0
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_log_f32_e32 v0, s0
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; f32 pow with fneg on the base. The checks expect the negation as a -v0
; source modifier on v_log_f32_e64.
define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fneg_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e64 v0, -v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e64 v0, -v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e64 v0, -v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %pow = call float @llvm.pow.f32(float %neg.x, float %y)
  ret float %pow
}

; f32 pow with fneg on the exponent. The checks expect the negation as a -v1
; source modifier on v_mul_legacy_f32_e64.
define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fneg_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %neg.y = fneg float %y
  %pow = call float @llvm.pow.f32(float %x, float %neg.y)
  ret float %pow
}

; Declarations of the intrinsics used by the tests above.
; NOTE(review): llvm.pow.f64 is referenced only by the disabled f64 test, and
; llvm.fabs.f16 is not called anywhere in this file.
declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)