; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Tests GlobalISel lowering of the llvm.roundeven.* intrinsics (round half to
; even) for scalar/vector f16, f32, and f64 on four AMDGPU generations.
; On all tested targets the f32 form selects directly to v_rndne_f32.

define float @v_roundeven_f32(float %x) {
; GFX6-LABEL: v_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
; GFX6-LABEL: v_roundeven_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
  ret <2 x float> %roundeven
}

define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
; GFX6-LABEL: v_roundeven_v3f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v3f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v3f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v3f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
  ret <3 x float> %roundeven
}

define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
; GFX6-LABEL: v_roundeven_v4f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    v_rndne_f32_e32 v3, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    v_rndne_f32_e32 v3, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
  ret <4 x float> %roundeven
}

; Targets without native f16 instructions (tahiti/hawaii) promote the half to
; f32, round, and convert back; fiji/gfx900 select v_rndne_f16 directly.
define half @v_roundeven_f16(half %x) {
; GFX6-LABEL: v_roundeven_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call half @llvm.roundeven.f16(half %x)
  ret half %roundeven
}

; For packed <2 x half>, fiji/gfx900 round each lane via SDWA and repack the
; two 16-bit results into one 32-bit register (shift+or on fiji, v_and_or_b32
; on gfx900).
define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
  ret <2 x half> %roundeven
}

; The fneg of a packed <2 x half> is materialized as a single xor with
; 0x80008000 (sign bit of both halves) rather than folded as a source
; modifier into the rounding instructions.
define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
  ret <2 x half> %roundeven
}

define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
; GFX6-LABEL: v_roundeven_v4f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v2, v0
; GFX8-NEXT:    v_rndne_f16_e32 v3, v1
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v4, 16
; GFX8-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v2, v0
; GFX9-NEXT:    v_rndne_f16_e32 v3, v1
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
  ret <4 x half> %roundeven
}


; fabs should fold into the rounding instruction as a |...| source modifier
; on every tested target.
define float @v_roundeven_f32_fabs(float %x) {
; GFX6-LABEL: v_roundeven_f32_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fabs:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fabs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fabs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
  ret float %roundeven
}

; SGPR (inreg) input in a shader calling convention: the VALU instruction
; reads the scalar register operand directly.
define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
; GFX6-LABEL: s_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

; fneg should fold into the rounding instruction as a -... source modifier
; on every tested target.
define float @v_roundeven_f32_fneg(float %x) {
; GFX6-LABEL: v_roundeven_f32_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
  ret float %roundeven
}

; tahiti has no v_rndne_f64, so the f64 case expands to the add/subtract
; trick with the 0x433... magic constant (2^52), guarded by a magnitude
; compare and select; hawaii and newer select v_rndne_f64 directly.
define double @v_roundeven_f64(double %x) {
; GFX6-LABEL: v_roundeven_f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call double @llvm.roundeven.f64(double %x)
  ret double %roundeven
}

define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
; GFX6-NEXT:    v_and_b32_e32 v4, 0x80000000, v2
; GFX6-NEXT:    v_mov_b32_e32 v3, 0
; GFX6-NEXT:    v_or_b32_e32 v4, 0x43300000, v4
; GFX6-NEXT:    v_add_f64 v[5:6], -v[0:1], v[3:4]
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[3:4], v[5:6], -v[3:4]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[1:2]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
  ret double %roundeven
}

define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-LABEL: v_roundeven_v2f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_brev_b32 s6, 1
; GFX6-NEXT:    s_mov_b32 s7, 0x43300000
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v1
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v3
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
  ret <2 x double> %roundeven
}

declare half @llvm.roundeven.f16(half) #0
declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0

declare float @llvm.roundeven.f32(float) #0
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0

declare double @llvm.roundeven.f64(double) #0
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0

declare half @llvm.fabs.f16(half) #0
declare float @llvm.fabs.f32(float) #0

attributes #0 = { nounwind readnone speculatable willreturn }