; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s

; Every function in this file has the same shape: compare %a and %b with
; "fcmp ule" and select %a where the comparison is true, %b otherwise. Only the
; operand type changes (half, then <2 x half> up to <8 x half>).
;
; The llc invocations above compile the file six ways: for gfx900, for fiji and
; with no -mcpu option, each once with default floating-point flags and once
; with -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math. The assertion
; lines inside each function are generated output; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar half case.
; Expected code, as recorded by the assertions below:
;  - gfx900 and fiji, default flags: v_cmp_ngt_f16 followed by v_cndmask_b32.
;  - gfx900 and fiji, no-NaNs flags: a single v_min_f16.
;  - no -mcpu: both inputs go through v_cvt_f16_f32 and v_cvt_f32_f16, then
;    v_min_legacy_f32 (default flags) or v_min_f32 (no-NaNs flags).
define half @test_fmin_legacy_ule_f16(half %a, half %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v1, v0
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v1
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule half %a, %b
  %val = select i1 %cmp, half %a, half %b
  ret half %val
}

; <2 x half> case.
; Expected code, as recorded by the assertions below:
;  - gfx900, default flags: the high halves are extracted with v_lshrrev_b32,
;    each element gets its own v_cmp_ngt_f16 / v_cndmask_b32, and the result is
;    repacked with v_and_b32 and v_lshl_or_b32.
;  - gfx900, no-NaNs flags: one v_pk_min_f16.
;  - fiji, default flags: same compare/select sequence, repacked with
;    v_lshlrev_b32 and v_or_b32_sdwa.
;  - fiji, no-NaNs flags: v_min_f16_sdwa for the high half, v_min_f16 for the
;    low half, combined with v_or_b32_sdwa.
;  - no -mcpu: one f32 register per element, converted and then combined with
;    v_min_legacy_f32 (default flags) or v_min_f32 (no-NaNs flags).
define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v2, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v3, v1
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v2
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v3
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <2 x half> %a, %b
  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
  ret <2 x half> %val
}

; <3 x half> case (odd element count).
; Expected code, as recorded by the assertions below:
;  - gfx900 and fiji, default flags: three v_cmp_ngt_f16 / v_cndmask_b32 pairs;
;    only the v0 result is repacked with the extracted high half.
;  - gfx900, no-NaNs flags: two v_pk_min_f16, one per register pair.
;  - fiji, no-NaNs flags: v_min_f16_sdwa plus two v_min_f16, with v0 recombined
;    through v_or_b32_sdwa.
;  - no -mcpu: three per-element f32 minimum operations after conversion.
define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v3, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v4, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v5, v2
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v3
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v4
; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v5
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <3 x half> %a, %b
  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
  ret <3 x half> %val
}

; <4 x half> case.
; Expected code, as recorded by the assertions below:
;  - gfx900 and fiji, default flags: four v_cmp_ngt_f16 / v_cndmask_b32 pairs,
;    then both result registers are repacked.
;  - gfx900, no-NaNs flags: two v_pk_min_f16.
;  - fiji, no-NaNs flags: two v_min_f16_sdwa / v_min_f16 pairs combined with
;    v_or_b32_sdwa.
;  - no -mcpu: four per-element f32 minimum operations after conversion.
define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v2, v0
; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v6, 16, v1
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v6
; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v4, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v5, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v6, v2
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v7, v3
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v4
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v5
; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v6
; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v7
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <4 x half> %a, %b
  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
  ret <4 x half> %val
}

; <8 x half> case.
; Expected code, as recorded by the assertions below:
;  - gfx900 and fiji, default flags: eight v_cmp_ngt_f16 / v_cndmask_b32 pairs,
;    then all four result registers are repacked.
;  - gfx900, no-NaNs flags: four v_pk_min_f16.
;  - fiji, no-NaNs flags: four v_min_f16_sdwa / v_min_f16 pairs combined with
;    v_or_b32_sdwa.
;  - no -mcpu: eight per-element f32 minimum operations after conversion.
define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-SAFE-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v4, v0
; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v4, v1
; GFX9-SAFE-NEXT: v_and_b32_e32 v2, v4, v2
; GFX9-SAFE-NEXT: v_and_b32_e32 v3, v4, v3
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v8, 16, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v10, 16, v1
; GFX9-SAFE-NEXT: v_lshl_or_b32 v2, v12, 16, v2
; GFX9-SAFE-NEXT: v_lshl_or_b32 v3, v14, 16, v3
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
; GFX9-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
; GFX9-NNAN-NEXT: v_pk_min_f16 v3, v3, v7
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
; VI-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
; VI-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
; VI-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v8
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v10
; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v12
; VI-SAFE-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v14
; VI-SAFE-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v3, v3, v7
; VI-NNAN-NEXT: v_min_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v2, v2, v6
; VI-NNAN-NEXT: v_min_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v4
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v8, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v9, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v10, v2
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v11, v3
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v4, v12, v4
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v5, v13, v5
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v6, v14, v6
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v7, v15, v7
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v8
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v9
; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v10
; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v11
; SI-NNAN-NEXT: v_min_f32_e32 v4, v4, v12
; SI-NNAN-NEXT: v_min_f32_e32 v5, v5, v13
; SI-NNAN-NEXT: v_min_f32_e32 v6, v6, v14
; SI-NNAN-NEXT: v_min_f32_e32 v7, v7, v15
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <8 x half> %a, %b
  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
  ret <8 x half> %val
}

; Attribute group #0 is applied to every function above.
attributes #0 = { nounwind }