; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
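
; f64 has no v_min_legacy instruction, so each fcmp + select fmin pattern
; below is lowered to a single f64 compare plus two v_cndmask_b32 selects,
; one per 32-bit half of the result. The unordered predicates are encoded
; with the "not" compares: uge -> v_cmp_nlt_f64, ugt -> v_cmp_nle_f64,
; ule -> v_cmp_ngt_f64, ult -> v_cmp_nge_f64.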

define amdgpu_kernel void @test_fmin_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_uge_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_uge_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp uge double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @test_fmin_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ugt_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ugt_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ugt double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}
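
; ule and ult select %a rather than %b, so the v_cndmask operand order is
; swapped relative to the uge/ugt cases above.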
define amdgpu_kernel void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ule_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_ngt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ule_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_ngt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ule double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ult_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ult_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ult double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}
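
; The ordered predicates map onto the plain compares (oge -> v_cmp_ge_f64,
; ogt -> v_cmp_gt_f64, ole -> v_cmp_le_f64, olt -> v_cmp_lt_f64).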
define amdgpu_kernel void @test_fmin_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_oge_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_oge_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp oge double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @test_fmin_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ogt_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ogt_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ogt double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}
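
; As with ule/ult, the ole/olt patterns select %a, so the v_cndmask operands
; are swapped.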
define amdgpu_kernel void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ole_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_le_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ole_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_le_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ole double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_olt_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_olt_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp olt double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }