; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -check-prefix=VI %s

; Make sure we don't try to form FMAX_LEGACY nodes with f64.
;
; Every kernel below uses the same shape: load two adjacent doubles from %in
; (indexed by the workitem id), compare them with one fcmp predicate, select
; the first operand when the compare is true and the second otherwise, and
; store the selected value to %out. For both targets the expected output is a
; 64-bit compare followed by two 32-bit v_cndmask_b32 selects (one per half of
; the double); no max instruction appears in the checked sequence.

; Predicate under test: uge (unordered or greater-or-equal).
; Expected compare in the checks: v_cmp_nlt_f64.
define amdgpu_kernel void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmax_legacy_uge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax_legacy_uge_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
  ; Addresses of element %tid of %in and of the element immediately after it.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  ; Max-like pattern: keep %a when the compare is true, otherwise %b.
  %cmp = fcmp uge double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

; Predicate under test: oge (ordered and greater-or-equal).
; Expected compare in the checks: v_cmp_ge_f64.
define amdgpu_kernel void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmax_legacy_oge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax_legacy_oge_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
  ; Addresses of element %tid of %in and of the element immediately after it.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  ; Max-like pattern: keep %a when the compare is true, otherwise %b.
  %cmp = fcmp oge double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

; Predicate under test: ugt (unordered or greater-than).
; Expected compare in the checks: v_cmp_nle_f64.
define amdgpu_kernel void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmax_legacy_ugt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax_legacy_ugt_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
  ; Addresses of element %tid of %in and of the element immediately after it.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  ; Max-like pattern: keep %a when the compare is true, otherwise %b.
  %cmp = fcmp ugt double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

; Predicate under test: ogt (ordered and greater-than).
; Expected compare in the checks: v_cmp_gt_f64.
define amdgpu_kernel void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmax_legacy_ogt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax_legacy_ogt_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
  ; Addresses of element %tid of %in and of the element immediately after it.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  ; Max-like pattern: keep %a when the compare is true, otherwise %b.
  %cmp = fcmp ogt double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

; Intrinsic used by every kernel above to obtain the per-workitem index.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the kernels; #1 to the intrinsic declaration and its calls.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }