; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s

; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine.
;
; Every kernel below negates a float with "fsub float -0.0, %v" and compares
; the result against the constant 4.0. Operands come from volatile loads
; through undef pointers so that nothing can be constant folded, and the
; compare result is kept alive by a select whose value is stored.

; The fneg source (%mul) has a second use: it is also stored. The checks
; expect the compare to read the un-negated product against -4.0 and the
; store to write that same product register.
; GCN-LABEL: {{^}}multi_use_fneg_src:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg_src() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; The fneg source (%mul) is additionally used by a second fmul that also
; consumes the negated value. The checks expect the compare to read the
; product (matched as [[MUL]], not the raw loaded operand) against -4.0, and
; the negation to appear only as a source modifier on the e64 multiply.
; GCN-LABEL: {{^}}multi_foldable_use_fneg_src:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %use1 = fmul float %mul, %neg.mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y

  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; Here the fneg result itself (%neg.mul) is stored, so the negated value is
; needed in a register. The checks expect the negation folded into the
; multiply as a source modifier on the second operand, the compare to follow
; immediately and keep the original +4.0 constant, and nothing containing
; "xor" between the compare and the store of the negated product.
; GCN-LABEL: {{^}}multi_use_fneg:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
; GCN-NOT: xor
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %neg.mul, float addrspace(1)* undef
  ret void
}

; The fneg result feeds both the compare and a second fmul. The checks expect
; the compare to read the un-negated product against -4.0 and the second
; multiply to take the negation as a source modifier on its first operand.
; GCN-LABEL: {{^}}multi_foldable_use_fneg:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
; GCN: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %b = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  ; %z has no users in this function; the load is volatile.
  %z = load volatile i32, i32 addrspace(1)* undef

  %mul = fmul float %a, %b
  %neg.mul = fsub float -0.0, %mul
  %cmp = fcmp oeq float %neg.mul, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  %use1 = fmul float %neg.mul, %mul
  store volatile i32 %select, i32 addrspace(1)* undef
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; Per-predicate coverage. Each kernel compares (fneg %a) against +4.0 with one
; fcmp predicate. The expected output in every case is a single v_cmp whose
; first source operand is the negated constant -4.0 and whose second source
; operand is a VGPR; the IR contains no other float value than the loaded %a.

; Ordered predicates.

; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp oeq float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ogt float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp oge float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp olt float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ole float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp one float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; Unordered predicates. The checks expect the negated-condition compare
; mnemonics (nlg, nle, nlt, nge, ngt, neq).

; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ueq float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ugt float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp uge float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ult float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp ule float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
  %a = load volatile float, float addrspace(1)* undef
  %x = load volatile i32, i32 addrspace(1)* undef
  %y = load volatile i32, i32 addrspace(1)* undef
  %neg.a = fsub float -0.0, %a
  %cmp = fcmp une float %neg.a, 4.0
  %select = select i1 %cmp, i32 %x, i32 %y
  store volatile i32 %select, i32 addrspace(1)* undef
  ret void
}

attributes #0 = { nounwind }