; Tests codegen for the llvm.amdgcn.fcmp.* intrinsics: each IEEE condition
; code (passed as the trailing i32 immediate) must select the corresponding
; v_cmp_* VOP3 instruction. f16 comparisons legalize to f32 on SI (no f16
; support), which swaps operand order and therefore the compare direction.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
declare float @llvm.fabs.f32(float) #0

declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
declare half @llvm.fabs.f16(half) #0

; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  %src_input = call float @llvm.fabs.f32(float %src)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; An out-of-range condition code (-1) must not select a compare at all.
; GCN-LABEL: {{^}}v_fcmp_f32:
; GCN-NOT: v_cmp_eq_f32_e64
define amdgpu_kernel void @v_fcmp_f32(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oeq:
; GCN: v_cmp_eq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_one:
; GCN: v_cmp_neq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ogt:
; GCN: v_cmp_gt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oge:
; GCN: v_cmp_ge_f32_e64
define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_olt:
; GCN: v_cmp_lt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ole:
; GCN: v_cmp_le_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}


; GCN-LABEL: {{^}}v_fcmp_f32_ueq:
; GCN: v_cmp_nlg_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_une:
; GCN: v_cmp_neq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ugt:
; GCN: v_cmp_nle_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_uge:
; GCN: v_cmp_nlt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ult:
; GCN: v_cmp_nge_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ule:
; GCN: v_cmp_ngt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_oeq:
; GCN: v_cmp_eq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_one:
; GCN: v_cmp_neq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ogt:
; GCN: v_cmp_gt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_oge:
; GCN: v_cmp_ge_f64_e64
define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_olt:
; GCN: v_cmp_lt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ole:
; GCN: v_cmp_le_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ueq:
; GCN: v_cmp_nlg_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_une:
; GCN: v_cmp_neq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ugt:
; GCN: v_cmp_nle_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_uge:
; GCN: v_cmp_nlt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ult:
; GCN: v_cmp_nge_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ule:
; GCN: v_cmp_ngt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq_with_fabs:
; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|

; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
  %temp = call half @llvm.fabs.f16(half %a)
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq_both_operands_with_fabs:
; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|

; SI: v_cvt_f32_f16_e64 [[CVT0:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
  %temp = call half @llvm.fabs.f16(half %a)
  %src_input = call half @llvm.fabs.f16(half %src)
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16:
; GCN-NOT: v_cmp_eq_
define amdgpu_kernel void @v_fcmp_f16(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq:
; VI: v_cmp_eq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_oeq(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_one:
; VI: v_cmp_neq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_one(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ogt:
; VI: v_cmp_gt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ogt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oge:
; VI: v_cmp_ge_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_le_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_oge(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_olt:
; VI: v_cmp_lt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_gt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_olt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ole:
; VI: v_cmp_le_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ole(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ueq:
; VI: v_cmp_nlg_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nlg_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ueq(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_une:
; VI: v_cmp_neq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_une(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ugt:
; VI: v_cmp_nle_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ugt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_uge:
; VI: v_cmp_nlt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ngt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_uge(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ult:
; VI: v_cmp_nge_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nle_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ult(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ule:
; VI: v_cmp_ngt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nlt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ule(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone convergent }