; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Each compare kernel in this file loads one element selected by the workitem
; id, compares it against a constant, sign-extends the i1 result to i32 and
; stores it. The checks look at the v_cmp instruction selected for the compare:
; its predicate, its encoding suffix and the order of its source operands.

declare i32 @llvm.amdgcn.workitem.id.x() #0

; --------------------------------------------------------------------------------
; i32 compares
; --------------------------------------------------------------------------------

; x == 64: the constant is expected as the first source operand.
; GCN-LABEL: {{^}}commute_eq_64_i32:
; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp eq i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x != 64: the constant is expected as the first source operand.
; GCN-LABEL: {{^}}commute_ne_64_i32:
; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp ne i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; FIXME: Why isn't this being folded as a constant?
; 12345 (0x3039) is currently expected to be moved into a VGPR and used as the
; second source operand of the compare.
; GCN-LABEL: {{^}}commute_ne_litk_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
; GCN: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp ne i32 %val, 12345
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x ugt 64 is expected in the commuted form 64 < x.
; GCN-LABEL: {{^}}commute_ugt_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp ugt i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x uge 64 is expected in the form 63 < x.
; GCN-LABEL: {{^}}commute_uge_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp uge i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x ult 64 is expected in the commuted form 64 > x.
; GCN-LABEL: {{^}}commute_ult_64_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp ult i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x ule 63 is expected in the form 64 > x.
; GCN-LABEL: {{^}}commute_ule_63_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp ule i32 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x ule 64 is expected as x < 65, with 65 (0x41) moved into a VGPR and used as
; the second source operand.
; GCN-LABEL: {{^}}commute_ule_64_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
; GCN: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp ule i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; The check expects an arithmetic shift right by 31 here instead of a v_cmp.
; Register numbers are matched with a regex, like every other check in this
; file, so the test does not depend on register allocation.
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp sgt i32 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x sge -2 is expected in the form -3 < x.
; GCN-LABEL: {{^}}commute_sge_neg2_i32:
; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
define amdgpu_kernel void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp sge i32 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x slt -16 is expected in the commuted form -16 > x.
; GCN-LABEL: {{^}}commute_slt_neg16_i32:
; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
define amdgpu_kernel void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp slt i32 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x sle 5 is expected in the form 6 > x.
; GCN-LABEL: {{^}}commute_sle_5_i32:
; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
define amdgpu_kernel void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp sle i32 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; --------------------------------------------------------------------------------
; i64 compares
; --------------------------------------------------------------------------------

; Same kernels as the i32 group, but the loaded value is an i64, so the second
; source operand is matched as a 64-bit VGPR pair. The result is still stored
; as an i32.

; GCN-LABEL: {{^}}commute_eq_64_i64:
; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp eq i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ne_64_i64:
; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp ne i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ugt_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp ugt i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x uge 64 is expected in the form 63 < x.
; GCN-LABEL: {{^}}commute_uge_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp uge i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ult_64_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp ult i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x ule 63 is expected in the form 64 > x.
; GCN-LABEL: {{^}}commute_ule_63_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp ule i64 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm

; The low half of the 65 (0x41) constant is expected in an SGPR pair that is
; used as the first source operand.
; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp ule i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp sgt i64 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x sge -2 is expected in the form -3 < x.
; GCN-LABEL: {{^}}commute_sge_neg2_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp sge i64 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_slt_neg16_i64:
; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp slt i64 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; x sle 5 is expected in the form 6 > x.
; GCN-LABEL: {{^}}commute_sle_5_i64:
; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep.in
  %cmp = icmp sle i64 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; --------------------------------------------------------------------------------
; f32 compares
; --------------------------------------------------------------------------------

; One kernel per fcmp predicate, each comparing the loaded float against 2.0.
; For the ordered/unordered-only predicates (ord, uno) the checks expect the
; loaded register compared against itself rather than against the constant.

; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp oeq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ogt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_oge_2.0_f32:
; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp oge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_olt_2.0_f32:
; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp olt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ole_2.0_f32:
; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ole float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_one_2.0_f32:
; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp one float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ord_2.0_f32:
; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define amdgpu_kernel void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ord float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ueq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ugt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_uge_2.0_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp uge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ult_2.0_f32:
; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ult float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ule_2.0_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp ule float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_une_2.0_f32:
; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp une float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_uno_2.0_f32:
; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define amdgpu_kernel void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load float, float addrspace(1)* %gep.in
  %cmp = fcmp uno float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; --------------------------------------------------------------------------------
; f64 compares
; --------------------------------------------------------------------------------

; Same predicates as the f32 group, applied to a loaded double; the register
; operand is matched as a 64-bit VGPR pair.

; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp oeq double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ogt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_oge_2.0_f64:
; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp oge double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_olt_2.0_f64:
; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp olt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ole_2.0_f64:
; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ole double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_one_2.0_f64:
; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp one double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ord_2.0_f64:
; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define amdgpu_kernel void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ord double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ueq double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ugt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_uge_2.0_f64:
; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp uge double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ult_2.0_f64:
; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ult double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_ule_2.0_f64:
; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp ule double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_une_2.0_f64:
; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp une double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}commute_uno_2.0_f64:
; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load double, double addrspace(1)* %gep.in
  %cmp = fcmp uno double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}

; FIXME: Should be able to fold this frameindex
; Without commuting the frame index in the pre-regalloc run of
; SIShrinkInstructions, this was using the VOP3 compare.

; Compares a volatile-loaded private pointer against the address of a stack
; object and stores the zero-extended result. The X-prefixed directive below is
; the desired (folded) output and is intentionally disabled; the active checks
; describe the current output, where the frame index is moved into a VGPR.
;
; The kernel performs a volatile load and a volatile store, so it uses the
; plain nounwind attribute group (#1) rather than the readnone one (#0).

; GCN-LABEL: {{^}}commute_frameindex:
; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}

; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #1 {
entry:
  %stack0 = alloca i32, addrspace(5)
  %ptr0 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* undef
  %eq = icmp eq i32 addrspace(5)* %ptr0, %stack0
  %ext = zext i1 %eq to i32
  store volatile i32 %ext, i32 addrspace(1)* %out
  ret void
}

; #0 is used for the side-effect-free workitem id intrinsic (declaration and
; call sites); #1 is used for the kernels, all of which write memory.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }