; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}uniform_if_scc:
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_vcc:
; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_move_valu:
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
; GCN: s_and_b64 vcc, exec, [[COND]]
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: buffer_store_dword
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}uniform_if_move_valu_commute:
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
; GCN: s_and_b64 vcc, exec, [[COND]]
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: buffer_store_dword
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}


; GCN-LABEL: {{^}}uniform_if_else_ret:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
; GCN: s_endpgm

; GCN: {{^}}[[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %if.end

if.else: ; preds = %entry
  store i32 2, i32 addrspace(1)* %out
  br label %if.end

if.end: ; preds = %if.else, %if.then
  ret void
}

; GCN-LABEL: {{^}}uniform_if_else:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]

; GCN: [[IF_LABEL]]:
; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1

; GCN-NEXT: [[ENDIF_LABEL]]:
; GCN: buffer_store_dword [[IMM_REG]]

; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN: buffer_store_dword [[THREE]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, i32 addrspace(1)* %out0
  br label %if.end

if.else: ; preds = %entry
  store i32 2, i32 addrspace(1)* %out0
  br label %if.end

if.end: ; preds = %if.else, %if.then
  store i32 3, i32 addrspace(1)* %out1
  ret void
}

; GCN-LABEL: {{^}}icmp_2_users:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
; GCN: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
; GCN: buffer_store_dword
; GCN: [[LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, i32 addrspace(1)* %out
  br label %ENDIF

ENDIF: ; preds = %IF, %main_body
  ret void
}

; GCN-LABEL: {{^}}icmp_users_different_blocks:
; GCN: s_load_dwordx2 s{{\[}}[[COND0:[0-9]+]]:[[COND1:[0-9]+]]{{\]}}
; GCN: s_cmp_lt_i32 s[[COND0]], 1
; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; GCN: v_cmp_gt_i32_e64 {{[^,]*}}, s[[COND1]], 0{{$}}
; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
; GCN: {{^}}[[EXIT]]:
; GCN: s_endpgm
; GCN: {{^}}[[BODY]]:
; GCN: buffer_store
; GCN: s_endpgm
define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  %cmp1 = icmp sgt i32 %cond1, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2: ; preds = %bb
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7: ; preds = %bb5
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}

; SI-LABEL: {{^}}uniform_loop:
; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
; SI: s_add_i32 [[I:s[0-9]+]], s{{[0-9]+}}, -1
; SI: s_cmp_lg_u32 [[I]], 0
; SI: s_cbranch_scc1 [[LOOP_LABEL]]
; SI: s_endpgm
define amdgpu_kernel void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test uniform and divergent.

; GCN-LABEL: {{^}}uniform_inside_divergent:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
; GCN: s_endpgm
; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
define amdgpu_kernel void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}divergent_inside_uniform:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: ; mask branch [[ENDIF_LABEL]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}divergent_if_uniform_if:
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_or_b64 exec, exec, [[MASK]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
; GCN: s_endpgm
; GCN: [[IF_UNIFORM]]:
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
define amdgpu_kernel void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, i32 addrspace(1)* %out
  br label %exit

exit:
  ret void
}

; The condition of the branches in the two blocks are
; uniform. MachineCSE replaces the 2nd condition with the inverse of
; the first, leaving an scc use in a different block than it was
; defed.

; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
; GCN: s_load_dword [[COND:s[0-9]+]]
; GCN: s_cmp_lt_i32 [[COND]], 1
; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3

; GCN: %bb.1:
; GCN-NOT: cmp
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; GCN: s_cbranch_scc1 BB[[FNNUM]]_3

; GCN: BB[[FNNUM]]_3:
; GCN: s_endpgm
define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2: ; preds = %bb
  %tmp3 = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 0, i32 addrspace(1)* undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7: ; preds = %bb5
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; SI-DAG: v_cmp_eq_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0

; SI-DAG: v_cmp_ne_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN-DAG: v_cmp_gt_i64_e64
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_i64_eq:
; GCN: v_cmp_eq_u64_e32
define amdgpu_kernel void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_i64_ne:
; GCN: v_cmp_ne_u64_e32
define amdgpu_kernel void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
; GCN: v_add_{{[iu]}}32_e32
; GCN: ds_write_b32
define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
bb0:
  br label %bb1

bb1: ; preds = %bb3, %bb0
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tmp1
  br i1 undef, label %bb2, label %bb3

bb2: ; preds = %bb1
  store volatile i32 1, i32 addrspace(3)* %tmp2, align 4
  br label %bb3

bb3: ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }