; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s

; Codegen checks for branches whose condition does not depend on the
; workitem id. Both llc invocations above share the same check prefix, so
; every directive below has to hold for both -mcpu values.

; Integer compare of an argument against 0 feeding a two-way branch whose
; arms merge in a phi. The checks expect a scalar compare followed by an
; scc branch and a single store of the selected value.
; SI-LABEL: {{^}}uniform_if_scc:
; SI-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Same control flow as uniform_if_scc, but the condition is a float compare.
; The checks expect a vector compare whose result is ANDed with exec into
; vcc and then consumed by a vcc branch.
; SI-LABEL: {{^}}uniform_if_vcc:
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
; also scheduled the write first.
; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Variant of uniform_if_scc with the two branch targets swapped in the IR.
; The checks expect the inverted scalar compare (lg instead of eq).
; SI-LABEL: {{^}}uniform_if_swap_br_targets_scc:
; SI-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Variant of uniform_if_vcc with the two branch targets swapped in the IR.
; The checks expect the inverted vector compare (neq instead of eq).
; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
; also scheduled the write first.
; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; The branch condition is an integer compare of a value produced by an fadd
; and reinterpreted through a bitcast.
; SI-LABEL: {{^}}uniform_if_move_valu:
; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
; SI: s_and_b64 vcc, exec, [[COND]]
; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: buffer_store_dword
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm
define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; Like uniform_if_move_valu, but with an unsigned greater-than compare. The
; checks expect the constant as the first compare operand (6 with gt_u32 for
; the IR's "ugt 5" with inverted branch sense).
; SI-LABEL: {{^}}uniform_if_move_valu_commute:
; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
; SI: s_and_b64 vcc, exec, [[COND]]
; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: buffer_store_dword
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm
define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}


; If/else where each arm stores a different constant and the join block only
; returns. The checks expect each arm to end in its own s_endpgm.
; SI-LABEL: {{^}}uniform_if_else_ret:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]
; SI: s_endpgm

; SI: {{^}}[[IF_LABEL]]:
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
; SI: s_endpgm
define void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}

; If/else whose join block performs a further store. The checks expect the
; else arm to jump over the if arm with s_branch and a single shared tail.
; SI-LABEL: {{^}}uniform_if_else:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]
; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]

; SI: [[IF_LABEL]]:
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]

; SI: [[ENDIF_LABEL]]:
; SI: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; SI: buffer_store_dword [[THREE]]
; SI: s_endpgm
define void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out0
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  store i32 3, i32 addrspace(1)* %out1
  ret void
}

; The compare result has two users: the sext whose value is stored, and the
; branch itself.
; SI-LABEL: {{^}}icmp_2_users:
; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
; SI: buffer_store_dword
; SI: [[LABEL]]:
; SI: s_endpgm
define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, i32 addrspace(1)* %out
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  ret void
}

; %cmp1 is computed in the entry block but only used in %bb2, both as a
; branch condition and as the operand of a sext.
; SI-LABEL: {{^}}icmp_users_different_blocks:
; SI: s_load_dword [[COND:s[0-9]+]]
; SI: s_cmp_lt_i32 [[COND]], 1
; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]
; SI: s_and_b64 vcc, exec, [[MASK]]
; SI: s_cbranch_vccnz [[EXIT]]
; SI: buffer_store
; SI: {{^}}[[EXIT]]:
; SI: s_endpgm
define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  %cmp1 = icmp sgt i32 %cond1, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb7, %bb2, %bb
  ret void
}

; Self-loop whose exit condition compares an argument with the incremented
; induction variable.
; SI-LABEL: {{^}}uniform_loop:
; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
; FIXME: We need to teach SIFixSGPRCopies about uniform branches so we
; get s_add_i32 here.
; SI: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
; SI: v_cmp_ne_i32_e32 vcc, 0, [[I]]
; SI: s_and_b64 vcc, exec, vcc
; SI: s_cbranch_vccnz [[LOOP_LABEL]]
; SI: s_endpgm
define void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test uniform and divergent.

; Outer branch depends on the workitem id; the inner branch depends only on
; an argument. The checks expect exec-mask handling for the outer branch and
; a scalar compare with an scc branch for the inner one.
; SI-LABEL: {{^}}uniform_inside_divergent:
; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: s_cmp_lg_i32 {{s[0-9]+}}, 0
; SI: s_cbranch_scc1 [[ENDIF_LABEL]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; Outer branch depends only on an argument; the inner branch depends on the
; workitem id. Note that the inner block keeps the name %if_uniform even
; though it is reached through the workitem-id compare.
; SI-LABEL: {{^}}divergent_inside_uniform:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; A workitem-id-dependent if followed, after its join, by an
; argument-dependent if.
; NOTE(review): [[MASK]] is defined by both the s_and_saveexec_b64 and the
; s_xor_b64 directives below - confirm which binding the s_or_b64 directive
; is intended to match.
; SI-LABEL: {{^}}divergent_if_uniform_if:
; SI: v_cmp_eq_i32_e32 vcc, 0, v0
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
; SI: s_or_b64 exec, exec, [[MASK]]
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]
; SI: [[EXIT]]:
; SI: s_endpgm
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, i32 addrspace(1)* %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are
; uniform. MachineCSE replaces the 2nd condition with the inverse of
; the first, leaving an scc use in a different block than the one in
; which it was defined.

; Two branches in different blocks test the same argument with opposite
; predicates (sgt 0 in %bb, sle 0 in %bb2). The checks require that no
; second compare is emitted in the block between the two scc branches.
; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
; SI: s_load_dword [[COND:s[0-9]+]]
; SI: s_cmp_lt_i32 [[COND]], 1
; SI: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3

; SI: BB#1:
; SI-NOT: cmp
; SI: buffer_load_dword
; SI: buffer_store_dword
; SI: s_cbranch_scc1 BB[[FNNUM]]_3

; SI: BB[[FNNUM]]_3:
; SI: s_endpgm
define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  ; Volatile accesses placed between the two compares of %cond.
  %tmp3 = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 0, i32 addrspace(1)* undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb7, %bb2, %bb
  ret void
}

; Intrinsic used by the tests in this file to obtain a per-workitem value.
declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { readnone }