; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This test checks that the x86-cmov-converter optimization transforms CMOV
;; instructions into branches when it is profitable.
;; There are 5 cases below:
;;   1. CmovInCriticalPath:
;;        CMOV depends on the condition and it is in the hot path.
;;        Thus, it is worth transforming.
;;
;;   2. CmovNotInCriticalPath:
;;        Similar to (1), except that the CMOV is not in the hot path.
;;        Thus, it is not worth transforming.
;;
;;   3. MaxIndex:
;;        Maximum-calculation algorithm that looks for the max index;
;;        calculating the CMOV value is cheaper than calculating the CMOV
;;        condition. Thus, it is worth transforming.
;;
;;   4. MaxValue:
;;        Maximum-calculation algorithm that looks for the max value;
;;        calculating the CMOV value is not cheaper than calculating the CMOV
;;        condition. Thus, it is not worth transforming.
;;
;;   5. BinarySearch:
;;        Usually, a binary-search CMOV is not predicted.
;;        Thus, it is not worth transforming.
;;
;; Test was created using the following command line:
;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
;; Where foo.c is:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i] + 1;
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = (c[i] + 1) * t;
;;  }
;;}
;;
;;
;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;    d[i] /= b;
;;  }
;;}
;;
;;
;;int MaxIndex(int n, int *a) {
;;  int t = 0;
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > a[t])
;;      t = i;
;;  }
;;  return t;
;;}
;;
;;
;;int MaxValue(int n, int *a) {
;;  int t = a[0];
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > t)
;;      t = a[i];
;;  }
;;  return t;
;;}
;;
;;typedef struct Node Node;
;;struct Node {
;;  unsigned Val;
;;  Node *Right;
;;  Node *Left;
;;};
;;
;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
;;  while (Curr->Val > Next->Val) {
;;    Curr = Next;
;;    if (Mask & (0x1 << Curr->Val))
;;      Next = Curr->Right;
;;    else
;;      Next = Curr->Left;
;;  }
;;  return Curr->Val;
;;}
;;
;;
;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;  }
;;}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%struct.Node = type { i32, %struct.Node*, %struct.Node* }

; CHECK-LABEL: CmovInHotPath
; CHECK-NOT: cmov
; CHECK: jg

define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
entry:
  %cmp14 = icmp sgt i32 %n, 0
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %add
  %mul7 = mul nsw i32 %., %add
  store i32 %mul7, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: CmovNotInHotPath
; CHECK: cmovg

define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
entry:
  %cmp18 = icmp sgt i32 %n, 0
  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %0
  store i32 %., i32* %arrayidx, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx7, align 4
  %div = sdiv i32 %1, %b
  store i32 %div, i32* %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: MaxIndex
; CHECK-NOT: cmov
; CHECK: jg

define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
  %1 = load i32, i32* %arrayidx2, align 4
  %cmp3 = icmp sgt i32 %0, %1
  %2 = trunc i64 %indvars.iv to i32
  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: MaxValue
; CHECK-NOT: jg
; CHECK: cmovg

define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
entry:
  %0 = load i32, i32* %a, align 4
  %cmp13 = icmp sgt i32 %n, 1
  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx1, align 4
  %cmp2 = icmp sgt i32 %1, %t.014
  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: BinarySearch
; CHECK: set

define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
entry:
  %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
  %0 = load i32, i32* %Val8, align 8
  %Val19 = getelementptr inbounds %struct.Node, %struct.Node* %Next, i64 0, i32 0
  %1 = load i32, i32* %Val19, align 8
  %cmp10 = icmp ugt i32 %0, %1
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
  %Next.addr.011 = phi %struct.Node* [ %3, %while.body ], [ %Next, %entry ]
  %shl = shl i32 1, %2
  %and = and i32 %shl, %Mask
  %tobool = icmp eq i32 %and, 0
  %Left = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 2
  %Right = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 1
  %Left.sink = select i1 %tobool, %struct.Node** %Left, %struct.Node** %Right
  %3 = load %struct.Node*, %struct.Node** %Left.sink, align 8
  %Val1 = getelementptr inbounds %struct.Node, %struct.Node* %3, i64 0, i32 0
  %4 = load i32, i32* %Val1, align 8
  %cmp = icmp ugt i32 %2, %4
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
  ret i32 %.lcssa
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following test checks that the x86-cmov-converter optimization
;; transforms CMOV instructions into branches correctly.
;;
;; MBB:
;;   cond = cmp ...
;;   v1 = CMOVgt t1, f1, cond
;;   v2 = CMOVle s1, f2, cond
;;
;; Where: t1 = 11, f1 = 22, f2 = a
;;
;; After CMOV transformation
;; -------------------------
;; MBB:
;;   cond = cmp ...
;;   ja %SinkMBB
;;
;; FalseMBB:
;;   jmp %SinkMBB
;;
;; SinkMBB:
;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
;;                                          ; true-value with false-value
;;                                          ; Phi instruction cannot use
;;                                          ; previous Phi instruction result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]
; CHECK: movl $22, [[s2]]
; CHECK: [[SinkBB]]:
; CHECK: ja

define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
entry:
  %cmp10 = icmp ugt i32 0, %n
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
  %arr_i = getelementptr inbounds i32, i32* %arr, i32 %i
  %x = load i32, i32* %arr_i, align 4
  %div = udiv i32 %x, %a
  %cond = icmp ugt i32 %div, %a
  %condOpp = icmp ule i32 %div, %a
  %s1 = select i1 %cond, i32 11, i32 22
  %s2 = select i1 %condOpp, i32 %s1, i32 %a
  %sum = urem i32 %s1, %s2
  store i32 %sum, i32* %arr_i, align 4
  %i_inc = add i32 %i, 1
  %cmp = icmp ugt i32 %i_inc, %n
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; Test that we always will convert a cmov with a memory operand into a branch,
; even outside of a loop.
define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
; CHECK-LABEL: test_cmov_memoperand:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: movl %edx, %eax
; CHECK: cmpl
  %load = load i32, i32* %y
  %z = select i1 %cond, i32 %x, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%rcx), %eax
; CHECK: [[FALSE_BB]]:
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand.
define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: movl %edx, %eax
; CHECK: cmpl
  %y = load i32, i32* %y.ptr
  %z1 = select i1 %cond, i32 %x, i32 %a
  %z2 = select i1 %cond, i32 %x, i32 %y
  %z3 = select i1 %cond, i32 %x, i32 %b
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
; CHECK-DAG: movl %{{.*}} %eax
; CHECK: [[FALSE_BB]]:
; CHECK: addl
; CHECK-DAG: %[[R1]]
; CHECK-DAG: ,
; CHECK-DAG: %eax
; CHECK-DAG: addl
; CHECK-DAG: %[[R2]]
; CHECK-DAG: ,
; CHECK-DAG: %eax
; CHECK: retq
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Same as before but with operands reversed in the select with a load.
define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group2:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: movl %edx, %eax
; CHECK: cmpl
  %y = load i32, i32* %y.ptr
  %z2 = select i1 %cond, i32 %a, i32 %x
  %z1 = select i1 %cond, i32 %y, i32 %x
  %z3 = select i1 %cond, i32 %b, i32 %x
; CHECK-NOT: cmov
; CHECK: jbe [[FALSE_BB:.*]]
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
; CHECK-DAG: movl %{{.*}} %eax
; CHECK: [[FALSE_BB]]:
; CHECK: addl
; CHECK-DAG: %[[R1]]
; CHECK-DAG: ,
; CHECK-DAG: %eax
; CHECK-DAG: addl
; CHECK-DAG: %[[R2]]
; CHECK-DAG: ,
; CHECK-DAG: %eax
; CHECK: retq
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Test that we don't convert a group of cmovs with conflicting directions of
; loads.
define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y1 = load i32, i32* %y1.ptr
  %y2 = load i32, i32* %y2.ptr
  %z1 = select i1 %cond, i32 %x, i32 %y1
  %z2 = select i1 %cond, i32 %y2, i32 %x
; CHECK: cmoval
; CHECK: cmoval
  %s1 = add i32 %z1, %z2
  ret i32 %s1
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov in
; the group.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: movl %edi, %eax
; CHECK: cmpl
  %p = select i1 %cond, i32* %x, i32* %y
  %load = load i32, i32* %p
  %z = select i1 %cond, i32 %a, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %eax
; CHECK: [[FALSE_BB]]:
; CHECK: retq
  ret i32 %z
}

; Test that we can convert a group of two cmovs with memory operands where one
; uses the result of the other as part of the address.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: movl %edi, %eax
; CHECK: cmpl
  %load1 = load i32*, i32** %y
  %p = select i1 %cond, i32* %x, i32* %load1
  %load2 = load i32, i32* %p
  %z = select i1 %cond, i32 %a, i32 %load2
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movq (%r{{..}}), %[[R1:.*]]
; CHECK: movl (%[[R1]]), %eax
; CHECK: [[FALSE_BB]]:
; CHECK: retq
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov and
; where that cmov gets *its* input from a prior cmov in the group.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: movl %edi, %eax
; CHECK: cmpl
  %p = select i1 %cond, i32* %x, i32* %y
  %p2 = select i1 %cond, i32* %z, i32* %p
  %load = load i32, i32* %p2
  %r = select i1 %cond, i32 %a, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %eax
; CHECK: [[FALSE_BB]]:
; CHECK: retq
  ret i32 %r
}

attributes #0 = {"target-cpu"="x86-64"}