; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s

; cmp with single-use load, should not form branch.
; The load is folded into the compare as a memory operand and the select
; stays a conditional move.
define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    ucomisd (%rdi), %xmm0
; CHECK-NEXT:    cmovbel %edx, %eax
; CHECK-NEXT:    retq
  %load = load double, double* %b, align 8
  %cmp = fcmp olt double %load, %a
  %cond = select i1 %cmp, i32 %x, i32 %y
  ret i32 %cond
}

; Sanity check: no load.
; Both compare operands are already in registers.
define i32 @test2(double %a, double %b, i32 %x, i32 %y) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    ucomisd %xmm1, %xmm0
; CHECK-NEXT:    cmovbel %esi, %eax
; CHECK-NEXT:    retq
  %cmp = fcmp ogt double %a, %b
  %cond = select i1 %cmp, i32 %x, i32 %y
  ret i32 %cond
}

; Multiple uses of the load.
; The loaded value feeds both the compare and the final add.
define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    cmpl %edi, %eax
; CHECK-NEXT:    cmovael %ecx, %edx
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
  %load = load i32, i32* %b, align 4
  %cmp = icmp ult i32 %load, %a
  %cond = select i1 %cmp, i32 %x, i32 %y
  %add = add i32 %cond, %load
  ret i32 %add
}

; Multiple uses of the cmp.
; Two selects are driven by comparisons of the same pair of operands; the
; expected output uses a single cmpl followed by two conditional moves.
define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    cmpl %edi, (%rsi)
; CHECK-NEXT:    cmoval %edi, %eax
; CHECK-NEXT:    cmovael %edx, %eax
; CHECK-NEXT:    retq
  %load = load i32, i32* %b, align 4
  %cmp = icmp ult i32 %load, %a
  %cmp1 = icmp ugt i32 %load, %a
  %cond = select i1 %cmp1, i32 %a, i32 %y
  %cond5 = select i1 %cmp, i32 %cond, i32 %x
  ret i32 %cond5
}

; Zero-extended select.
define void @test6(i32 %a, i32 %x, i32* %y.ptr, i64* %z.ptr) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnsl (%rdx), %esi
; CHECK-NEXT:    movq %rsi, (%rcx)
; CHECK-NEXT:    retq
entry:
  %y = load i32, i32* %y.ptr
  %cmp = icmp slt i32 %a, 0
  %z = select i1 %cmp, i32 %x, i32 %y
  %z.ext = zext i32 %z to i64
  store i64 %z.ext, i64* %z.ptr
  ret void
}

; If a select is not obviously predictable, don't turn it into a branch.
; (Branch weights here are 1:99.)
define i32 @weighted_select1(i32 %a, i32 %b) {
; CHECK-LABEL: weighted_select1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnel %edi, %eax
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; If a select is obviously predictable, turn it into a branch.
; (Branch weights here are 1:100.)
define i32 @weighted_select2(i32 %a, i32 %b) {
; CHECK-LABEL: weighted_select2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jne .LBB6_2
; CHECK-NEXT:  # %bb.1: # %select.false
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:  .LBB6_2: # %select.end
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
  ret i32 %sel
}

; Note the reversed profile weights: it doesn't matter if it's
; obviously true or obviously false.
; Either one should become a branch rather than conditional move.
; TODO: But likely true vs. likely false should affect basic block placement?
define i32 @weighted_select3(i32 %a, i32 %b) {
; CHECK-LABEL: weighted_select3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB7_1
; CHECK-NEXT:  # %bb.2: # %select.end
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB7_1: # %select.false
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
  ret i32 %sel
}

; Weightlessness is no reason to die.
; (Both branch weights are zero; the select must still compile, and the
; expected output is a conditional move.)
define i32 @unweighted_select(i32 %a, i32 %b) {
; CHECK-LABEL: unweighted_select:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnel %edi, %eax
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !18
  ret i32 %sel
}

; Same 1:100 weights as weighted_select2, but the function is marked optsize:
; the expected output keeps the conditional move instead of a branch.
define i32 @weighted_select_optsize(i32 %a, i32 %b) optsize {
; CHECK-LABEL: weighted_select_optsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnel %edi, %eax
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
  ret i32 %sel
}

; Same 1:100 weights again, but the function carries a profile entry count of
; zero: the expected output keeps the conditional move instead of a branch.
define i32 @weighted_select_pgso(i32 %a, i32 %b) !prof !14 {
; CHECK-LABEL: weighted_select_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnel %edi, %eax
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
  ret i32 %sel
}

; If two selects in a row are predictable, turn them into branches.
define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 {
; CHECK-LABEL: weighted_selects:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    jne .LBB11_2
; CHECK-NEXT:  # %bb.1: # %select.false
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:  .LBB11_2: # %select.end
; CHECK-NEXT:    testl %ecx, %ecx
; CHECK-NEXT:    jne .LBB11_4
; CHECK-NEXT:  # %bb.3: # %select.false2
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:  .LBB11_4: # %select.end1
; CHECK-NEXT:    retq
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
  %cmp1 = icmp ne i32 %sel, 0
  %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !16
  ret i32 %sel1
}

; Module-level profile summary (InstrProf format).
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Entry count of zero, attached to weighted_select_pgso.
!14 = !{!"function_entry_count", i64 0}
; Branch weights attached to the selects in the weighted/unweighted tests.
!15 = !{!"branch_weights", i32 1, i32 99}
!16 = !{!"branch_weights", i32 1, i32 100}
!17 = !{!"branch_weights", i32 100, i32 1}
!18 = !{!"branch_weights", i32 0, i32 0}
; Non-zero entry count, attached to weighted_selects.
!19 = !{!"function_entry_count", i64 100}