; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s

; These tests check for loop branching structure, and that the loop align
; directive is placed in the expected place.
;
; Layout of this file: each test consists of a short description, the
; FileCheck expectations for the emitted assembly, and the IR function that
; llc compiles. External callees and the profile metadata shared by the
; tests are declared at the bottom of the file.

; CodeGen should insert a branch into the middle of the loop in
; order to avoid a branch within the loop.
;
; NOTE(review): the expectations below place loop_header directly after the
; align directive at .LBB0_1 and finish the loop body with an unconditional
; "jmp .LBB0_1"; they do not look for a jump into the middle of the loop.
; Confirm whether the description above is still current.

; CHECK-LABEL: simple:
; CHECK: align
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: callq loop_header
; CHECK: js .LBB0_3
; CHECK-NEXT: callq loop_latch
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: callq exit

define void @simple() nounwind {
entry:
  br label %loop

loop:
  call void @loop_header()
  %t0 = tail call i32 @get()
  %t1 = icmp slt i32 %t0, 0
  ; A negative result leaves the loop; otherwise continue to the latch.
  br i1 %t1, label %done, label %bb

bb:
  call void @loop_latch()
  br label %loop

done:
  call void @exit()
  ret void
}

; CodeGen should move block_a to the top of the loop so that it
; falls through into the loop, avoiding a branch within the loop.

; CHECK-LABEL: slightly_more_involved:
; CHECK: jmp .LBB1_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: callq bar99
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: callq body

define void @slightly_more_involved() nounwind {
entry:
  br label %loop

loop:
  call void @body()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 2
  br i1 %t1, label %block_a, label %bb

bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 99
  ; Either leave the loop or take the back edge straight to the header.
  br i1 %t3, label %exit, label %loop

block_a:
  ; Side block inside the loop; its only successor is the loop header.
  call void @bar99()
  br label %loop

exit:
  call void @exit()
  ret void
}

; Same as slightly_more_involved, but block_a is now a CFG diamond with
; fallthrough edges which should be preserved.
; "callq block_a_merge_func" is tail duped.

; CHECK-LABEL: yet_more_involved:
; CHECK: jmp .LBB2_1
; CHECK-NEXT: align

; CHECK: .LBB2_1:
; CHECK-NEXT: callq body
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2, %eax
; CHECK-NEXT: jge .LBB2_2
; CHECK-NEXT: callq bar99
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2999, %eax
; CHECK-NEXT: jg .LBB2_6
; CHECK-NEXT: callq block_a_true_func
; CHECK-NEXT: callq block_a_merge_func
; CHECK-NEXT: jmp .LBB2_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB2_6:
; CHECK-NEXT: callq block_a_false_func
; CHECK-NEXT: callq block_a_merge_func
; CHECK-NEXT: jmp .LBB2_1

define void @yet_more_involved() nounwind {
entry:
  br label %loop

loop:
  call void @body()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 2
  br i1 %t1, label %block_a, label %bb

bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 99
  br i1 %t3, label %exit, label %loop

block_a:
  ; Top of the diamond: selects between block_a_true and block_a_false.
  call void @bar99()
  %z0 = call i32 @get()
  %z1 = icmp slt i32 %z0, 3000
  br i1 %z1, label %block_a_true, label %block_a_false

block_a_true:
  call void @block_a_true_func()
  br label %block_a_merge

block_a_false:
  call void @block_a_false_func()
  br label %block_a_merge

block_a_merge:
  ; Join point of the diamond. The expectations above look for the call to
  ; block_a_merge_func once after each arm, each followed by its own jmp
  ; back to .LBB2_1.
  call void @block_a_merge_func()
  br label %loop

exit:
  call void @exit()
  ret void
}

; CodeGen should move the CFG islands that are part of the loop but don't
; conveniently fit anywhere so that they are at least contiguous with the
; loop.

; CHECK-LABEL: cfg_islands:
; CHECK: jmp .LBB3_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: callq bar100
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: callq loop_header
; CHECK: jl .LBB3_7
; CHECK: jge .LBB3_3
; CHECK-NEXT: callq bar101
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_3:
; CHECK: jge .LBB3_4
; CHECK-NEXT: callq bar102
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: .LBB3_4:
; CHECK: jl .LBB3_6
; CHECK-NEXT: callq loop_latch
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: .LBB3_6:

define void @cfg_islands() nounwind {
entry:
  br label %loop

loop:
  call void @loop_header()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 100
  br i1 %t1, label %block100, label %bb

bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 101
  br i1 %t3, label %block101, label %bb1

bb1:
  %t4 = call i32 @get()
  %t5 = icmp slt i32 %t4, 102
  br i1 %t5, label %block102, label %bb2

bb2:
  %t6 = call i32 @get()
  %t7 = icmp slt i32 %t6, 103
  br i1 %t7, label %exit, label %bb3

bb3:
  call void @loop_latch()
  br label %loop

exit:
  call void @exit()
  ret void

; The three blocks below are the "islands": they are written after the exit
; block in the IR, yet each one branches back to the loop header.
block100:
  call void @bar100()
  br label %loop

block101:
  call void @bar101()
  br label %loop

block102:
  call void @bar102()
  br label %loop
}

; Same body as simple, but the function carries the minsize attribute. The
; expectations require that no align directive appears between the function
; label and .LBB4_1.

; CHECK-LABEL: check_minsize:
; CHECK-NOT: align
; CHECK: .LBB4_1:
; CHECK-NEXT: callq loop_header
; CHECK: callq loop_latch
; CHECK: .LBB4_3:
; CHECK: callq exit


define void @check_minsize() minsize nounwind {
entry:
  br label %loop

loop:
  call void @loop_header()
  %t0 = tail call i32 @get()
  %t1 = icmp slt i32 %t0, 0
  br i1 %t1, label %done, label %bb

bb:
  call void @loop_latch()
  br label %loop

done:
  call void @exit()
  ret void
}

; This is exactly the same function as slightly_more_involved.
; The difference is that when optimising for size, we do not want
; to see this reordering.

; CHECK-LABEL: slightly_more_involved_2:
; CHECK-NOT: jmp .LBB5_1
; CHECK: .LBB5_1:
; CHECK-NEXT: callq body

define void @slightly_more_involved_2() #0 {
entry:
  br label %loop

loop:
  call void @body()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 2
  br i1 %t1, label %block_a, label %bb

bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 99
  br i1 %t3, label %exit, label %loop

block_a:
  call void @bar99()
  br label %loop

exit:
  call void @exit()
  ret void
}

; Attribute group referenced by slightly_more_involved_2; it carries both
; the minsize and the optsize attribute.
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }

; Same body as slightly_more_involved_2, but without minsize/optsize.
; Instead the function has a !prof attachment (!14, entry count 0) and the
; module carries a ProfileSummary flag. The expectations are the same as for
; slightly_more_involved_2: no "jmp .LBB6_1" ahead of the .LBB6_1 label.
; NOTE(review): "pgso" presumably stands for profile-guided size
; optimization - confirm.

; CHECK-LABEL: slightly_more_involved_2_pgso:
; CHECK-NOT: jmp .LBB6_1
; CHECK: .LBB6_1:
; CHECK-NEXT: callq body

define void @slightly_more_involved_2_pgso() norecurse nounwind readnone uwtable !prof !14 {
entry:
  br label %loop

loop:
  call void @body()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 2
  br i1 %t1, label %block_a, label %bb

bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 99
  br i1 %t3, label %exit, label %loop

block_a:
  call void @bar99()
  br label %loop

exit:
  call void @exit()
  ret void
}

; External callees. They are only declared here, so every call to them stays
; visible as a callq in the emitted assembly that the expectations match.
declare void @bar99() nounwind
declare void @bar100() nounwind
declare void @bar101() nounwind
declare void @bar102() nounwind
declare void @body() nounwind
declare void @exit() nounwind
declare void @loop_header() nounwind
declare void @loop_latch() nounwind
declare i32 @get() nounwind
declare void @block_a_true_func() nounwind
declare void @block_a_false_func() nounwind
declare void @block_a_merge_func() nounwind

; Module-level profile summary (!0 - !13) plus the function entry count
; (!14) attached to slightly_more_involved_2_pgso.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}