; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s

; Test the register stackifier pass.
;
; In the CHECK lines below, an operand spelled `$popN` was stackified, while an
; operand spelled `$N` is still read from a numbered register.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; No because of pointer aliasing: the store through %p sits between the load
; from %q and its use.

; CHECK-LABEL: no0:
; CHECK: return $1{{$}}
define i32 @no0(i32* %p, i32* %q) {
  %t = load i32, i32* %q
  store i32 0, i32* %p
  ret i32 %t
}

; No because of side effects: the load and the store are both volatile, even
; though the load also carries !invariant.load.
; The !invariant.load operand is an empty node, as the LangRef requires.

; CHECK-LABEL: no1:
; CHECK: return $1{{$}}
define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
  %t = load volatile i32, i32* %q, !invariant.load !{}
  store volatile i32 0, i32* %p
  ret i32 %t
}

; Yes because of invariant load and no side effects.

; CHECK-LABEL: yes0:
; CHECK: return $pop{{[0-9]+}}{{$}}
define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
  %t = load i32, i32* %q, !invariant.load !{}
  store i32 0, i32* %p
  ret i32 %t
}

; Yes because of no intervening side effects.

; CHECK-LABEL: yes1:
; CHECK: return $pop0{{$}}
define i32 @yes1(i32* %q) {
  %t = load volatile i32, i32* %q
  ret i32 %t
}

; Yes because undefined behavior can be sunk past a store.

; CHECK-LABEL: sink_trap:
; CHECK: return $pop{{[0-9]+}}{{$}}
define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
  %t = sdiv i32 %x, %y
  store volatile i32 0, i32* %p
  ret i32 %t
}

; Yes because the call is readnone.

; CHECK-LABEL: sink_readnone_call:
; CHECK: return $pop0{{$}}
declare i32 @readnone_callee() readnone nounwind
define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
  %t = call i32 @readnone_callee()
  store volatile i32 0, i32* %p
  ret i32 %t
}

; No because the call is readonly and there's an intervening store.
; CHECK-LABEL: no_sink_readonly_call:
; CHECK: return ${{[0-9]+}}{{$}}
declare i32 @readonly_callee() readonly nounwind
define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
  %result = call i32 @readonly_callee()
  store i32 0, i32* %p
  ret i32 %result
}

; Stack uses must not be scheduled into the stack. The scheduler could lower
; register pressure by moving the definition of $2 downwards, but liveness
; could then become incorrect if the instructions are rearranged afterwards to
; make the stack contiguous.

; CHECK-LABEL: stack_uses:
; CHECK: .param i32, i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: block{{$}}
; CHECK-NEXT: i32.const $push[[L13:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i32.lt_s $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 2{{$}}
; CHECK-NEXT: i32.lt_s $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
; CHECK-NEXT: i32.xor $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
; CHECK-NEXT: i32.const $push[[L12:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i32.lt_s $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
; CHECK-NEXT: i32.const $push[[L11:[0-9]+]]=, 2{{$}}
; CHECK-NEXT: i32.lt_s $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
; CHECK-NEXT: i32.xor $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
; CHECK-NEXT: i32.xor $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}}
; CHECK-NEXT: i32.const $push10=, 1{{$}}
; CHECK-NEXT: i32.ne $push8=, $pop7, $pop10{{$}}
; CHECK-NEXT: br_if 0, $pop8{{$}}
; CHECK-NEXT: i32.const $push9=, 0{{$}}
; CHECK-NEXT: return $pop9{{$}}
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: i32.const $push14=, 1{{$}}
; CHECK-NEXT: return $pop14{{$}}
define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
entry:
  ; Four independent compares feed a two-level xor tree.
  %x.le = icmp sle i32 %x, 0
  %y.le = icmp sle i32 %y, 1
  %z.le = icmp sle i32 %z, 0
  %w.le = icmp sle i32 %w, 1
  %xy = xor i1 %x.le, %y.le
  %zw = xor i1 %z.le, %w.le
  %cond = xor i1 %xy, %zw
  br i1 %cond, label %true, label %false
true:
  ret i32 0
false:
  ret i32 1
}

; The loaded value has several users, so it cannot be stackified trivially;
; a tee_local makes it possible anyway.

; CHECK-LABEL: multiple_uses:
; CHECK: .param i32, i32, i32{{$}}
; CHECK-NEXT: .local i32{{$}}
; CHECK-NEXT: block{{$}}
; CHECK-NEXT: i32.load $push[[NUM0:[0-9]+]]=, 0($2){{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: i32.ge_u $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
; CHECK-NEXT: br_if 0, $pop[[NUM2]]{{$}}
; CHECK-NEXT: i32.lt_u $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
; CHECK-NEXT: br_if 0, $pop[[NUM3]]{{$}}
; CHECK-NEXT: i32.store $drop=, 0($2), $3{{$}}
; CHECK-NEXT: .LBB8_3:
; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
bb:
  br label %loop

loop:
  ; %word is used by the inttoptr (and through it both compares) and by the
  ; store in %then.
  %word = load i32, i32* %arg2
  %as.ptr = inttoptr i32 %word to i32*
  %at.or.above = icmp uge i32* %as.ptr, %arg1
  %below = icmp ult i32* %as.ptr, %arg0
  %skip = or i1 %at.or.above, %below
  br i1 %skip, label %back, label %then

then:
  store i32 %word, i32* %arg2
  br label %back

back:
  br i1 undef, label %return, label %loop

return:
  ret void
}

; Don't stackify stores across other instructions with side effects.
; The label check uses CHECK-LABEL with the full symbol name, like every other
; test in this file, so the store/call sequence below is matched only inside
; this function's output.

; CHECK-LABEL: stackify_store_across_side_effects:
; CHECK: store
; CHECK-NEXT: call
; CHECK-NEXT: store
; CHECK-NEXT: call
declare void @evoke_side_effects()
define hidden void @stackify_store_across_side_effects(double* nocapture %d) {
entry:
  store double 2.0, double* %d
  call void @evoke_side_effects()
  store double 2.0, double* %d
  call void @evoke_side_effects()
  ret void
}

; Div instructions have side effects and can't be reordered, but this entire
; function should still be able to be stackified because it's already in
; tree order.

; CHECK-LABEL: div_tree:
; CHECK: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.div_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: i32.div_s $push[[L1:[0-9]+]]=, $2, $3{{$}}
; CHECK-NEXT: i32.div_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L3:[0-9]+]]=, $4, $5{{$}}
; CHECK-NEXT: i32.div_s $push[[L4:[0-9]+]]=, $6, $7{{$}}
; CHECK-NEXT: i32.div_s $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L7:[0-9]+]]=, $8, $9{{$}}
; CHECK-NEXT: i32.div_s $push[[L8:[0-9]+]]=, $10, $11{{$}}
; CHECK-NEXT: i32.div_s $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L10:[0-9]+]]=, $12, $13{{$}}
; CHECK-NEXT: i32.div_s $push[[L11:[0-9]+]]=, $14, $15{{$}}
; CHECK-NEXT: i32.div_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
; CHECK-NEXT: return $pop[[L14]]{{$}}
define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
entry:
  %div = sdiv i32 %a, %b
  %div1 = sdiv i32 %c, %d
  %div2 = sdiv i32 %div, %div1
  %div3 = sdiv i32 %e, %f
  %div4 = sdiv i32 %g, %h
  %div5 = sdiv i32 %div3, %div4
  %div6 = sdiv i32 %div2, %div5
  %div7 = sdiv i32 %i, %j
  %div8 = sdiv i32 %k, %l
  %div9 = sdiv i32 %div7, %div8
  %div10 = sdiv i32 %m, %n
  %div11 = sdiv i32 %o, %p
  %div12 = sdiv i32 %div10, %div11
  %div13 = sdiv i32 %div9, %div12
  %div14 = sdiv i32 %div6, %div13
  ret i32 %div14
}

; A simple multiple-use case: %mul feeds two separate calls.

; CHECK-LABEL: simple_multiple_use:
; CHECK: .param i32, i32{{$}}
; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: call use_a@FUNCTION, $pop[[NUM1]]{{$}}
; CHECK-NEXT: call use_b@FUNCTION, $[[NUM2]]{{$}}
; CHECK-NEXT: return{{$}}
declare void @use_a(i32)
declare void @use_b(i32)
define void @simple_multiple_use(i32 %x, i32 %y) {
  %mul = mul i32 %y, %x
  call void @use_a(i32 %mul)
  call void @use_b(i32 %mul)
  ret void
}

; Multiple uses of the same value in one instruction.

; CHECK-LABEL: multiple_uses_in_same_insn:
; CHECK: .param i32, i32{{$}}
; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}}
; CHECK-NEXT: return{{$}}
declare void @use_2(i32, i32)
define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
  %mul = mul i32 %y, %x
  call void @use_2(i32 %mul, i32 %mul)
  ret void
}

; Commute operands to achieve better stackifying.
; CHECK-LABEL: commute:
; CHECK-NOT: param
; CHECK: .result i32{{$}}
; CHECK-NEXT: i32.call $push0=, red@FUNCTION{{$}}
; CHECK-NEXT: i32.call $push1=, green@FUNCTION{{$}}
; CHECK-NEXT: i32.add $push2=, $pop0, $pop1{{$}}
; CHECK-NEXT: i32.call $push3=, blue@FUNCTION{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: return $pop4{{$}}
declare i32 @red()
declare i32 @green()
declare i32 @blue()
define i32 @commute() {
  %r = call i32 @red()
  %g = call i32 @green()
  ; Operands are written in the opposite order to the calls; the expected
  ; output above has them swapped back (pop0 = red, pop1 = green).
  %rg = add i32 %g, %r
  %b = call i32 @blue()
  %sum = add i32 %rg, %b
  ret i32 %sum
}

; Don't stackify a register when doing so would move the def of the register
; past an implicit get_local for that register.

; CHECK-LABEL: no_stackify_past_use:
; CHECK: i32.call $1=, callee@FUNCTION, $0
; CHECK-NEXT: i32.const $push0=, 1
; CHECK-NEXT: i32.add $push1=, $0, $pop0
; CHECK-NEXT: i32.call $push2=, callee@FUNCTION, $pop1
; CHECK-NEXT: i32.sub $push3=, $pop2, $1
; CHECK-NEXT: i32.div_s $push4=, $pop3, $1
; CHECK-NEXT: return $pop4
declare i32 @callee(i32)
define i32 @no_stackify_past_use(i32 %arg) {
  %first = call i32 @callee(i32 %arg)
  %next = add i32 %arg, 1
  %second = call i32 @callee(i32 %next)
  ; %first is the right-hand operand of both the sub and the sdiv.
  %diff = sub i32 %second, %first
  %quot = sdiv i32 %diff, %first
  ret i32 %quot
}

; Same shape as no_stackify_past_use, but built from commutative operators, so
; the operands can be reordered and the value stackified.
; CHECK-LABEL: commute_to_fix_ordering:
; CHECK: i32.call $push[[L0:.+]]=, callee@FUNCTION, $0
; CHECK: tee_local $push[[L1:.+]]=, $1=, $pop[[L0]]
; CHECK: i32.const $push0=, 1
; CHECK: i32.add $push1=, $0, $pop0
; CHECK: i32.call $push2=, callee@FUNCTION, $pop1
; CHECK: i32.add $push3=, $1, $pop2
; CHECK: i32.mul $push4=, $pop[[L1]], $pop3
; CHECK: return $pop4
define i32 @commute_to_fix_ordering(i32 %arg) {
  %tmp1 = call i32 @callee(i32 %arg)
  %tmp2 = add i32 %arg, 1
  %tmp3 = call i32 @callee(i32 %tmp2)
  %tmp5 = add i32 %tmp3, %tmp1
  %tmp6 = mul i32 %tmp5, %tmp1
  ret i32 %tmp6
}

; Stackify individual defs of virtual registers with multiple defs.

; CHECK-LABEL: multiple_defs:
; CHECK: f64.add $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
; CHECK: $[[NUM2]]=,
define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
bb:
  br label %bb5

bb5:                                              ; preds = %bb21, %bb
  %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
  %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
  %tmp7 = fcmp olt double %tmp6, 2.323450e+01
  br i1 %tmp7, label %bb8, label %bb21

bb8:                                              ; preds = %bb17, %bb5
  %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
  %tmp10 = fadd double %tmp6, -1.000000e+00
  %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
  %tmp12 = fadd double %tmp11, %tmp9
  br i1 %arg3, label %bb17, label %bb13

bb13:                                             ; preds = %bb8
  %tmp14 = or i32 %arg1, 2
  %tmp15 = icmp eq i32 %tmp14, 14
  %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
  br label %bb17

bb17:                                             ; preds = %bb13, %bb8
  %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
  %tmp19 = fadd double %tmp18, %tmp12
  %tmp20 = fcmp olt double %tmp6, 2.323450e+01
  br i1 %tmp20, label %bb8, label %bb21

bb21:                                             ; preds = %bb17, %bb5
  %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
  %tmp23 = fadd double %tmp6, 1.000000e+00
  br label %bb5
}

; Don't move calls past loads.
; CHECK-LABEL: no_stackify_call_past_load:
; CHECK: i32.call $0=, red
; CHECK: i32.const $push0=, 0
; CHECK: i32.load $1=, count($pop0)
@count = hidden global i32 0, align 4
define i32 @no_stackify_call_past_load() {
  %a = call i32 @red()
  %b = load i32, i32* @count, align 4
  call i32 @callee(i32 %a) ; use of %a, after the load
  ret i32 %b
}

; Don't move stores past loads if there may be aliasing.
; CHECK-LABEL: no_stackify_store_past_load:
; CHECK: i32.store $[[L0:[0-9]+]]=, 0($1), $0
; CHECK: i32.load {{.*}}, 0($2)
; CHECK: i32.call {{.*}}, callee@FUNCTION, $[[L0]]{{$}}
define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
  store i32 %a, i32* %p1
  %b = load i32, i32* %p2, align 4
  call i32 @callee(i32 %a)
  ret i32 %b
}

; Can still stackify past invariant loads.
; The !invariant.load operand is an empty node, as the LangRef requires.
; CHECK-LABEL: store_past_invar_load:
; CHECK: i32.store $push{{.*}}, 0($1), $0
; CHECK: i32.call {{.*}}, callee@FUNCTION, $pop
; CHECK: i32.load $push{{.*}}, 0($2)
; CHECK: return $pop
define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
  store i32 %a, i32* %p1
  %b = load i32, i32* %p2, !invariant.load !{}
  call i32 @callee(i32 %a)
  ret i32 %b
}

; The only line expected between the function label and `unreachable` is
; .Lfunc_begin, i.e. the llvm.dbg.value call emits no instruction of its own.
; NOTE(review): presumably this guards the stackifier's handling of DBG_VALUE;
; confirm against the pass.

; CHECK-LABEL: ignore_dbg_value:
; CHECK-NEXT: .Lfunc_begin
; CHECK-NEXT: unreachable
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
define void @ignore_dbg_value() {
  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10
  unreachable
}

; Don't stackify an expression that might use the stack into a return, since we
; might insert an epilogue before the return.

; CHECK-LABEL: no_stackify_past_epilogue:
; CHECK: return ${{[0-9]+}}{{$}}
declare i32 @use_memory(i32*)
define i32 @no_stackify_past_epilogue() {
  %x = alloca i32
  %call = call i32 @use_memory(i32* %x)
  ret i32 %call
}

; Stackify a loop induction variable into a loop comparison.

; CHECK-LABEL: stackify_indvar:
; CHECK: i32.const $push[[L5:.+]]=, 1{{$}}
; CHECK-NEXT: i32.add $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
; CHECK-NEXT: tee_local $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
; CHECK-NEXT: i32.ne $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
define void @stackify_indvar(i32 %tmp, i32* %v) {
bb:
  br label %bb3

bb3:                                              ; preds = %bb3, %bb
  %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]
  %tmp5 = load volatile i32, i32* %v, align 4
  %tmp6 = add nsw i32 %tmp5, %tmp4
  store volatile i32 %tmp6, i32* %v, align 4
  %tmp7 = add nuw nsw i32 %tmp4, 1
  %tmp8 = icmp eq i32 %tmp7, %tmp
  br i1 %tmp8, label %bb10, label %bb3

bb10:                                             ; preds = %bb3
  ret void
}

; Don't stackify a call past a __stack_pointer store.
; CHECK-LABEL: stackpointer_dependency:
; CHECK: call {{.+}}, stackpointer_callee@FUNCTION,
; CHECK: i32.const $push[[L0:.+]]=, 0
; CHECK-NEXT: i32.store $drop=, __stack_pointer($pop[[L0]]),
declare i32 @stackpointer_callee(i8* readnone, i8* readnone)
declare i8* @llvm.frameaddress(i32)
define i32 @stackpointer_dependency(i8* readnone) {
  ; The incoming pointer is unnamed, so it is referred to as %0.
  %frame = tail call i8* @llvm.frameaddress(i32 0)
  %result = tail call i32 @stackpointer_callee(i8* %0, i8* %frame)
  ret i32 %result
}

; Module flags and debug-info nodes. !7, !9 and !10 are the operands of the
; llvm.dbg.value call in @ignore_dbg_value.
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
!2 = !DIFile(filename: "test.c", directory: "/")
!3 = !{}
!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, variables: !3)
!6 = !DISubroutineType(types: !3)
!7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !DIExpression()
!10 = !DILocation(line: 15, column: 6, scope: !5)