1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS
3
4; Test the register stackifier pass.
5
6; We have two sets of tests, one with registers and implicit locals, and
7; a stack / explicit locals based version (NOREGS).
8
9target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
10target triple = "wasm32-unknown-unknown"
11
12; No because of pointer aliasing.
13
14; CHECK-LABEL: no0:
15; CHECK: return $1{{$}}
16; NOREGS-LABEL: no0:
17; NOREGS: return{{$}}
; Loads through %q, stores 0 through %p, then returns the loaded value.
; Nothing tells the compiler that %p and %q are distinct, and the store sits
; between the load and its only use.
18define i32 @no0(i32* %p, i32* %q) {
19  %t = load i32, i32* %q
20  store i32 0, i32* %p
21  ret i32 %t
22}
23
24; No because of side effects.
25
26; CHECK-LABEL: no1:
27; CHECK: return $1{{$}}
28; NOREGS-LABEL: no1:
29; NOREGS: return{{$}}
; Same shape as @no0, but both the load and the store are volatile. The load
; is additionally tagged !invariant.load and %q is dereferenceable(4); the
; expected output still returns a plain register rather than a $pop operand.
30define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
31  %t = load volatile i32, i32* %q, !invariant.load !0
32  store volatile i32 0, i32* %p
33  ret i32 %t
34}
35
36; Yes because of invariant load and no side effects.
37
38; CHECK-LABEL: yes0:
39; CHECK: return $pop{{[0-9]+}}{{$}}
40; NOREGS-LABEL: yes0:
41; NOREGS: return{{$}}
; Non-volatile variant of @no1: the load is !invariant.load from a
; dereferenceable(4) pointer and the intervening store is an ordinary store.
; The expected output returns a $pop operand.
42define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
43  %t = load i32, i32* %q, !invariant.load !0
44  store i32 0, i32* %p
45  ret i32 %t
46}
47
48; Yes because of no intervening side effects.
49
50; CHECK-LABEL: yes1:
51; CHECK: return $pop0{{$}}
52; NOREGS-LABEL: yes1:
53; NOREGS: return{{$}}
; A volatile load whose result is returned immediately; no instruction sits
; between the load and the ret.
54define i32 @yes1(i32* %q) {
55  %t = load volatile i32, i32* %q
56  ret i32 %t
57}
58
59; Yes because undefined behavior can be sunk past a store.
60
61; CHECK-LABEL: sink_trap:
62; CHECK: return $pop{{[0-9]+}}{{$}}
63; NOREGS-LABEL: sink_trap:
64; NOREGS: return{{$}}
; %t is a signed division whose only use is the ret; a volatile store sits
; between the division and that use.
65define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
66  %t = sdiv i32 %x, %y
67  store volatile i32 0, i32* %p
68  ret i32 %t
69}
70
71; Yes because the call is readnone.
72
73; CHECK-LABEL: sink_readnone_call:
74; CHECK: return $pop1{{$}}
75; NOREGS-LABEL: sink_readnone_call:
76; NOREGS: return{{$}}
; External callee declared readnone and nounwind.
77declare i32 @readnone_callee() readnone nounwind
; Calls the readnone callee, performs a volatile store, then returns the call
; result. %x and %y are unused.
78define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
79  %t = call i32 @readnone_callee()
80  store volatile i32 0, i32* %p
81  ret i32 %t
82}
83
84; No because the call is readonly and there's an intervening store.
85
86; CHECK-LABEL: no_sink_readonly_call:
87; CHECK: return ${{[0-9]+}}{{$}}
88; NOREGS-LABEL: no_sink_readonly_call:
89; NOREGS: return{{$}}
; External callee declared readonly (it may read memory) and nounwind.
90declare i32 @readonly_callee() readonly nounwind
; Calls the readonly callee, performs an ordinary store, then returns the call
; result. %x and %y are unused.
91define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
92  %t = call i32 @readonly_callee()
93  store i32 0, i32* %p
94  ret i32 %t
95}
96
97; Don't schedule stack uses into the stack. To reduce register pressure, the
98; scheduler might be tempted to move the definition of $2 down. However, this
99; would risk getting incorrect liveness if the instructions are later
100; rearranged to make the stack contiguous.
101
102; CHECK-LABEL: stack_uses:
103; CHECK: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}}
104; CHECK-NEXT: block   {{$}}
105; CHECK-NEXT: i32.const   $push[[L13:[0-9]+]]=, 1{{$}}
106; CHECK-NEXT: i32.lt_s    $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
107; CHECK-NEXT: i32.const   $push[[L1:[0-9]+]]=, 2{{$}}
108; CHECK-NEXT: i32.lt_s    $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
109; CHECK-NEXT: i32.xor     $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
110; CHECK-NEXT: i32.const   $push[[L12:[0-9]+]]=, 1{{$}}
111; CHECK-NEXT: i32.lt_s    $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
112; CHECK-NEXT: i32.const   $push[[L11:[0-9]+]]=, 2{{$}}
113; CHECK-NEXT: i32.lt_s    $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
114; CHECK-NEXT: i32.xor     $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
115; CHECK-NEXT: i32.eq      $push7=, $pop[[L5]], $pop[[L6]]{{$}}
116; CHECK-NEXT: br_if       0, $pop7{{$}}
117; CHECK-NEXT: i32.const   $push8=, 0{{$}}
118; CHECK-NEXT: return      $pop8{{$}}
119; CHECK-NEXT: .LBB{{[0-9]+}}_2:
120; CHECK-NEXT: end_block{{$}}
121; CHECK-NEXT: i32.const   $push12=, 1{{$}}
122; CHECK-NEXT: return      $pop12{{$}}
123; NOREGS-LABEL: stack_uses:
124; NOREGS: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}}
125; NOREGS-NEXT: block {{$}}
126; NOREGS-NEXT: local.get 0{{$}}
127; NOREGS-NEXT: i32.const   1{{$}}
128; NOREGS-NEXT: i32.lt_s
129; NOREGS-NEXT: local.get 1{{$}}
130; NOREGS-NEXT: i32.const   2{{$}}
131; NOREGS-NEXT: i32.lt_s
132; NOREGS-NEXT: i32.xor {{$}}
133; NOREGS-NEXT: local.get 2{{$}}
134; NOREGS-NEXT: i32.const   1{{$}}
135; NOREGS-NEXT: i32.lt_s
136; NOREGS-NEXT: local.get 3{{$}}
137; NOREGS-NEXT: i32.const   2{{$}}
138; NOREGS-NEXT: i32.lt_s
139; NOREGS-NEXT: i32.xor {{$}}
140; NOREGS-NEXT: i32.eq {{$}}
141; NOREGS-NEXT: br_if       0{{$}}
142; NOREGS-NEXT: i32.const   0{{$}}
143; NOREGS-NEXT: return{{$}}
144; NOREGS-NEXT: .LBB{{[0-9]+}}_2:
145; NOREGS-NEXT: end_block{{$}}
146; NOREGS-NEXT: i32.const   1{{$}}
147; NOREGS-NEXT: return{{$}}
; Four signed "less than or equal" comparisons are combined pairwise with xor,
; and the xor of the two pairs selects the branch: %true returns 0 and %false
; returns 1.
148define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
149entry:
150  %c = icmp sle i32 %x, 0
151  %d = icmp sle i32 %y, 1
152  %e = icmp sle i32 %z, 0
153  %f = icmp sle i32 %w, 1
154  %g = xor i1 %c, %d
155  %h = xor i1 %e, %f
156  %i = xor i1 %g, %h
157  br i1 %i, label %true, label %false
158true:
159  ret i32 0
160false:
161  ret i32 1
162}
163
164; Test an interesting case where the load has multiple uses and cannot
165; be trivially stackified. However, it can be stackified with a local.tee.
166
167; CHECK-LABEL: multiple_uses:
168; CHECK: .functype multiple_uses (i32, i32, i32) -> (){{$}}
169; CHECK-NEXT: block   {{$}}
170; CHECK-NEXT: i32.load    $push[[NUM0:[0-9]+]]=, 0($2){{$}}
171; CHECK-NEXT: local.tee   $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
172; CHECK-NEXT: i32.ge_u    $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
173; CHECK-NEXT: br_if       0, $pop[[NUM2]]{{$}}
174; CHECK-NEXT: i32.lt_u    $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
175; CHECK-NEXT: br_if       0, $pop[[NUM3]]{{$}}
176; CHECK-NEXT: i32.store   0($2), $3{{$}}
177; CHECK-NEXT: .LBB{{[0-9]+}}_3:
178; CHECK-NEXT: end_block{{$}}
179; CHECK-NEXT: return{{$}}
180; NOREGS-LABEL: multiple_uses:
181; NOREGS: .functype multiple_uses (i32, i32, i32) -> (){{$}}
182; NOREGS: .local i32{{$}}
183; NOREGS-NEXT: block {{$}}
184; NOREGS-NEXT: local.get   2{{$}}
185; NOREGS-NEXT: i32.load    0{{$}}
186; NOREGS-NEXT: local.tee   3{{$}}
187; NOREGS-NEXT: local.get   1{{$}}
188; NOREGS-NEXT: i32.ge_u
189; NOREGS-NEXT: br_if       0{{$}}
190; NOREGS-NEXT: local.get   3{{$}}
191; NOREGS-NEXT: local.get   0{{$}}
192; NOREGS-NEXT: i32.lt_u
193; NOREGS-NEXT: br_if       0{{$}}
194; NOREGS-NEXT: local.get   2{{$}}
195; NOREGS-NEXT: local.get   3{{$}}
196; NOREGS-NEXT: i32.store   0{{$}}
197; NOREGS-NEXT: .LBB{{[0-9]+}}_3:
198; NOREGS-NEXT: end_block{{$}}
199; NOREGS-NEXT: return{{$}}
; The value loaded into %tmp7 has several uses: via %tmp8 it feeds two
; unsigned pointer comparisons, and in %then it is stored back through %arg2.
; The loop back edge in %back is taken on an undef condition.
200define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
201bb:
202  br label %loop
203
204loop:
205  %tmp7 = load i32, i32* %arg2
206  %tmp8 = inttoptr i32 %tmp7 to i32*
207  %tmp9 = icmp uge i32* %tmp8, %arg1
208  %tmp10 = icmp ult i32* %tmp8, %arg0
  ; Skip the store when the loaded pointer is >= %arg1 or < %arg0.
209  %tmp11 = or i1 %tmp9, %tmp10
210  br i1 %tmp11, label %back, label %then
211
212then:
213  store i32 %tmp7, i32* %arg2
214  br label %back
215
216back:
217  br i1 undef, label %return, label %loop
218
219return:
220  ret void
221}
222
223; Don't stackify stores across other instructions with side effects.
224
; The expected output keeps the original order: store, call, store, call, with
; the second call directly following the second store. The label directives
; below match the full function name so these checks are confined to this
; function's output, like every other test in this file.
225; CHECK-LABEL: stackify_store_across_side_effects:
226; CHECK:      store
227; CHECK:      call
228; CHECK:      store
229; CHECK-NEXT: call
230; NOREGS-LABEL: stackify_store_across_side_effects:
231; NOREGS:      store
232; NOREGS:      call
233; NOREGS:      store
234; NOREGS-NEXT: call
; External callee with no attributes, so it may have arbitrary side effects.
235declare void @evoke_side_effects()
; Alternates a store of 2.0 through %d with a call to the opaque callee, twice.
236define hidden void @stackify_store_across_side_effects(double* nocapture %d) {
237entry:
238  store double 2.0, double* %d
239  call void @evoke_side_effects()
240  store double 2.0, double* %d
241  call void @evoke_side_effects()
242  ret void
243}
244
245; Div instructions have side effects and can't be reordered, but this entire
246; function should still be able to be stackified because it's already in
247; tree order.
248
249; CHECK-LABEL: div_tree:
250; CHECK: .functype div_tree (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32){{$}}
251; CHECK-NEXT: i32.div_s   $push[[L0:[0-9]+]]=, $0, $1{{$}}
252; CHECK-NEXT: i32.div_s   $push[[L1:[0-9]+]]=, $2, $3{{$}}
253; CHECK-NEXT: i32.div_s   $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
254; CHECK-NEXT: i32.div_s   $push[[L3:[0-9]+]]=, $4, $5{{$}}
255; CHECK-NEXT: i32.div_s   $push[[L4:[0-9]+]]=, $6, $7{{$}}
256; CHECK-NEXT: i32.div_s   $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
257; CHECK-NEXT: i32.div_s   $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
258; CHECK-NEXT: i32.div_s   $push[[L7:[0-9]+]]=, $8, $9{{$}}
259; CHECK-NEXT: i32.div_s   $push[[L8:[0-9]+]]=, $10, $11{{$}}
260; CHECK-NEXT: i32.div_s   $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
261; CHECK-NEXT: i32.div_s   $push[[L10:[0-9]+]]=, $12, $13{{$}}
262; CHECK-NEXT: i32.div_s   $push[[L11:[0-9]+]]=, $14, $15{{$}}
263; CHECK-NEXT: i32.div_s   $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
264; CHECK-NEXT: i32.div_s   $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
265; CHECK-NEXT: i32.div_s   $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
266; CHECK-NEXT: return      $pop[[L14]]{{$}}
267; NOREGS-LABEL: div_tree:
268; NOREGS: .functype div_tree (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32){{$}}
269; NOREGS-NEXT: local.get 0{{$}}
270; NOREGS-NEXT: local.get 1{{$}}
271; NOREGS-NEXT: i32.div_s{{$}}
272; NOREGS-NEXT: local.get 2{{$}}
273; NOREGS-NEXT: local.get 3{{$}}
274; NOREGS-NEXT: i32.div_s{{$}}
275; NOREGS-NEXT: i32.div_s{{$}}
276; NOREGS-NEXT: local.get 4{{$}}
277; NOREGS-NEXT: local.get 5{{$}}
278; NOREGS-NEXT: i32.div_s{{$}}
279; NOREGS-NEXT: local.get 6{{$}}
280; NOREGS-NEXT: local.get 7{{$}}
281; NOREGS-NEXT: i32.div_s{{$}}
282; NOREGS-NEXT: i32.div_s{{$}}
283; NOREGS-NEXT: i32.div_s{{$}}
284; NOREGS-NEXT: local.get 8{{$}}
285; NOREGS-NEXT: local.get 9{{$}}
286; NOREGS-NEXT: i32.div_s{{$}}
287; NOREGS-NEXT: local.get 10{{$}}
288; NOREGS-NEXT: local.get 11{{$}}
289; NOREGS-NEXT: i32.div_s{{$}}
290; NOREGS-NEXT: i32.div_s{{$}}
291; NOREGS-NEXT: local.get 12{{$}}
292; NOREGS-NEXT: local.get 13{{$}}
293; NOREGS-NEXT: i32.div_s{{$}}
294; NOREGS-NEXT: local.get 14{{$}}
295; NOREGS-NEXT: local.get 15{{$}}
296; NOREGS-NEXT: i32.div_s{{$}}
297; NOREGS-NEXT: i32.div_s{{$}}
298; NOREGS-NEXT: i32.div_s{{$}}
299; NOREGS-NEXT: i32.div_s{{$}}
300; NOREGS-NEXT: return{{$}}
; Fifteen signed divisions forming a balanced binary tree over the sixteen
; arguments. They are written in post-order: each division's two operands are
; defined immediately before it, left subtree first.
301define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
302entry:
303  %div = sdiv i32 %a, %b
304  %div1 = sdiv i32 %c, %d
305  %div2 = sdiv i32 %div, %div1
306  %div3 = sdiv i32 %e, %f
307  %div4 = sdiv i32 %g, %h
308  %div5 = sdiv i32 %div3, %div4
309  %div6 = sdiv i32 %div2, %div5
310  %div7 = sdiv i32 %i, %j
311  %div8 = sdiv i32 %k, %l
312  %div9 = sdiv i32 %div7, %div8
313  %div10 = sdiv i32 %m, %n
314  %div11 = sdiv i32 %o, %p
315  %div12 = sdiv i32 %div10, %div11
316  %div13 = sdiv i32 %div9, %div12
317  %div14 = sdiv i32 %div6, %div13
318  ret i32 %div14
319}
320
321; A simple multiple-use case.
322
323; CHECK-LABEL: simple_multiple_use:
324; CHECK:       .functype simple_multiple_use (i32, i32) -> (){{$}}
325; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
326; CHECK-NEXT:  local.tee   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
327; CHECK-NEXT:  call        use_a, $pop[[NUM1]]{{$}}
328; CHECK-NEXT:  call        use_b, $[[NUM2]]{{$}}
329; CHECK-NEXT:  return{{$}}
330; NOREGS-LABEL: simple_multiple_use:
331; NOREGS:       .functype simple_multiple_use (i32, i32) -> (){{$}}
332; NOREGS-NEXT:  local.get 1{{$}}
333; NOREGS-NEXT:  local.get 0{{$}}
334; NOREGS-NEXT:  i32.mul
335; NOREGS-NEXT:  local.tee   1{{$}}
336; NOREGS-NEXT:  call        use_a{{$}}
337; NOREGS-NEXT:  local.get   1{{$}}
338; NOREGS-NEXT:  call        use_b{{$}}
339; NOREGS-NEXT:  return{{$}}
; Two external consumers, each taking one i32.
340declare void @use_a(i32)
341declare void @use_b(i32)
; %mul is computed once and passed to two separate calls.
342define void @simple_multiple_use(i32 %x, i32 %y) {
343  %mul = mul i32 %y, %x
344  call void @use_a(i32 %mul)
345  call void @use_b(i32 %mul)
346  ret void
347}
348
349; Multiple uses of the same value in one instruction.
350
351; CHECK-LABEL: multiple_uses_in_same_insn:
352; CHECK:       .functype multiple_uses_in_same_insn (i32, i32) -> (){{$}}
353; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
354; CHECK-NEXT:  local.tee   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
355; CHECK-NEXT:  call        use_2, $pop[[NUM1]], $[[NUM2]]{{$}}
356; CHECK-NEXT:  return{{$}}
357; NOREGS-LABEL: multiple_uses_in_same_insn:
358; NOREGS:       .functype multiple_uses_in_same_insn (i32, i32) -> (){{$}}
359; NOREGS-NEXT:  local.get 1{{$}}
360; NOREGS-NEXT:  local.get 0{{$}}
361; NOREGS-NEXT:  i32.mul
362; NOREGS-NEXT:  local.tee   1{{$}}
363; NOREGS-NEXT:  local.get   1{{$}}
364; NOREGS-NEXT:  call        use_2{{$}}
365; NOREGS-NEXT:  return{{$}}
; External consumer taking two i32 arguments.
366declare void @use_2(i32, i32)
; %mul is computed once and passed as both arguments of a single call.
367define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
368  %mul = mul i32 %y, %x
369  call void @use_2(i32 %mul, i32 %mul)
370  ret void
371}
372
373; Commute operands to achieve better stackifying.
374
375; CHECK-LABEL: commute:
376; CHECK:  .functype commute () -> (i32){{$}}
377; CHECK-NEXT:  call        $push0=, red{{$}}
378; CHECK-NEXT:  call        $push1=, green{{$}}
379; CHECK-NEXT:  i32.add     $push2=, $pop0, $pop1{{$}}
380; CHECK-NEXT:  call        $push3=, blue{{$}}
381; CHECK-NEXT:  i32.add     $push4=, $pop2, $pop3{{$}}
382; CHECK-NEXT:  return      $pop4{{$}}
383; NOREGS-LABEL: commute:
384; NOREGS:  .functype commute () -> (i32){{$}}
385; NOREGS-NEXT:  call        red{{$}}
386; NOREGS-NEXT:  call        green{{$}}
387; NOREGS-NEXT:  i32.add {{$}}
388; NOREGS-NEXT:  call        blue{{$}}
389; NOREGS-NEXT:  i32.add {{$}}
390; NOREGS-NEXT:  return{{$}}
; Three external functions returning i32; none carries attributes.
391declare i32 @red()
392declare i32 @green()
393declare i32 @blue()
; Calls red, green and blue in that order and sums the results.
394define i32 @commute() {
395  %call = call i32 @red()
396  %call1 = call i32 @green()
  ; Operands are listed green-then-red, the reverse of the call order.
397  %add = add i32 %call1, %call
398  %call2 = call i32 @blue()
399  %add3 = add i32 %add, %call2
400  ret i32 %add3
401}
402
403; Don't stackify a register when it would move the def of the register past
404; an implicit local.get for the register.
405
406; CHECK-LABEL: no_stackify_past_use:
407; CHECK:      call            $1=, callee, $0
408; CHECK-NEXT: i32.const       $push0=, 1
409; CHECK-NEXT: i32.add         $push1=, $0, $pop0
410; CHECK-NEXT: call            $push2=, callee, $pop1
411; CHECK-NEXT: i32.sub         $push3=, $pop2, $1
412; CHECK-NEXT: i32.div_s       $push4=, $pop3, $1
413; CHECK-NEXT: return          $pop4
414; NOREGS-LABEL: no_stackify_past_use:
415; NOREGS:      local.get       0{{$}}
416; NOREGS-NEXT: call            callee
417; NOREGS-NEXT: local.set       1{{$}}
418; NOREGS-NEXT: local.get       0{{$}}
419; NOREGS-NEXT: i32.const       1
420; NOREGS-NEXT: i32.add
421; NOREGS-NEXT: call            callee
422; NOREGS-NEXT: local.get       1{{$}}
423; NOREGS-NEXT: i32.sub
424; NOREGS-NEXT: local.get       1{{$}}
425; NOREGS-NEXT: i32.div_s
426; NOREGS-NEXT: return
; External function taking and returning i32; no attributes.
427declare i32 @callee(i32)
; %tmp1 comes from the first call and is used twice after the second call:
; as the right operand of the sub and as the divisor of the sdiv.
428define i32 @no_stackify_past_use(i32 %arg) {
429  %tmp1 = call i32 @callee(i32 %arg)
430  %tmp2 = add i32 %arg, 1
431  %tmp3 = call i32 @callee(i32 %tmp2)
432  %tmp5 = sub i32 %tmp3, %tmp1
433  %tmp6 = sdiv i32 %tmp5, %tmp1
434  ret i32 %tmp6
435}
436
437; This is the same as no_stackify_past_use, except using a commutative operator,
438; so we can reorder the operands and stackify.
439
440; CHECK-LABEL: commute_to_fix_ordering:
441; CHECK: call            $push[[L0:.+]]=, callee, $0
442; CHECK: local.tee       $push[[L1:.+]]=, $1=, $pop[[L0]]
443; CHECK: i32.const       $push0=, 1
444; CHECK: i32.add         $push1=, $0, $pop0
445; CHECK: call            $push2=, callee, $pop1
446; CHECK: i32.add         $push3=, $1, $pop2
447; CHECK: i32.mul         $push4=, $pop[[L1]], $pop3
448; CHECK: return          $pop4
449; NOREGS-LABEL: commute_to_fix_ordering:
450; NOREGS: local.get       0{{$}}
451; NOREGS: call            callee
452; NOREGS: local.tee       1
453; NOREGS: local.get       1{{$}}
454; NOREGS: local.get       0{{$}}
455; NOREGS: i32.const       1
456; NOREGS: i32.add
457; NOREGS: call            callee
458; NOREGS: i32.add
459; NOREGS: i32.mul
460; NOREGS: return
; Same call sequence as @no_stackify_past_use, but the two users of %tmp1 are
; an add and a mul instead of a sub and an sdiv.
461define i32 @commute_to_fix_ordering(i32 %arg) {
462  %tmp1 = call i32 @callee(i32 %arg)
463  %tmp2 = add i32 %arg, 1
464  %tmp3 = call i32 @callee(i32 %tmp2)
465  %tmp5 = add i32 %tmp3, %tmp1
466  %tmp6 = mul i32 %tmp5, %tmp1
467  ret i32 %tmp6
468}
469
470; Stackify individual defs of virtual registers with multiple defs.
471
472; CHECK-LABEL: multiple_defs:
473; CHECK:        f64.add         $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
474; CHECK-NEXT:   local.tee       $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
475; CHECK-NEXT:   f64.select      $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
476; CHECK:        $[[NUM2]]=,
477; NOREGS-LABEL: multiple_defs:
478; NOREGS:        f64.add
479; NOREGS:        local.tee
480; NOREGS:        f64.select
; Two nested loops: %bb5 through %bb21 is the outer loop and %bb8 through
; %bb17 the inner one. The double values are carried between iterations by
; phi nodes. %arg is unused; %arg2, %arg3 and %arg4 steer a select and two
; branches.
481define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
482bb:
483  br label %bb5
484
485bb5:                                              ; preds = %bb21, %bb
486  %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
487  %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
488  %tmp7 = fcmp olt double %tmp6, 2.323450e+01
489  br i1 %tmp7, label %bb8, label %bb21
490
491bb8:                                              ; preds = %bb17, %bb5
492  %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
493  %tmp10 = fadd double %tmp6, -1.000000e+00
494  %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
495  %tmp12 = fadd double %tmp11, %tmp9
496  br i1 %arg3, label %bb17, label %bb13
497
498bb13:                                             ; preds = %bb8
499  %tmp14 = or i32 %arg1, 2
500  %tmp15 = icmp eq i32 %tmp14, 14
501  %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
502  br label %bb17
503
504bb17:                                             ; preds = %bb13, %bb8
505  %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
506  %tmp19 = fadd double %tmp18, %tmp12
507  %tmp20 = fcmp olt double %tmp6, 2.323450e+01
508  br i1 %tmp20, label %bb8, label %bb21
509
510bb21:                                             ; preds = %bb17, %bb5
511  %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
512  %tmp23 = fadd double %tmp6, 1.000000e+00
513  br i1 %arg4, label %exit, label %bb5
514exit:
515  ret void
516}
517
518; Don't move calls past loads
519; CHECK-LABEL: no_stackify_call_past_load:
520; CHECK: call $0=, red
521; CHECK: i32.const $push0=, 0
522; CHECK: i32.load $1=, count($pop0)
523; NOREGS-LABEL: no_stackify_call_past_load:
524; NOREGS: call red
525; NOREGS: i32.const 0
526; NOREGS: i32.load count
; Global read by the load in the function below.
527@count = hidden global i32 0, align 4
; Calls @red, loads @count, then passes the call result to @callee and returns
; the loaded value. The load sits between the definition of %a and its use.
528define i32 @no_stackify_call_past_load() {
529  %a = call i32 @red()
530  %b = load i32, i32* @count, align 4
531  call i32 @callee(i32 %a)                        ; the only use of %a
532  ret i32 %b
533  ; The use of %a is the call to @callee above, after the load of @count.
534}
535
536; Don't move stores past loads if there may be aliasing
537; CHECK-LABEL: no_stackify_store_past_load
538; CHECK: i32.store 0($1), $0
539; CHECK: i32.load {{.*}}, 0($2)
540; CHECK: call {{.*}}, callee, $0{{$}}
541; NOREGS-LABEL: no_stackify_store_past_load
542; NOREGS: i32.store 0
543; NOREGS: i32.load 0
544; NOREGS: call callee{{$}}
; Stores %a through %p1, loads through %p2, passes %a to @callee, and returns
; the loaded value. Nothing states that %p1 and %p2 are distinct.
545define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
546  store i32 %a, i32* %p1
547  %b = load i32, i32* %p2, align 4
548  call i32 @callee(i32 %a)
549  ret i32 %b
550}
551
552; Can still stackify past invariant loads.
553; CHECK-LABEL: store_past_invar_load
554; CHECK: i32.store 0($1), $0
555; CHECK: call {{.*}}, callee, $0
556; CHECK: i32.load $push{{.*}}, 0($2)
557; CHECK: return $pop
558; NOREGS-LABEL: store_past_invar_load
559; NOREGS: i32.store 0
560; NOREGS: call callee
561; NOREGS: i32.load 0
562; NOREGS: return
; Same sequence as @no_stackify_store_past_load, except that the load is
; tagged !invariant.load and %p2 is dereferenceable(4). The expected output
; places the load after the call, feeding the return through $pop.
563define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
564  store i32 %a, i32* %p1
565  %b = load i32, i32* %p2, !invariant.load !0
566  call i32 @callee(i32 %a)
567  ret i32 %b
568}
569
570; CHECK-LABEL: ignore_dbg_value:
571; CHECK:      .Lfunc_begin
572; CHECK:       unreachable
573; NOREGS-LABEL: ignore_dbg_value:
574; NOREGS:      .Lfunc_begin
575; NOREGS:       unreachable
; Debug-value intrinsic, declared here with an i64 second operand.
576declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
; The body holds only a debug-value intrinsic call followed by unreachable.
; The expected output is the function start label and then unreachable.
577define void @ignore_dbg_value() {
578  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10
579  unreachable
580}
581
582; Don't stackify an expression that might use the stack into a return, since we
583; might insert an epilogue before the return.
584
585; CHECK-LABEL: no_stackify_past_epilogue:
586; CHECK: return ${{[0-9]+}}{{$}}
587; NOREGS-LABEL: no_stackify_past_epilogue:
588; NOREGS: return{{$}}
; External function taking a pointer and returning i32.
589declare i32 @use_memory(i32*)
; %x is a stack allocation whose address is handed to @use_memory; the call
; result is returned. The expected output returns a plain register.
590define i32 @no_stackify_past_epilogue() {
591  %x = alloca i32
592  %call = call i32 @use_memory(i32* %x)
593  ret i32 %call
594}
595
596; Stackify a loop induction variable into a loop comparison.
597
598; CHECK-LABEL: stackify_indvar:
599; CHECK:             i32.const   $push[[L5:.+]]=, 1{{$}}
600; CHECK-NEXT:        i32.add     $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
601; CHECK-NEXT:        local.tee   $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
602; CHECK-NEXT:        i32.ne      $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
603; NOREGS-LABEL: stackify_indvar:
604; NOREGS:             i32.const   1{{$}}
605; NOREGS-NEXT:        i32.add
606; NOREGS-NEXT:        local.tee   2{{$}}
607; NOREGS-NEXT:        i32.ne
; Single-block loop. %tmp4 counts up from 0; each iteration adds it to the
; value behind %v using a volatile load and a volatile store, increments the
; counter into %tmp7, and exits once %tmp7 equals %tmp.
608define void @stackify_indvar(i32 %tmp, i32* %v) #0 {
609bb:
610  br label %bb3
611
612bb3:                                              ; preds = %bb3, %bb
613  %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]
614  %tmp5 = load volatile i32, i32* %v, align 4
615  %tmp6 = add nsw i32 %tmp5, %tmp4
616  store volatile i32 %tmp6, i32* %v, align 4
617  %tmp7 = add nuw nsw i32 %tmp4, 1
618  %tmp8 = icmp eq i32 %tmp7, %tmp
619  br i1 %tmp8, label %bb10, label %bb3
620
621bb10:                                             ; preds = %bb3
622  ret void
623}
624
625; Don't stackify a call past a __stack_pointer store.
626
627; CHECK-LABEL: stackpointer_dependency:
628; CHECK:      call {{.+}}, stackpointer_callee,
629; CHECK-NEXT: global.set __stack_pointer,
630; NOREGS-LABEL: stackpointer_dependency:
631; NOREGS:      call stackpointer_callee
632; NOREGS:      global.set __stack_pointer
; Callee is readnone and nounwind, and both of its pointer parameters are
; readnone as well.
633declare i32 @stackpointer_callee(i8* readnone, i8* readnone) nounwind readnone
634declare i8* @llvm.frameaddress(i32)
; The parameter is unnamed, so it is %0; the entry block implicitly takes %1,
; which is why the first instruction result is %2. The function passes its
; argument and its own frame address (level 0) to the callee.
635define i32 @stackpointer_dependency(i8* readnone) {
636  %2 = tail call i8* @llvm.frameaddress(i32 0)
637  %3 = tail call i32 @stackpointer_callee(i8* %0, i8* %2)
638  ret i32 %3
639}
640
641; Stackify a call_indirect with respect to its ordering
642
643; CHECK-LABEL: call_indirect_stackify:
644; CHECK: i32.load  $push[[L4:.+]]=, 0($0)
645; CHECK-NEXT: local.tee $push[[L3:.+]]=, $0=, $pop[[L4]]
646; CHECK-NEXT: i32.load  $push[[L0:.+]]=, 0($0)
647; CHECK-NEXT: i32.load  $push[[L1:.+]]=, 0($pop[[L0]])
648; CHECK-NEXT: call_indirect $push{{.+}}=, $pop[[L3]], $1, $pop[[L1]]
649; NOREGS-LABEL: call_indirect_stackify:
650; NOREGS: i32.load  0
651; NOREGS-NEXT: local.tee 0
652; NOREGS:      i32.load  0
653; NOREGS-NEXT: i32.load  0
654; NOREGS-NEXT: call_indirect (i32, i32) -> (i32)
; Object layout: a single pointer to a table of function pointers.
655%class.call_indirect = type { i32 (...)** }
; Virtual-call pattern: load the object pointer, load its table pointer, load
; entry 0 of the table, then call that entry indirectly with the object and
; %arg. %obj is used both to find the callee and as the first call argument.
656define i32 @call_indirect_stackify(%class.call_indirect** %objptr, i32 %arg) {
657  %obj = load %class.call_indirect*, %class.call_indirect** %objptr
658  %addr = bitcast %class.call_indirect* %obj to i32(%class.call_indirect*, i32)***
659  %vtable = load i32(%class.call_indirect*, i32)**, i32(%class.call_indirect*, i32)*** %addr
660  %vfn = getelementptr inbounds i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vtable, i32 0
661  %f = load i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vfn
662  %ret = call i32 %f(%class.call_indirect* %obj, i32 %arg)
663  ret i32 %ret
664}
665
666!llvm.module.flags = !{!0}
667!llvm.dbg.cu = !{!1}
668
669!0 = !{i32 2, !"Debug Info Version", i32 3}
670!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
671!2 = !DIFile(filename: "test.c", directory: "/")
672!3 = !{}
673!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !3)
674!6 = !DISubroutineType(types: !3)
675!7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)
676!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
677!9 = !DIExpression()
678!10 = !DILocation(line: 15, column: 6, scope: !5)
679