1; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
2; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
3;
4; Note: Many of these tests use inline asm instead of regular calls.
5; This gives better control over what the allocation will do.
6; Otherwise, we may get a spill right in the entry block, defeating
7; shrink-wrapping. Moreover, some of the inline asm statements (nop)
8; are here to ensure that the related paths do not end up as critical
9; edges.
10
11
12; Initial motivating example: Simple diamond with a call just on one side.
13; CHECK-LABEL: foo:
14;
15; Compare the arguments and return
16; No prologue needed.
17; ENABLE: cmpw 3, 4
18; ENABLE-NEXT: bgelr 0
19;
20; Prologue code.
21;  At a minimum, we save/restore the link register. Other registers may be saved
22;  as well.
23; CHECK: mflr
24;
25; Compare the arguments and jump to exit.
26; After the prologue is set.
27; DISABLE: cmpw 3, 4
28; DISABLE-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]]
29;
30; Store %a on the stack
; The parentheses of the offset(base) operand are escaped so that they match
; literally; unescaped they would only form a regex group.
31; CHECK: stw 3, {{[0-9]+\([0-9]+\)}}
32; Set the alloca address in the second argument.
33; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
34; Set the first argument to zero.
35; CHECK-NEXT: li 3, 0
36; CHECK-NEXT: bl doSomething
37;
38; With shrink-wrapping, epilogue is just after the call.
39; Restore the link register and return.
40; Note that there could be other epilog code before the link register is
41; restored but we will not check for it here.
42; ENABLE: mtlr
43; ENABLE-NEXT: blr
44;
45; DISABLE: [[EXIT_LABEL]]:
46;
47; Without shrink-wrapping, epilogue is in the exit block.
48; Epilogue code. (What we pop does not matter.)
49; DISABLE: mtlr {{[0-9]+}}
50; DISABLE-NEXT: blr
51;
52
; When %a < %b (signed), stores %a to a stack slot and returns
; @doSomething(0, <slot address>); otherwise returns %a unchanged.
53define i32 @foo(i32 %a, i32 %b) {
54  %tmp = alloca i32, align 4
55  %tmp2 = icmp slt i32 %a, %b
56  br i1 %tmp2, label %true, label %false
57
58true:
59  store i32 %a, i32* %tmp, align 4
60  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
61  br label %false
62
63false:
64  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
65  ret i32 %tmp.0
66}
67
68; External helper called by @foo (declared without any function attributes).
69declare i32 @doSomething(i32, i32*)
70
71
72
73; Check that we do not perform the restore inside the loop whereas the save
74; is outside.
75; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
76;
77; Shrink-wrapping allows skipping the prologue in the else case.
78; ENABLE: cmplwi 3, 0
79; ENABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
80;
81; Prologue code.
82; Make sure we save the link register
83; CHECK: mflr {{[0-9]+}}
84;
85; DISABLE: cmplwi 3, 0
86; DISABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
87;
88; Loop preheader
89; CHECK-DAG: li [[SUM:[0-9]+]], 0
90; CHECK-DAG: li [[IV:[0-9]+]], 10
91;
92; Loop body
93; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
94; CHECK: bl something
95; CHECK-DAG: addi [[IV]], [[IV]], -1
96; CHECK-DAG: add [[SUM]], 3, [[SUM]]
97; CHECK-NEXT: cmplwi [[IV]], 0
98; CHECK-NEXT: bne 0, .[[LOOP]]
99;
100; Next BB.
101; CHECK: slwi 3, [[SUM]], 3
102;
103; Jump to epilogue.
104; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
105;
106; DISABLE: .[[ELSE_LABEL]]: # %if.else
107; Shift second argument by one and store into returned register.
108; DISABLE: slwi 3, 4, 1
109; DISABLE: .[[EPILOG_BB]]: # %if.end
110;
111; Epilogue code.
112; CHECK: mtlr {{[0-9]+}}
113; CHECK: blr
114;
115; ENABLE: .[[ELSE_LABEL]]: # %if.else
116; Shift second argument by one and store into returned register.
117; ENABLE: slwi 3, 4, 1
118; ENABLE-NEXT: blr
; When %cond != 0, calls @something ten times and returns the accumulated
; sum shifted left by 3; otherwise returns %N shifted left by 1.
119define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
120entry:
121  %tobool = icmp eq i32 %cond, 0
122  br i1 %tobool, label %if.else, label %for.preheader
123
124for.preheader:
125  tail call void asm "nop", ""()
126  br label %for.body
127
128for.body:                                         ; preds = %for.preheader, %for.body
129  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
130  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
131  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
132  %add = add nsw i32 %call, %sum.04
133  %inc = add nuw nsw i32 %i.05, 1
134  %exitcond = icmp eq i32 %inc, 10
135  br i1 %exitcond, label %for.end, label %for.body
136
137for.end:                                          ; preds = %for.body
138  %shl = shl i32 %add, 3
139  br label %if.end
140
141if.else:                                          ; preds = %entry
142  %mul = shl nsw i32 %N, 1
143  br label %if.end
144
145if.end:                                           ; preds = %if.else, %for.end
146  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
147  ret i32 %sum.1
148}
149
150declare i32 @something(...)
151
152; Check that we do not perform the shrink-wrapping inside the loop even
153; though that would be legal. The cost model must prevent that.
154; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
155; Prologue code.
156; Make sure we save the link register before the call
157; CHECK: mflr {{[0-9]+}}
158;
159; Loop preheader
160; CHECK-DAG: li [[SUM:[0-9]+]], 0
161; CHECK-DAG: li [[IV:[0-9]+]], 10
162;
163; Loop body
164; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
165; CHECK: bl something
166; CHECK-DAG: addi [[IV]], [[IV]], -1
167; CHECK-DAG: add [[SUM]], 3, [[SUM]]
168; CHECK-NEXT: cmplwi [[IV]], 0
169; CHECK-NEXT: bne 0, .[[LOOP]]
170;
171; Next BB
172; CHECK: %for.exit
173; CHECK: mtlr {{[0-9]+}}
174; CHECK: blr
; Unconditionally calls @something ten times and returns the accumulated sum.
; The %cond argument is not used by the body.
175define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
176entry:
177  br label %for.preheader
178
179for.preheader:
180  tail call void asm "nop", ""()
181  br label %for.body
182
183for.body:                                         ; preds = %for.preheader, %for.body
184  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
185  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
186  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
187  %add = add nsw i32 %call, %sum.03
188  %inc = add nuw nsw i32 %i.04, 1
189  %exitcond = icmp eq i32 %inc, 10
190  br i1 %exitcond, label %for.exit, label %for.body
191
192for.exit:
193  tail call void asm "nop", ""()
194  br label %for.end
195
196for.end:                                          ; preds = %for.exit
197  ret i32 %add
198}
199
200
201; Check with a more complex case that we do not have save within the loop and
202; restore outside.
203; CHECK-LABEL: loopInfoSaveOutsideLoop:
204;
205; ENABLE: cmplwi 3, 0
206; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
207;
208; Prologue code.
209; Make sure we save the link register
210; CHECK: mflr {{[0-9]+}}
211;
212; DISABLE: std
213; DISABLE-NEXT: std
214; DISABLE: cmplwi 3, 0
215; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
216;
217; Loop preheader
218; CHECK-DAG: li [[SUM:[0-9]+]], 0
219; CHECK-DAG: li [[IV:[0-9]+]], 10
220;
221; Loop body
222; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
223; CHECK: bl something
224; CHECK-DAG: addi [[IV]], [[IV]], -1
225; CHECK-DAG: add [[SUM]], 3, [[SUM]]
226; CHECK-NEXT: cmplwi [[IV]], 0
227; CHECK-NEXT: bne 0, .[[LOOP]]
228;
229; Next BB
230; CHECK: bl somethingElse
231; CHECK: slwi 3, [[SUM]], 3
232;
233; Jump to epilogue
234; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
235;
236; DISABLE: .[[ELSE_LABEL]]: # %if.else
237; Shift second argument by one and store into returned register.
238; DISABLE: slwi 3, 4, 1
239;
240; DISABLE: .[[EPILOG_BB]]: # %if.end
241; Epilog code
242; CHECK: mtlr {{[0-9]+}}
243; CHECK: blr
244;
245; ENABLE: .[[ELSE_LABEL]]: # %if.else
246; Shift second argument by one and store into returned register.
247; ENABLE: slwi 3, 4, 1
248; ENABLE-NEXT: blr
; When %cond != 0, calls @something ten times, then calls @somethingElse once
; after the loop and returns the accumulated sum shifted left by 3; otherwise
; returns %N shifted left by 1.
249define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
250entry:
251  %tobool = icmp eq i32 %cond, 0
252  br i1 %tobool, label %if.else, label %for.preheader
253
254for.preheader:
255  tail call void asm "nop", ""()
256  br label %for.body
257
258for.body:                                         ; preds = %for.preheader, %for.body
259  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
260  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
261  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
262  %add = add nsw i32 %call, %sum.04
263  %inc = add nuw nsw i32 %i.05, 1
264  %exitcond = icmp eq i32 %inc, 10
265  br i1 %exitcond, label %for.end, label %for.body
266
267for.end:                                          ; preds = %for.body
268  tail call void bitcast (void (...)* @somethingElse to void ()*)()
269  %shl = shl i32 %add, 3
270  br label %if.end
271
272if.else:                                          ; preds = %entry
273  %mul = shl nsw i32 %N, 1
274  br label %if.end
275
276if.end:                                           ; preds = %if.else, %for.end
277  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
278  ret i32 %sum.1
279}
280
281declare void @somethingElse(...)
282
283; Check with a more complex case that we do not have restore within the loop and
284; save outside.
285; CHECK-LABEL: loopInfoRestoreOutsideLoop:
286;
287; ENABLE: cmplwi 3, 0
288; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
289;
290; Prologue code.
291; Make sure we save the link register
292; CHECK: mflr {{[0-9]+}}
293;
294; DISABLE: std
295; DISABLE-NEXT: std
296; DISABLE: cmplwi 3, 0
297; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
298;
299; CHECK: bl somethingElse
300;
301; Loop preheader
302; CHECK-DAG: li [[SUM:[0-9]+]], 0
303; CHECK-DAG: li [[IV:[0-9]+]], 10
304;
305; Loop body
306; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
307; CHECK: bl something
308; CHECK-DAG: addi [[IV]], [[IV]], -1
309; CHECK-DAG: add [[SUM]], 3, [[SUM]]
310; CHECK-NEXT: cmplwi [[IV]], 0
311; CHECK-NEXT: bne 0, .[[LOOP]]
312;
313; Next BB.
; The shift of the accumulated sum is verified here, as in the sibling loop
; tests in this file.
314; CHECK: slwi 3, [[SUM]], 3
315;
316; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
317;
318; DISABLE: .[[ELSE_LABEL]]: # %if.else
319; Shift second argument by one and store into returned register.
320; DISABLE: slwi 3, 4, 1
321; DISABLE: .[[EPILOG_BB]]: # %if.end
322;
323; Epilogue code.
324; CHECK: mtlr {{[0-9]+}}
325; CHECK: blr
326;
327; ENABLE: .[[ELSE_LABEL]]: # %if.else
328; Shift second argument by one and store into returned register.
329; ENABLE: slwi 3, 4, 1
330; ENABLE-NEXT: blr
; When %cond != 0, calls @somethingElse once, then calls @something ten times
; and returns the accumulated sum shifted left by 3; otherwise returns %N
; shifted left by 1.
331define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
332entry:
333  %tobool = icmp eq i32 %cond, 0
334  br i1 %tobool, label %if.else, label %if.then
335
336if.then:                                          ; preds = %entry
337  tail call void bitcast (void (...)* @somethingElse to void ()*)()
338  br label %for.body
339
340for.body:                                         ; preds = %for.body, %if.then
341  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
342  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
343  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
344  %add = add nsw i32 %call, %sum.04
345  %inc = add nuw nsw i32 %i.05, 1
346  %exitcond = icmp eq i32 %inc, 10
347  br i1 %exitcond, label %for.end, label %for.body
348
349for.end:                                          ; preds = %for.body
350  %shl = shl i32 %add, 3
351  br label %if.end
352
353if.else:                                          ; preds = %entry
354  %mul = shl nsw i32 %N, 1
355  br label %if.end
356
357if.end:                                           ; preds = %if.else, %for.end
358  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
359  ret i32 %sum.1
360}
361
362; Check that we handle function with no frame information correctly.
363; CHECK-LABEL: emptyFrame:
364; CHECK: # %entry
365; CHECK-NEXT: li 3, 0
366; CHECK-NEXT: blr
; Returns the constant 0; the body contains no calls, allocas, or inline asm.
367define i32 @emptyFrame() {
368entry:
369  ret i32 0
370}
371
372
373; Check that we handle inline asm correctly.
374; CHECK-LABEL: inlineAsm:
375;
376; ENABLE: cmplwi 3, 0
377; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
378;
379; Prologue code.
380; Make sure we save the CSR used in the inline asm: r14
381; ENABLE-DAG: li [[IV:[0-9]+]], 10
382; ENABLE-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
383;
384; DISABLE: cmplwi 3, 0
385; DISABLE-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
386; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
387; DISABLE: li [[IV:[0-9]+]], 10
388;
389; CHECK: nop
390; CHECK: mtctr [[IV]]
391;
392; CHECK: .[[LOOP_LABEL:LBB[0-9_]+]]: # %for.body
393; Inline asm statement.
394; CHECK: addi 14, 14, 1
395; CHECK: bdnz .[[LOOP_LABEL]]
396;
397; Epilogue code.
398; CHECK: li 3, 0
399; CHECK-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
400; CHECK: nop
401; CHECK: blr
402;
403; CHECK: [[ELSE_LABEL]]
404; CHECK-NEXT: slwi 3, 4, 1
405; DISABLE: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
406; CHECK-NEXT: blr
407;
; When %cond != 0, runs ten iterations of an inline asm statement that lists
; r14 as clobbered and then returns 0; otherwise returns %N shifted left by 1.
408define i32 @inlineAsm(i32 %cond, i32 %N) {
409entry:
410  %tobool = icmp eq i32 %cond, 0
411  br i1 %tobool, label %if.else, label %for.preheader
412
413for.preheader:
414  tail call void asm "nop", ""()
415  br label %for.body
416
417for.body:                                         ; preds = %for.preheader, %for.body
418  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
419  tail call void asm "addi 14, 14, 1", "~{r14}"()
420  %inc = add nuw nsw i32 %i.03, 1
421  %exitcond = icmp eq i32 %inc, 10
422  br i1 %exitcond, label %for.exit, label %for.body
423
424for.exit:
425  tail call void asm "nop", ""()
426  br label %if.end
427
428if.else:                                          ; preds = %entry
429  %mul = shl nsw i32 %N, 1
430  br label %if.end
431
432if.end:                                           ; preds = %for.exit, %if.else
433  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
434  ret i32 %sum.0
435}
436
437
438; Check that we handle calls to variadic functions correctly.
439; CHECK-LABEL: callVariadicFunc:
440;
441; ENABLE: cmplwi 3, 0
442; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
443;
444; Prologue code.
445; CHECK: mflr {{[0-9]+}}
446;
447; DISABLE: cmplwi 3, 0
448; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
449;
450; Setup of the varargs.
451; CHECK: mr 4, 3
452; CHECK-NEXT: mr 5, 3
453; CHECK-NEXT: mr 6, 3
454; CHECK-NEXT: mr 7, 3
455; CHECK-NEXT: mr 8, 3
456; CHECK-NEXT: mr 9, 3
457; CHECK-NEXT: bl someVariadicFunc
458; CHECK: slwi 3, 3, 3
459; DISABLE: b .[[EPILOGUE_BB:LBB[0-9_]+]]
460;
461; ENABLE: mtlr {{[0-9]+}}
462; ENABLE-NEXT: blr
463;
464; CHECK: .[[ELSE_LABEL]]: # %if.else
465; CHECK-NEXT: slwi 3, 4, 1
466;
467; DISABLE: .[[EPILOGUE_BB]]: # %if.end
468; DISABLE: mtlr
469; CHECK: blr
; When %cond != 0, passes %N seven times to @someVariadicFunc and returns the
; result shifted left by 3; otherwise returns %N shifted left by 1.
470define i32 @callVariadicFunc(i32 %cond, i32 %N) {
471entry:
472  %tobool = icmp eq i32 %cond, 0
473  br i1 %tobool, label %if.else, label %if.then
474
475if.then:                                          ; preds = %entry
476  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
477  %shl = shl i32 %call, 3
478  br label %if.end
479
480if.else:                                          ; preds = %entry
481  %mul = shl nsw i32 %N, 1
482  br label %if.end
483
484if.end:                                           ; preds = %if.else, %if.then
485  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
486  ret i32 %sum.0
487}
488
489declare i32 @someVariadicFunc(i32, ...)
490
491
492
493; Make sure we do not insert unreachable code after noreturn function.
494; Although this is not incorrect to insert such code, it is useless
495; and it hurts the binary size.
496;
497; CHECK-LABEL: noreturn:
498; DISABLE: mflr {{[0-9]+}}
499;
500; CHECK: cmplwi 3, 0
501; CHECK-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]]
502;
503; CHECK: li 3, 42
504;
505; DISABLE: mtlr {{[0-9]+}}
506;
507; CHECK-NEXT: blr
508;
509; CHECK: .[[ABORT]]: # %if.abort
510;
511; ENABLE: mflr {{[0-9]+}}
512;
513; CHECK: bl abort
514; ENABLE-NOT: mtlr {{[0-9]+}}
; Returns 42 when %bad_thing == 0; otherwise calls @abort (declared noreturn)
; and ends in an unreachable terminator.
515define i32 @noreturn(i8 signext %bad_thing) {
516entry:
517  %tobool = icmp eq i8 %bad_thing, 0
518  br i1 %tobool, label %if.end, label %if.abort
519
520if.abort:                                         ; preds = %entry
521  tail call void @abort() #0
522  unreachable
523
524if.end:                                           ; preds = %entry
525  ret i32 42
526}
527
528declare void @abort() #0
529
530attributes #0 = { noreturn nounwind }
531
532
533; Make sure that we handle infinite loops properly. When checking that the Save
534; and Restore blocks are control flow equivalent, the loop searches for the
535; immediate (post) dominator for the (restore) save blocks. When either the Save
536; or Restore block is located in an infinite loop the only immediate (post)
537; dominator is itself. In this case, we cannot perform shrink wrapping, but we
538; should return gracefully and continue compilation.
539; The only condition for this test is that the compilation finishes correctly.
540;
541; CHECK-LABEL: infiniteloop
542; CHECK: blr
; Branches on an undef condition: %if.then enters a single-block loop with no
; exit edge that calls @something and stores the running sum to %ptr, while
; %if.end simply returns.
543define void @infiniteloop() {
544entry:
545  br i1 undef, label %if.then, label %if.end
546
547if.then:
548  %ptr = alloca i32, i32 4
549  br label %for.body
550
551for.body:                                         ; preds = %for.body, %if.then
552  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
553  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
554  %add = add nsw i32 %call, %sum.03
555  store i32 %add, i32* %ptr
556  br label %for.body
557
558if.end:
559  ret void
560}
561
562; Another infinite loop test this time with a body bigger than just one block.
563; CHECK-LABEL: infiniteloop2
564; CHECK: blr
; Same shape as @infiniteloop, but the loop with no exit edge spans three
; blocks (%for.body, %body1, %body2) and uses inline asm that lists r14 as
; clobbered instead of a call.
565define void @infiniteloop2() {
566entry:
567  br i1 undef, label %if.then, label %if.end
568
569if.then:
570  %ptr = alloca i32, i32 4
571  br label %for.body
572
573for.body:                                         ; preds = %if.then, %body1, %body2
574  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
575  %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
576  %add = add nsw i32 %call, %sum.03
577  store i32 %add, i32* %ptr
578  br i1 undef, label %body1, label %body2
579
580body1:
581  tail call void asm sideeffect "nop", "~{r14}"()
582  br label %for.body
583
584body2:
585  tail call void asm sideeffect "nop", "~{r14}"()
586  br label %for.body
587
588if.end:
589  ret void
590}
591
592; Another infinite loop test, this time with two nested infinite loops.
593; CHECK-LABEL: infiniteloop3
594; CHECK: Lfunc_begin[[FUNCNUM:[0-9]+]]
595; CHECK: bclr
596; CHECK: Lfunc_end[[FUNCNUM]]
; %entry and %body branch on undef conditions into %loop2a or to %end.
; %loop1, %loop2a and %loop2b only branch among themselves, so once entered
; the function never reaches %end from them.
597define void @infiniteloop3() {
598entry:
599  br i1 undef, label %loop2a, label %body
600
601body:                                             ; preds = %entry
602  br i1 undef, label %loop2a, label %end
603
604loop1:                                            ; preds = %loop2a, %loop2b
605  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
606  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
607  %0 = icmp eq i32* %var, null
608  %next.load = load i32*, i32** undef
609  br i1 %0, label %loop2a, label %loop2b
610
611loop2a:                                           ; preds = %loop1, %body, %entry
612  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
613  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
614  br label %loop1
615
616loop2b:                                           ; preds = %loop1
617  %gep1 = bitcast i32* %var.phi to i32*
618  %next.ptr = bitcast i32* %gep1 to i32**
619  store i32* %next.phi, i32** %next.ptr
620  br label %loop1
621
622end:                                              ; preds = %body
623  ret void
624}
625
; Globals read and written by @transpose: @columns is an external array of
; i32, the remaining ones are zero/null-initialized common globals.
626@columns = external global [0 x i32], align 4
627@lock = common global i32 0, align 4
628@htindex = common global i32 0, align 4
629@stride = common global i32 0, align 4
630@ht = common global i32* null, align 8
631@he = common global i8* null, align 8
632
633; Test for a bug that was caused when save point was equal to restore point.
634;
635; CHECK-LABEL: transpose
636;
637; Store of callee-save register saved by shrink wrapping
638; FIXME: Test disabled: Improved scheduling needs no spills/reloads any longer!
639; CHECKXX: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
640;
641; Reload of callee-save register
642; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
643;
644; Ensure no subsequent uses of callee-save register before end of function
; Disabled together with the two checks above: they are the only place that
; binds CSR, so an active negative check here would use an unbound variable.
645; CHECKXX-NOT: {{[a-z]+}} [[CSR]]
646; CHECK: blr
; Builds a 64-bit key from entries 1-7 of @columns, stores derived values to
; @lock, @htindex and @stride, then probes @ht up to eight times for an entry
; equal to the value stored in @lock. On a hit it returns the sign-extended
; byte of @he at the matching index; if all probes miss it returns -128.
647define signext i32 @transpose() {
648entry:
649  %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4
650  %shl.i = shl i32 %0, 7
651  %1 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 2), align 4
652  %or.i = or i32 %shl.i, %1
653  %shl1.i = shl i32 %or.i, 7
654  %2 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 3), align 4
655  %or2.i = or i32 %shl1.i, %2
656  %3 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 7), align 4
657  %shl3.i = shl i32 %3, 7
658  %4 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 6), align 4
659  %or4.i = or i32 %shl3.i, %4
660  %shl5.i = shl i32 %or4.i, 7
661  %5 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 5), align 4
662  %or6.i = or i32 %shl5.i, %5
663  %cmp.i = icmp ugt i32 %or2.i, %or6.i
664  br i1 %cmp.i, label %cond.true.i, label %cond.false.i
665
666cond.true.i:
667  %shl7.i = shl i32 %or2.i, 7
668  %6 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
669  %or8.i = or i32 %6, %shl7.i
670  %conv.i = zext i32 %or8.i to i64
671  %shl9.i = shl nuw nsw i64 %conv.i, 21
672  %conv10.i = zext i32 %or6.i to i64
673  %or11.i = or i64 %shl9.i, %conv10.i
674  br label %hash.exit
675
676cond.false.i:
677  %shl12.i = shl i32 %or6.i, 7
678  %7 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
679  %or13.i = or i32 %7, %shl12.i
680  %conv14.i = zext i32 %or13.i to i64
681  %shl15.i = shl nuw nsw i64 %conv14.i, 21
682  %conv16.i = zext i32 %or2.i to i64
683  %or17.i = or i64 %shl15.i, %conv16.i
684  br label %hash.exit
685
686hash.exit:
687  %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
688  %shr.29.i = lshr i64 %cond.i, 17
689  %conv18.i = trunc i64 %shr.29.i to i32
690  store i32 %conv18.i, i32* @lock, align 4
691  %rem.i = srem i64 %cond.i, 1050011
692  %conv19.i = trunc i64 %rem.i to i32
693  store i32 %conv19.i, i32* @htindex, align 4
694  %rem20.i = urem i32 %conv18.i, 179
695  %add.i = or i32 %rem20.i, 131072
696  store i32 %add.i, i32* @stride, align 4
697  %8 = load i32*, i32** @ht, align 8
698  %arrayidx = getelementptr inbounds i32, i32* %8, i64 %rem.i
699  %9 = load i32, i32* %arrayidx, align 4
700  %cmp1 = icmp eq i32 %9, %conv18.i
701  br i1 %cmp1, label %if.then, label %if.end
702
703if.then:
704  %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
705  %10 = load i8*, i8** @he, align 8
706  %arrayidx3 = getelementptr inbounds i8, i8* %10, i64 %idxprom.lcssa
707  %11 = load i8, i8* %arrayidx3, align 1
708  %conv = sext i8 %11 to i32
709  br label %cleanup
710
711if.end:
712  %add = add nsw i32 %add.i, %conv19.i
713  %cmp4 = icmp sgt i32 %add, 1050010
714  %sub = add nsw i32 %add, -1050011
715  %sub.add = select i1 %cmp4, i32 %sub, i32 %add
716  %idxprom.1 = sext i32 %sub.add to i64
717  %arrayidx.1 = getelementptr inbounds i32, i32* %8, i64 %idxprom.1
718  %12 = load i32, i32* %arrayidx.1, align 4
719  %cmp1.1 = icmp eq i32 %12, %conv18.i
720  br i1 %cmp1.1, label %if.then, label %if.end.1
721
722cleanup:
723  %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
724  ret i32 %retval.0
725
726if.end.1:
727  %add.1 = add nsw i32 %add.i, %sub.add
728  %cmp4.1 = icmp sgt i32 %add.1, 1050010
729  %sub.1 = add nsw i32 %add.1, -1050011
730  %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
731  %idxprom.2 = sext i32 %sub.add.1 to i64
732  %arrayidx.2 = getelementptr inbounds i32, i32* %8, i64 %idxprom.2
733  %13 = load i32, i32* %arrayidx.2, align 4
734  %cmp1.2 = icmp eq i32 %13, %conv18.i
735  br i1 %cmp1.2, label %if.then, label %if.end.2
736
737if.end.2:
738  %add.2 = add nsw i32 %add.i, %sub.add.1
739  %cmp4.2 = icmp sgt i32 %add.2, 1050010
740  %sub.2 = add nsw i32 %add.2, -1050011
741  %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
742  %idxprom.3 = sext i32 %sub.add.2 to i64
743  %arrayidx.3 = getelementptr inbounds i32, i32* %8, i64 %idxprom.3
744  %14 = load i32, i32* %arrayidx.3, align 4
745  %cmp1.3 = icmp eq i32 %14, %conv18.i
746  br i1 %cmp1.3, label %if.then, label %if.end.3
747
748if.end.3:
749  %add.3 = add nsw i32 %add.i, %sub.add.2
750  %cmp4.3 = icmp sgt i32 %add.3, 1050010
751  %sub.3 = add nsw i32 %add.3, -1050011
752  %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
753  %idxprom.4 = sext i32 %sub.add.3 to i64
754  %arrayidx.4 = getelementptr inbounds i32, i32* %8, i64 %idxprom.4
755  %15 = load i32, i32* %arrayidx.4, align 4
756  %cmp1.4 = icmp eq i32 %15, %conv18.i
757  br i1 %cmp1.4, label %if.then, label %if.end.4
758
759if.end.4:
760  %add.4 = add nsw i32 %add.i, %sub.add.3
761  %cmp4.4 = icmp sgt i32 %add.4, 1050010
762  %sub.4 = add nsw i32 %add.4, -1050011
763  %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
764  %idxprom.5 = sext i32 %sub.add.4 to i64
765  %arrayidx.5 = getelementptr inbounds i32, i32* %8, i64 %idxprom.5
766  %16 = load i32, i32* %arrayidx.5, align 4
767  %cmp1.5 = icmp eq i32 %16, %conv18.i
768  br i1 %cmp1.5, label %if.then, label %if.end.5
769
770if.end.5:
771  %add.5 = add nsw i32 %add.i, %sub.add.4
772  %cmp4.5 = icmp sgt i32 %add.5, 1050010
773  %sub.5 = add nsw i32 %add.5, -1050011
774  %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
775  %idxprom.6 = sext i32 %sub.add.5 to i64
776  %arrayidx.6 = getelementptr inbounds i32, i32* %8, i64 %idxprom.6
777  %17 = load i32, i32* %arrayidx.6, align 4
778  %cmp1.6 = icmp eq i32 %17, %conv18.i
779  br i1 %cmp1.6, label %if.then, label %if.end.6
780
781if.end.6:
782  %add.6 = add nsw i32 %add.i, %sub.add.5
783  %cmp4.6 = icmp sgt i32 %add.6, 1050010
784  %sub.6 = add nsw i32 %add.6, -1050011
785  %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
786  %idxprom.7 = sext i32 %sub.add.6 to i64
787  %arrayidx.7 = getelementptr inbounds i32, i32* %8, i64 %idxprom.7
788  %18 = load i32, i32* %arrayidx.7, align 4
789  %cmp1.7 = icmp eq i32 %18, %conv18.i
790  br i1 %cmp1.7, label %if.then, label %cleanup
791}
792