1; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
2; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false |  FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
3;
4; Note: Lots of tests use inline asm instead of regular calls.
5; This gives better control over what the register allocation will do.
6; Otherwise, we may have a spill right in the entry block, defeating
7; shrink-wrapping. Moreover, some of the inline asm statements (nop)
8; are here to ensure that the related paths do not end up as critical
9; edges.
10
11
12; Initial motivating example: Simple diamond with a call just on one side.
13; CHECK-LABEL: foo:
14;
15; Compare the arguments and return
16; No prologue needed.
17; ENABLE: cmpw 0, 3, 4
18; ENABLE-NEXT: bgelr 0
19;
20; Prologue code.
21;  At a minimum, we save/restore the link register. Other registers may be saved
22;  as well.
23; CHECK: mflr
24;
25; Compare the arguments and jump to exit.
26; After the prologue is set.
27; DISABLE: cmpw 0, 3, 4
28; DISABLE-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]]
29;
30; Store %a on the stack (parentheses are escaped to match the literal offset(base) operand)
31; CHECK: stw 3, {{[0-9]+\([0-9]+\)}}
32; Set the alloca address in the second argument.
33; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
34; Set the first argument to zero.
35; CHECK-NEXT: li 3, 0
36; CHECK-NEXT: bl doSomething
37;
38; With shrink-wrapping, epilogue is just after the call.
39; Restore the link register and return.
40; Note that there could be other epilog code before the link register is
41; restored but we will not check for it here.
42; ENABLE: mtlr
43; ENABLE-NEXT: blr
44;
45; DISABLE: [[EXIT_LABEL]]:
46;
47; Without shrink-wrapping, epilogue is in the exit block.
48; Epilogue code. (What we pop does not matter.)
49; DISABLE: mtlr {{[0-9]+}}
50; DISABLE-NEXT: blr
51;
52
53define i32 @foo(i32 %a, i32 %b) {
54  %tmp = alloca i32, align 4
55  %tmp2 = icmp slt i32 %a, %b
56  br i1 %tmp2, label %true, label %false
57
58true:                                             ; only this side of the diamond makes a call
59  store i32 %a, i32* %tmp, align 4
60  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
61  br label %false
62
63false:                                            ; returns %a unchanged when the call was skipped
64  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
65  ret i32 %tmp.0
66}
67
68; Function Attrs: optsize
69declare i32 @doSomething(i32, i32*)
70
71
72
73; Check that we do not perform the restore inside the loop whereas the save
74; is outside.
75; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
76;
77; Shrink-wrapping allows us to skip the prologue in the else case.
78; ENABLE: cmplwi 0, 3, 0
79; ENABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
80;
81; Prologue code.
82; Make sure we save the link register
83; CHECK: mflr {{[0-9]+}}
84;
85; DISABLE: cmplwi 0, 3, 0
86; DISABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
87;
88; Loop preheader
89; CHECK-DAG: li [[SUM:[0-9]+]], 0
90; CHECK-DAG: li [[IV:[0-9]+]], 10
91;
92; Loop body
93; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
94; CHECK: bl something
95; CHECK-DAG: addi [[IV]], [[IV]], -1
96; CHECK-DAG: add [[SUM]], 3, [[SUM]]
97; CHECK-NEXT: cmplwi [[IV]], 0
98; CHECK-NEXT: bne 0, .[[LOOP]]
99;
100; Next BB.
101; CHECK: slwi 3, [[SUM]], 3
102;
103; Jump to epilogue.
104; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
105;
106; DISABLE: .[[ELSE_LABEL]]: # %if.else
107; Shift second argument by one and store into returned register.
108; DISABLE: slwi 3, 4, 1
109; DISABLE: .[[EPILOG_BB]]: # %if.end
110;
111; Epilogue code.
112; CHECK: mtlr {{[0-9]+}}
113; CHECK-NEXT: blr
114;
115; ENABLE: .[[ELSE_LABEL]]: # %if.else
116; Shift second argument by one and store into returned register.
117; ENABLE: slwi 3, 4, 1
118; ENABLE-NEXT: blr
119define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
120entry:
121  %tobool = icmp eq i32 %cond, 0
122  br i1 %tobool, label %if.else, label %for.preheader   ; %cond == 0 takes the call-free path
123
124for.preheader:
125  tail call void asm "nop", ""()                  ; nop keeps this block from being folded away (see file header note)
126  br label %for.body
127
128for.body:                                         ; preds = %for.preheader, %for.body
129  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
130  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
131  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
132  %add = add nsw i32 %call, %sum.04
133  %inc = add nuw nsw i32 %i.05, 1
134  %exitcond = icmp eq i32 %inc, 10                ; ten iterations, each calling @something
135  br i1 %exitcond, label %for.end, label %for.body
136
137for.end:                                          ; preds = %for.body
138  %shl = shl i32 %add, 3
139  br label %if.end
140
141if.else:                                          ; preds = %entry
142  %mul = shl nsw i32 %N, 1
143  br label %if.end
144
145if.end:                                           ; preds = %if.else, %for.end
146  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
147  ret i32 %sum.1
148}
149
150declare i32 @something(...)
151
152; Check that we do not perform the shrink-wrapping inside the loop even
153; though that would be legal. The cost model must prevent that.
154; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
155; Prologue code.
156; Make sure we save the link register before the call
157; CHECK: mflr {{[0-9]+}}
158;
159; Loop preheader
160; CHECK-DAG: li [[SUM:[0-9]+]], 0
161; CHECK-DAG: li [[IV:[0-9]+]], 10
162;
163; Loop body
164; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
165; CHECK: bl something
166; CHECK-DAG: addi [[IV]], [[IV]], -1
167; CHECK-DAG: add [[SUM]], 3, [[SUM]]
168; CHECK-NEXT: cmplwi [[IV]], 0
169; CHECK-NEXT: bne 0, .[[LOOP]]
170;
171; Next BB
172; CHECK: %for.end
173; CHECK: mtlr {{[0-9]+}}
174; CHECK-NEXT: blr
175define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {   ; %cond is never read in the body
176entry:
177  br label %for.preheader
178
179for.preheader:
180  tail call void asm "nop", ""()
181  br label %for.body
182
183for.body:                                         ; preds = %for.body, %for.preheader
184  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
185  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
186  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
187  %add = add nsw i32 %call, %sum.03
188  %inc = add nuw nsw i32 %i.04, 1
189  %exitcond = icmp eq i32 %inc, 10                ; ten iterations, each calling @something
190  br i1 %exitcond, label %for.exit, label %for.body
191
192for.exit:
193  tail call void asm "nop", ""()
194  br label %for.end
195
196for.end:                                          ; preds = %for.exit
197  ret i32 %add
198}
199
200
201; Check with a more complex case that we do not have save within the loop and
202; restore outside.
203; CHECK-LABEL: loopInfoSaveOutsideLoop:
204;
205; ENABLE: cmplwi 0, 3, 0
206; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
207;
208; Prologue code.
209; Make sure we save the link register
210; CHECK: mflr {{[0-9]+}}
211;
212; DISABLE: cmplwi 0, 3, 0
213; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
214;
215; Loop preheader
216; CHECK-DAG: li [[SUM:[0-9]+]], 0
217; CHECK-DAG: li [[IV:[0-9]+]], 10
218;
219; Loop body
220; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
221; CHECK: bl something
222; CHECK-DAG: addi [[IV]], [[IV]], -1
223; CHECK-DAG: add [[SUM]], 3, [[SUM]]
224; CHECK-NEXT: cmplwi [[IV]], 0
225; CHECK-NEXT: bne 0, .[[LOOP]]
226;
227; Next BB
228; CHECK: bl somethingElse
229; CHECK: slwi 3, [[SUM]], 3
230;
231; Jump to epilogue
232; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
233;
234; DISABLE: .[[ELSE_LABEL]]: # %if.else
235; Shift second argument by one and store into returned register.
236; DISABLE: slwi 3, 4, 1
237;
238; DISABLE: .[[EPILOG_BB]]: # %if.end
239; Epilog code
240; CHECK: mtlr {{[0-9]+}}
241; CHECK-NEXT: blr
242;
243; ENABLE: .[[ELSE_LABEL]]: # %if.else
244; Shift second argument by one and store into returned register.
245; ENABLE: slwi 3, 4, 1
246; ENABLE-NEXT: blr
247define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
248entry:
249  %tobool = icmp eq i32 %cond, 0
250  br i1 %tobool, label %if.else, label %for.preheader   ; %cond == 0 takes the call-free path
251
252for.preheader:
253  tail call void asm "nop", ""()
254  br label %for.body
255
256for.body:                                         ; preds = %for.preheader, %for.body
257  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
258  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
259  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
260  %add = add nsw i32 %call, %sum.04
261  %inc = add nuw nsw i32 %i.05, 1
262  %exitcond = icmp eq i32 %inc, 10
263  br i1 %exitcond, label %for.end, label %for.body
264
265for.end:                                          ; preds = %for.body
266  tail call void bitcast (void (...)* @somethingElse to void ()*)()   ; second call site, after the loop
267  %shl = shl i32 %add, 3
268  br label %if.end
269
270if.else:                                          ; preds = %entry
271  %mul = shl nsw i32 %N, 1
272  br label %if.end
273
274if.end:                                           ; preds = %if.else, %for.end
275  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
276  ret i32 %sum.1
277}
278
279declare void @somethingElse(...)
280
281; Check with a more complex case that we do not have restore within the loop and
282; save outside.
283; CHECK-LABEL: loopInfoRestoreOutsideLoop:
284;
285; ENABLE: cmplwi 0, 3, 0
286; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
287;
288; Prologue code.
289; Make sure we save the link register
290; CHECK: mflr {{[0-9]+}}
291;
292; DISABLE: cmplwi 0, 3, 0
293; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
294;
295; CHECK: bl somethingElse
296;
297; Loop preheader
298; CHECK-DAG: li [[SUM:[0-9]+]], 0
299; CHECK-DAG: li [[IV:[0-9]+]], 10
300;
301; Loop body
302; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
303; CHECK: bl something
304; CHECK-DAG: addi [[IV]], [[IV]], -1
305; CHECK-DAG: add [[SUM]], 3, [[SUM]]
306; CHECK-NEXT: cmplwi [[IV]], 0
307; CHECK-NEXT: bne 0, .[[LOOP]]
308;
309; Next BB.
310; CHECK: slwi 3, [[SUM]], 3
311;
312; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
313;
314; DISABLE: .[[ELSE_LABEL]]: # %if.else
315; Shift second argument by one and store into returned register.
316; DISABLE: slwi 3, 4, 1
317; DISABLE: .[[EPILOG_BB]]: # %if.end
318;
319; Epilogue code.
320; CHECK: mtlr {{[0-9]+}}
321; CHECK-NEXT: blr
322;
323; ENABLE: .[[ELSE_LABEL]]: # %if.else
324; Shift second argument by one and store into returned register.
325; ENABLE: slwi 3, 4, 1
326; ENABLE-NEXT: blr
327define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {   ; NOTE(review): #0 is the noreturn/nounwind group, yet if.end returns - confirm intended
328entry:
329  %tobool = icmp eq i32 %cond, 0
330  br i1 %tobool, label %if.else, label %if.then
331
332if.then:                                          ; preds = %entry
333  tail call void bitcast (void (...)* @somethingElse to void ()*)()   ; first call site, ahead of the loop
334  br label %for.body
335
336for.body:                                         ; preds = %for.body, %if.then
337  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
338  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
339  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
340  %add = add nsw i32 %call, %sum.04
341  %inc = add nuw nsw i32 %i.05, 1
342  %exitcond = icmp eq i32 %inc, 10
343  br i1 %exitcond, label %for.end, label %for.body
344
345for.end:                                          ; preds = %for.body
346  %shl = shl i32 %add, 3
347  br label %if.end
348
349if.else:                                          ; preds = %entry
350  %mul = shl nsw i32 %N, 1
351  br label %if.end
352
353if.end:                                           ; preds = %if.else, %for.end
354  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
355  ret i32 %sum.1
356}
357
358; Check that we handle function with no frame information correctly.
359; CHECK-LABEL: emptyFrame:
360; CHECK: # %entry
361; CHECK-NEXT: li 3, 0
362; CHECK-NEXT: blr
363define i32 @emptyFrame() {
364entry:
365  ret i32 0                                       ; only instruction in the function: no calls, allocas or clobbers
366}
367
368
369; Check that we handle inline asm correctly.
370; CHECK-LABEL: inlineAsm:
371;
372; ENABLE: cmplwi 0, 3, 0
373; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
374;
375; Prologue code.
376; Make sure we save the CSR used in the inline asm: r14
377; ENABLE-DAG: li [[IV:[0-9]+]], 10
378; ENABLE-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
379;
380; DISABLE: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
381; DISABLE: cmplwi 0, 3, 0
382; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
383; DISABLE: li [[IV:[0-9]+]], 10
384;
385; CHECK: nop
386; CHECK: mtctr [[IV]]
387;
388; CHECK: .[[LOOP_LABEL:LBB[0-9_]+]]: # %for.body
389; Inline asm statement.
390; CHECK: addi 14, 14, 1
391; CHECK: bdnz .[[LOOP_LABEL]]
392;
393; Epilogue code.
394; CHECK: li 3, 0
395; CHECK-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
396; CHECK: nop
397; CHECK: blr
398;
399; CHECK: [[ELSE_LABEL]]
400; CHECK-NEXT: slwi 3, 4, 1
401; DISABLE: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
402; CHECK-NEXT: blr
403;
404define i32 @inlineAsm(i32 %cond, i32 %N) {
405entry:
406  %tobool = icmp eq i32 %cond, 0
407  br i1 %tobool, label %if.else, label %for.preheader
408
409for.preheader:
410  tail call void asm "nop", ""()
411  br label %for.body
412
413for.body:                                         ; preds = %for.preheader, %for.body
414  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
415  tail call void asm "addi 14, 14, 1", "~{r14}"()   ; the r14 clobber is what forces the save/restore checked above
416  %inc = add nuw nsw i32 %i.03, 1
417  %exitcond = icmp eq i32 %inc, 10
418  br i1 %exitcond, label %for.exit, label %for.body
419
420for.exit:
421  tail call void asm "nop", ""()
422  br label %if.end
423
424if.else:                                          ; preds = %entry
425  %mul = shl nsw i32 %N, 1
426  br label %if.end
427
428if.end:                                           ; preds = %for.exit, %if.else
429  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
430  ret i32 %sum.0
431}
432
433
434; Check that we handle calls to variadic functions correctly.
435; CHECK-LABEL: callVariadicFunc:
436;
437; ENABLE: cmplwi 0, 3, 0
438; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
439;
440; Prologue code.
441; CHECK: mflr {{[0-9]+}}
442;
443; DISABLE: cmplwi 0, 3, 0
444; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
445;
446; Setup of the varargs.
447; CHECK: mr 4, 3
448; CHECK-NEXT: mr 5, 3
449; CHECK-NEXT: mr 6, 3
450; CHECK-NEXT: mr 7, 3
451; CHECK-NEXT: mr 8, 3
452; CHECK-NEXT: mr 9, 3
453; CHECK-NEXT: bl someVariadicFunc
454; CHECK: slwi 3, 3, 3
455; DISABLE: b .[[EPILOGUE_BB:LBB[0-9_]+]]
456;
457; ENABLE: mtlr {{[0-9]+}}
458; ENABLE-NEXT: blr
459;
460; CHECK: .[[ELSE_LABEL]]: # %if.else
461; CHECK-NEXT: slwi 3, 4, 1
462;
463; DISABLE: .[[EPILOGUE_BB]]: # %if.end
464; DISABLE: mtlr
465; CHECK: blr
466define i32 @callVariadicFunc(i32 %cond, i32 %N) {
467entry:
468  %tobool = icmp eq i32 %cond, 0
469  br i1 %tobool, label %if.else, label %if.then
470
471if.then:                                          ; preds = %entry
472  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)   ; all seven arguments are %N
473  %shl = shl i32 %call, 3
474  br label %if.end
475
476if.else:                                          ; preds = %entry
477  %mul = shl nsw i32 %N, 1
478  br label %if.end
479
480if.end:                                           ; preds = %if.else, %if.then
481  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
482  ret i32 %sum.0
483}
484
485declare i32 @someVariadicFunc(i32, ...)
486
487
488
489; Make sure we do not insert unreachable code after noreturn function.
490; Although this is not incorrect to insert such code, it is useless
491; and it hurts the binary size.
492;
493; CHECK-LABEL: noreturn:
494; DISABLE: mflr {{[0-9]+}}
495;
496; CHECK: cmplwi 3, 0
497; CHECK-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]]
498;
499; CHECK: li 3, 42
500;
501; DISABLE: mtlr {{[0-9]+}}
502;
503; CHECK-NEXT: blr
504;
505; CHECK: .[[ABORT]]: # %if.abort
506;
507; ENABLE: mflr {{[0-9]+}}
508;
509; CHECK: bl abort
510; ENABLE-NOT: mtlr {{[0-9]+}}
511define i32 @noreturn(i8 signext %bad_thing) {
512entry:
513  %tobool = icmp eq i8 %bad_thing, 0
514  br i1 %tobool, label %if.end, label %if.abort   ; non-zero %bad_thing takes the abort path
515
516if.abort:                                         ; preds = %entry
517  tail call void @abort() #0                      ; #0 marks the call noreturn nounwind
518  unreachable
519
520if.end:                                           ; preds = %entry
521  ret i32 42
522}
523
524declare void @abort() #0
525
526attributes #0 = { noreturn nounwind }
527
528
529; Make sure that we handle infinite loops properly. When checking that the Save
530; and Restore blocks are control flow equivalent, the loop searches for the
531; immediate (post) dominator for the (restore) save blocks. When either the Save
532; or Restore block is located in an infinite loop the only immediate (post)
533; dominator is itself. In this case, we cannot perform shrink wrapping, but we
534; should return gracefully and continue compilation.
535; The only condition for this test is the compilation finishes correctly.
536;
537; CHECK-LABEL: infiniteloop
538; CHECK: blr
539define void @infiniteloop() {
540entry:
541  br i1 undef, label %if.then, label %if.end
542
543if.then:
544  %ptr = alloca i32, i32 4
545  br label %for.body
546
547for.body:                                         ; preds = %for.body, %if.then
548  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
549  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
550  %add = add nsw i32 %call, %sum.03
551  store i32 %add, i32* %ptr
552  br label %for.body                              ; unconditional back edge: this block has no exit
553
554if.end:
555  ret void
556}
557
558; Another infinite loop test this time with a body bigger than just one block.
559; CHECK-LABEL: infiniteloop2
560; CHECK: blr
561define void @infiniteloop2() {
562entry:
563  br i1 undef, label %if.then, label %if.end
564
565if.then:
566  %ptr = alloca i32, i32 4
567  br label %for.body
568
569for.body:                                         ; preds = %body1, %body2, %if.then
570  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
571  %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
572  %add = add nsw i32 %call, %sum.03
573  store i32 %add, i32* %ptr
574  br i1 undef, label %body1, label %body2         ; both successors branch straight back to %for.body
575
576body1:
577  tail call void asm sideeffect "nop", "~{r14}"()
578  br label %for.body
579
580body2:
581  tail call void asm sideeffect "nop", "~{r14}"()
582  br label %for.body
583
584if.end:
585  ret void
586}
587
588; Another infinite loop test this time with two nested infinite loops.
589; CHECK-LABEL: infiniteloop3
590; CHECK: # %end
591define void @infiniteloop3() {
592entry:
593  br i1 undef, label %loop2a, label %body
594
595body:                                             ; preds = %entry
596  br i1 undef, label %loop2a, label %end
597
598loop1:                                            ; preds = %loop2a, %loop2b
599  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
600  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
601  %0 = icmp eq i32* %var, null
602  %next.load = load i32*, i32** undef
603  br i1 %0, label %loop2a, label %loop2b          ; either successor leads back to %loop1; no edge reaches %end
604
605loop2a:                                           ; preds = %loop1, %body, %entry
606  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
607  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
608  br label %loop1
609
610loop2b:                                           ; preds = %loop1
611  %gep1 = bitcast i32* %var.phi to i32*
612  %next.ptr = bitcast i32* %gep1 to i32**
613  store i32* %next.phi, i32** %next.ptr
614  br label %loop1
615
616end:                                              ; preds = %body
617  ret void
618}
619
620@columns = external global [0 x i32], align 4   ; @transpose loads elements 1 through 7
621@lock = common global i32 0, align 4             ; stored to by @transpose
622@htindex = common global i32 0, align 4          ; stored to by @transpose
623@stride = common global i32 0, align 4           ; stored to by @transpose
624@ht = common global i32* null, align 8           ; pointer loaded by @transpose, then indexed as i32
625@he = common global i8* null, align 8            ; pointer loaded by @transpose, then indexed as i8
626
627; Test for a bug that was caused when save point was equal to restore point.
628; Function Attrs: nounwind
629; CHECK-LABEL: transpose
630;
631; Store of callee-save register saved by shrink wrapping
632; FIXME: Test disabled: Improved scheduling needs no spills/reloads any longer!
633; CHECKXX: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
634;
635; Reload of callee-save register
636; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
637;
638; Ensure no subsequent uses of callee-save register before end of function (disabled too: CSR is only captured by the disabled spill pattern)
639; CHECKXX-NOT: {{[a-z]+}} [[CSR]]
640; CHECK: blr
641define signext i32 @transpose() {
642entry:
643  %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4
644  %shl.i = shl i32 %0, 7
645  %1 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 2), align 4
646  %or.i = or i32 %shl.i, %1
647  %shl1.i = shl i32 %or.i, 7
648  %2 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 3), align 4
649  %or2.i = or i32 %shl1.i, %2
650  %3 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 7), align 4
651  %shl3.i = shl i32 %3, 7
652  %4 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 6), align 4
653  %or4.i = or i32 %shl3.i, %4
654  %shl5.i = shl i32 %or4.i, 7
655  %5 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 5), align 4
656  %or6.i = or i32 %shl5.i, %5
657  %cmp.i = icmp ugt i32 %or2.i, %or6.i
658  br i1 %cmp.i, label %cond.true.i, label %cond.false.i
659
660cond.true.i:
661  %shl7.i = shl i32 %or2.i, 7
662  %6 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
663  %or8.i = or i32 %6, %shl7.i
664  %conv.i = zext i32 %or8.i to i64
665  %shl9.i = shl nuw nsw i64 %conv.i, 21
666  %conv10.i = zext i32 %or6.i to i64
667  %or11.i = or i64 %shl9.i, %conv10.i
668  br label %hash.exit
669
670cond.false.i:
671  %shl12.i = shl i32 %or6.i, 7
672  %7 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
673  %or13.i = or i32 %7, %shl12.i
674  %conv14.i = zext i32 %or13.i to i64
675  %shl15.i = shl nuw nsw i64 %conv14.i, 21
676  %conv16.i = zext i32 %or2.i to i64
677  %or17.i = or i64 %shl15.i, %conv16.i
678  br label %hash.exit
679
680hash.exit:
681  %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
682  %shr.29.i = lshr i64 %cond.i, 17
683  %conv18.i = trunc i64 %shr.29.i to i32
684  store i32 %conv18.i, i32* @lock, align 4
685  %rem.i = srem i64 %cond.i, 1050011
686  %conv19.i = trunc i64 %rem.i to i32
687  store i32 %conv19.i, i32* @htindex, align 4
688  %rem20.i = urem i32 %conv18.i, 179
689  %add.i = or i32 %rem20.i, 131072
690  store i32 %add.i, i32* @stride, align 4
691  %8 = load i32*, i32** @ht, align 8
692  %arrayidx = getelementptr inbounds i32, i32* %8, i64 %rem.i
693  %9 = load i32, i32* %arrayidx, align 4
694  %cmp1 = icmp eq i32 %9, %conv18.i
695  br i1 %cmp1, label %if.then, label %if.end
696
697if.then:                                          ; reached from whichever probe matched %conv18.i
698  %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
699  %10 = load i8*, i8** @he, align 8
700  %arrayidx3 = getelementptr inbounds i8, i8* %10, i64 %idxprom.lcssa
701  %11 = load i8, i8* %arrayidx3, align 1
702  %conv = sext i8 %11 to i32
703  br label %cleanup
704
705if.end:                                           ; if.end through if.end.6 repeat this step: add %add.i, subtract 1050011 when above 1050010, reload, compare
706  %add = add nsw i32 %add.i, %conv19.i
707  %cmp4 = icmp sgt i32 %add, 1050010
708  %sub = add nsw i32 %add, -1050011
709  %sub.add = select i1 %cmp4, i32 %sub, i32 %add
710  %idxprom.1 = sext i32 %sub.add to i64
711  %arrayidx.1 = getelementptr inbounds i32, i32* %8, i64 %idxprom.1
712  %12 = load i32, i32* %arrayidx.1, align 4
713  %cmp1.1 = icmp eq i32 %12, %conv18.i
714  br i1 %cmp1.1, label %if.then, label %if.end.1
715
716cleanup:                                          ; returns -128 when the last probe (if.end.6) also misses
717  %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
718  ret i32 %retval.0
719
720if.end.1:
721  %add.1 = add nsw i32 %add.i, %sub.add
722  %cmp4.1 = icmp sgt i32 %add.1, 1050010
723  %sub.1 = add nsw i32 %add.1, -1050011
724  %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
725  %idxprom.2 = sext i32 %sub.add.1 to i64
726  %arrayidx.2 = getelementptr inbounds i32, i32* %8, i64 %idxprom.2
727  %13 = load i32, i32* %arrayidx.2, align 4
728  %cmp1.2 = icmp eq i32 %13, %conv18.i
729  br i1 %cmp1.2, label %if.then, label %if.end.2
730
731if.end.2:
732  %add.2 = add nsw i32 %add.i, %sub.add.1
733  %cmp4.2 = icmp sgt i32 %add.2, 1050010
734  %sub.2 = add nsw i32 %add.2, -1050011
735  %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
736  %idxprom.3 = sext i32 %sub.add.2 to i64
737  %arrayidx.3 = getelementptr inbounds i32, i32* %8, i64 %idxprom.3
738  %14 = load i32, i32* %arrayidx.3, align 4
739  %cmp1.3 = icmp eq i32 %14, %conv18.i
740  br i1 %cmp1.3, label %if.then, label %if.end.3
741
742if.end.3:
743  %add.3 = add nsw i32 %add.i, %sub.add.2
744  %cmp4.3 = icmp sgt i32 %add.3, 1050010
745  %sub.3 = add nsw i32 %add.3, -1050011
746  %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
747  %idxprom.4 = sext i32 %sub.add.3 to i64
748  %arrayidx.4 = getelementptr inbounds i32, i32* %8, i64 %idxprom.4
749  %15 = load i32, i32* %arrayidx.4, align 4
750  %cmp1.4 = icmp eq i32 %15, %conv18.i
751  br i1 %cmp1.4, label %if.then, label %if.end.4
752
753if.end.4:
754  %add.4 = add nsw i32 %add.i, %sub.add.3
755  %cmp4.4 = icmp sgt i32 %add.4, 1050010
756  %sub.4 = add nsw i32 %add.4, -1050011
757  %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
758  %idxprom.5 = sext i32 %sub.add.4 to i64
759  %arrayidx.5 = getelementptr inbounds i32, i32* %8, i64 %idxprom.5
760  %16 = load i32, i32* %arrayidx.5, align 4
761  %cmp1.5 = icmp eq i32 %16, %conv18.i
762  br i1 %cmp1.5, label %if.then, label %if.end.5
763
764if.end.5:
765  %add.5 = add nsw i32 %add.i, %sub.add.4
766  %cmp4.5 = icmp sgt i32 %add.5, 1050010
767  %sub.5 = add nsw i32 %add.5, -1050011
768  %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
769  %idxprom.6 = sext i32 %sub.add.5 to i64
770  %arrayidx.6 = getelementptr inbounds i32, i32* %8, i64 %idxprom.6
771  %17 = load i32, i32* %arrayidx.6, align 4
772  %cmp1.6 = icmp eq i32 %17, %conv18.i
773  br i1 %cmp1.6, label %if.then, label %if.end.6
774
775if.end.6:
776  %add.6 = add nsw i32 %add.i, %sub.add.5
777  %cmp4.6 = icmp sgt i32 %add.6, 1050010
778  %sub.6 = add nsw i32 %add.6, -1050011
779  %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
780  %idxprom.7 = sext i32 %sub.add.6 to i64
781  %arrayidx.7 = getelementptr inbounds i32, i32* %8, i64 %idxprom.7
782  %18 = load i32, i32* %arrayidx.7, align 4
783  %cmp1.7 = icmp eq i32 %18, %conv18.i
784  br i1 %cmp1.7, label %if.then, label %cleanup
785}
786