1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=pwr9 < %s | FileCheck %s
4
; @foo is a two-level loop nest over two double arrays (%0 is only read,
; %1 is read and written).
;
;   Outer loop: induction variable %11 starts at %2, advances by %6, and
;               continues while it is (signed) less than %4.
;   Inner loop: induction variable %24 advances by %7 and continues while it
;               is (signed) less than %13 = %11 - %7. Its start value %12 is
;               %3 on first entry and afterwards the index carried out of the
;               previous outer iteration (%94).
;
; Each inner iteration loads %1[%24], adds to it, in order, the sixteen
; doubles %0[%24 + (%11 + r) * %5 + c] for r = 0..3 and c = 0..3, and stores
; the sum back to %1[%24].
;
; In the expected assembly below, the inner loop (.LBB0_5) performs the
; sixteen %0 loads as lfd with displacements -16, -8, 0 and 8 from four base
; registers (26, 25, 23, 24); those bases and the %1 pointer (27) are each
; advanced by register 11 once per iteration.
; NOTE(review): presumably the test exists to pin this addressing pattern --
; confirm against the change that introduced it.
5define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7) {
6; CHECK-LABEL: foo:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    cmpd 5, 7
9; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
10; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
11; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
12; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
13; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
14; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
15; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
16; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
17; CHECK-NEXT:    bge 0, .LBB0_6
18; CHECK-NEXT:  # %bb.1: # %.preheader
19; CHECK-NEXT:    addi 30, 5, 1
20; CHECK-NEXT:    addi 29, 5, 3
21; CHECK-NEXT:    addi 28, 5, 2
22; CHECK-NEXT:    mulld 12, 8, 5
23; CHECK-NEXT:    addi 3, 3, 16
24; CHECK-NEXT:    mulld 0, 9, 8
25; CHECK-NEXT:    sldi 11, 10, 3
26; CHECK-NEXT:    mulld 30, 8, 30
27; CHECK-NEXT:    mulld 29, 8, 29
28; CHECK-NEXT:    mulld 8, 8, 28
29; CHECK-NEXT:    b .LBB0_3
30; CHECK-NEXT:    .p2align 4
31; CHECK-NEXT:  .LBB0_2:
32; CHECK-NEXT:    add 5, 5, 9
33; CHECK-NEXT:    add 12, 12, 0
34; CHECK-NEXT:    add 30, 30, 0
35; CHECK-NEXT:    add 29, 29, 0
36; CHECK-NEXT:    add 8, 8, 0
37; CHECK-NEXT:    cmpd 5, 7
38; CHECK-NEXT:    bge 0, .LBB0_6
39; CHECK-NEXT:  .LBB0_3: # =>This Loop Header: Depth=1
40; CHECK-NEXT:    # Child Loop BB0_5 Depth 2
41; CHECK-NEXT:    sub 28, 5, 10
42; CHECK-NEXT:    cmpd 6, 28
43; CHECK-NEXT:    bge 0, .LBB0_2
44; CHECK-NEXT:  # %bb.4:
45; CHECK-NEXT:    add 26, 6, 12
46; CHECK-NEXT:    add 25, 6, 30
47; CHECK-NEXT:    add 24, 6, 29
48; CHECK-NEXT:    add 23, 6, 8
49; CHECK-NEXT:    sldi 27, 6, 3
50; CHECK-NEXT:    sldi 26, 26, 3
51; CHECK-NEXT:    sldi 25, 25, 3
52; CHECK-NEXT:    sldi 24, 24, 3
53; CHECK-NEXT:    sldi 23, 23, 3
54; CHECK-NEXT:    add 27, 4, 27
55; CHECK-NEXT:    add 26, 3, 26
56; CHECK-NEXT:    add 25, 3, 25
57; CHECK-NEXT:    add 24, 3, 24
58; CHECK-NEXT:    add 23, 3, 23
59; CHECK-NEXT:    .p2align 5
60; CHECK-NEXT:  .LBB0_5: # Parent Loop BB0_3 Depth=1
61; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
62; CHECK-NEXT:    lfd 0, 0(27)
63; CHECK-NEXT:    lfd 1, -16(26)
64; CHECK-NEXT:    add 6, 6, 10
65; CHECK-NEXT:    cmpd 6, 28
66; CHECK-NEXT:    xsadddp 0, 0, 1
67; CHECK-NEXT:    lfd 1, -8(26)
68; CHECK-NEXT:    xsadddp 0, 0, 1
69; CHECK-NEXT:    lfd 1, 0(26)
70; CHECK-NEXT:    xsadddp 0, 0, 1
71; CHECK-NEXT:    lfd 1, 8(26)
72; CHECK-NEXT:    add 26, 26, 11
73; CHECK-NEXT:    xsadddp 0, 0, 1
74; CHECK-NEXT:    lfd 1, -16(25)
75; CHECK-NEXT:    xsadddp 0, 0, 1
76; CHECK-NEXT:    lfd 1, -8(25)
77; CHECK-NEXT:    xsadddp 0, 0, 1
78; CHECK-NEXT:    lfd 1, 0(25)
79; CHECK-NEXT:    xsadddp 0, 0, 1
80; CHECK-NEXT:    lfd 1, 8(25)
81; CHECK-NEXT:    add 25, 25, 11
82; CHECK-NEXT:    xsadddp 0, 0, 1
83; CHECK-NEXT:    lfd 1, -16(23)
84; CHECK-NEXT:    xsadddp 0, 0, 1
85; CHECK-NEXT:    lfd 1, -8(23)
86; CHECK-NEXT:    xsadddp 0, 0, 1
87; CHECK-NEXT:    lfd 1, 0(23)
88; CHECK-NEXT:    xsadddp 0, 0, 1
89; CHECK-NEXT:    lfd 1, 8(23)
90; CHECK-NEXT:    add 23, 23, 11
91; CHECK-NEXT:    xsadddp 0, 0, 1
92; CHECK-NEXT:    lfd 1, -16(24)
93; CHECK-NEXT:    xsadddp 0, 0, 1
94; CHECK-NEXT:    lfd 1, -8(24)
95; CHECK-NEXT:    xsadddp 0, 0, 1
96; CHECK-NEXT:    lfd 1, 0(24)
97; CHECK-NEXT:    xsadddp 0, 0, 1
98; CHECK-NEXT:    lfd 1, 8(24)
99; CHECK-NEXT:    add 24, 24, 11
100; CHECK-NEXT:    xsadddp 0, 0, 1
101; CHECK-NEXT:    stfd 0, 0(27)
102; CHECK-NEXT:    add 27, 27, 11
103; CHECK-NEXT:    blt 0, .LBB0_5
104; CHECK-NEXT:    b .LBB0_2
105; CHECK-NEXT:  .LBB0_6:
106; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
107; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
108; CHECK-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
109; CHECK-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
110; CHECK-NEXT:    ld 26, -48(1) # 8-byte Folded Reload
111; CHECK-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
112; CHECK-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
113; CHECK-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
114; CHECK-NEXT:    blr
; Entry block (%8): enter the loop nest only when %2 < %4 (signed);
; otherwise go straight to the return in %97.
115  %9 = icmp slt i64 %2, %4
116  br i1 %9, label %10, label %97
117
; Outer loop header.
;   %11 = outer induction variable (starts at %2, next value %95).
;   %12 = index the inner loop starts from (starts at %3, next value %94).
;   %13 = inner loop bound, recomputed per outer iteration as %11 - %7.
; The inner loop is skipped when %12 >= %13.
11810:                                               ; preds = %8, %93
119  %11 = phi i64 [ %95, %93 ], [ %2, %8 ]
120  %12 = phi i64 [ %94, %93 ], [ %3, %8 ]
121  %13 = sub nsw i64 %11, %7
122  %14 = icmp slt i64 %12, %13
123  br i1 %14, label %15, label %93
124
; Inner loop preheader: compute the four row offsets that stay fixed during
; the inner loop: %16, %18, %20, %22 = (%11 + 0..3) * %5.
12515:                                               ; preds = %10
126  %16 = mul nsw i64 %11, %5
127  %17 = add nsw i64 %11, 1
128  %18 = mul nsw i64 %17, %5
129  %19 = add nsw i64 %11, 2
130  %20 = mul nsw i64 %19, %5
131  %21 = add nsw i64 %11, 3
132  %22 = mul nsw i64 %21, %5
133  br label %23
134
; Inner loop body. %24 is the inner induction variable; %25 points at
; %1[%24], which is both the first addend and the destination of the store.
; The fadd chain is strictly sequential: every add consumes the previous sum.
13523:                                               ; preds = %15, %23
136  %24 = phi i64 [ %12, %15 ], [ %91, %23 ]
137  %25 = getelementptr inbounds double, double* %1, i64 %24
138  %26 = load double, double* %25, align 8
; Row 0: %0[%27 + 0..3], where %27 = %24 + %16.
139  %27 = add nsw i64 %24, %16
140  %28 = getelementptr inbounds double, double* %0, i64 %27
141  %29 = load double, double* %28, align 8
142  %30 = fadd double %26, %29
143  %31 = add nsw i64 %27, 1
144  %32 = getelementptr inbounds double, double* %0, i64 %31
145  %33 = load double, double* %32, align 8
146  %34 = fadd double %30, %33
147  %35 = add nsw i64 %27, 2
148  %36 = getelementptr inbounds double, double* %0, i64 %35
149  %37 = load double, double* %36, align 8
150  %38 = fadd double %34, %37
151  %39 = add nsw i64 %27, 3
152  %40 = getelementptr inbounds double, double* %0, i64 %39
153  %41 = load double, double* %40, align 8
154  %42 = fadd double %38, %41
; Row 1: %0[%43 + 0..3], where %43 = %24 + %18.
155  %43 = add nsw i64 %24, %18
156  %44 = getelementptr inbounds double, double* %0, i64 %43
157  %45 = load double, double* %44, align 8
158  %46 = fadd double %42, %45
159  %47 = add nsw i64 %43, 1
160  %48 = getelementptr inbounds double, double* %0, i64 %47
161  %49 = load double, double* %48, align 8
162  %50 = fadd double %46, %49
163  %51 = add nsw i64 %43, 2
164  %52 = getelementptr inbounds double, double* %0, i64 %51
165  %53 = load double, double* %52, align 8
166  %54 = fadd double %50, %53
167  %55 = add nsw i64 %43, 3
168  %56 = getelementptr inbounds double, double* %0, i64 %55
169  %57 = load double, double* %56, align 8
170  %58 = fadd double %54, %57
; Row 2: %0[%59 + 0..3], where %59 = %24 + %20.
171  %59 = add nsw i64 %24, %20
172  %60 = getelementptr inbounds double, double* %0, i64 %59
173  %61 = load double, double* %60, align 8
174  %62 = fadd double %58, %61
175  %63 = add nsw i64 %59, 1
176  %64 = getelementptr inbounds double, double* %0, i64 %63
177  %65 = load double, double* %64, align 8
178  %66 = fadd double %62, %65
179  %67 = add nsw i64 %59, 2
180  %68 = getelementptr inbounds double, double* %0, i64 %67
181  %69 = load double, double* %68, align 8
182  %70 = fadd double %66, %69
183  %71 = add nsw i64 %59, 3
184  %72 = getelementptr inbounds double, double* %0, i64 %71
185  %73 = load double, double* %72, align 8
186  %74 = fadd double %70, %73
; Row 3: %0[%75 + 0..3], where %75 = %24 + %22.
187  %75 = add nsw i64 %24, %22
188  %76 = getelementptr inbounds double, double* %0, i64 %75
189  %77 = load double, double* %76, align 8
190  %78 = fadd double %74, %77
191  %79 = add nsw i64 %75, 1
192  %80 = getelementptr inbounds double, double* %0, i64 %79
193  %81 = load double, double* %80, align 8
194  %82 = fadd double %78, %81
195  %83 = add nsw i64 %75, 2
196  %84 = getelementptr inbounds double, double* %0, i64 %83
197  %85 = load double, double* %84, align 8
198  %86 = fadd double %82, %85
199  %87 = add nsw i64 %75, 3
200  %88 = getelementptr inbounds double, double* %0, i64 %87
201  %89 = load double, double* %88, align 8
202  %90 = fadd double %86, %89
; Write the accumulated sum back to %1[%24], then step the inner index by %7
; and repeat while it is still below the bound %13.
203  store double %90, double* %25, align 8
204  %91 = add nsw i64 %24, %7
205  %92 = icmp slt i64 %91, %13
206  br i1 %92, label %23, label %93
207
; Outer loop latch. %94 is the inner index to resume from: unchanged (%12)
; when the inner loop was skipped, otherwise the value it exited with (%91).
; Step the outer index by %6 and repeat while it is still below %4.
20893:                                               ; preds = %23, %10
209  %94 = phi i64 [ %12, %10 ], [ %91, %23 ]
210  %95 = add nsw i64 %11, %6
211  %96 = icmp slt i64 %95, %4
212  br i1 %96, label %10, label %97
213
; Common exit, reached from the entry guard or from the outer latch.
21497:                                               ; preds = %93, %8
215  ret void
216}
217