; Test if several consecutive loads/stores can be clustered (fused) by the
; scheduler. The scheduler prints "Cluster ld/st SU(x) - SU(y)" if SU(x) and
; SU(y) are fused.

; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
; RUN:   -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \
; RUN:   -debug-only=machine-scheduler 2>&1 | FileCheck %s

; Four i64 stores at offsets 24/16/8/32: the scheduler should cluster the
; adjacent pairs in both the pre-RA and post-RA scheduling passes.
define i64 @store_i64(i64* nocapture %P, i64 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64:%bb.0
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store i64 %v, i64* %arrayidx3
  ret i64 %v
}

; Same pattern with i32 stores (STW); offsets 52/48/44/56 should cluster in
; adjacent pairs in both scheduling passes.
define i32 @store_i32(i32* nocapture %P, i32 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52
; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48
; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44
; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32:%bb.0
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48
; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44
; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52
; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 13
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14
  store i32 %v, i32* %arrayidx3
  ret i32 %v
}

; i64 stores at negative offsets (-24/-8/-16/-32) must still be clustered.
define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
  store i64 %v, i64* %arrayidx3
  ret void
}

; i32 stores at negative offsets (-12/-4/-8/-16) must still be clustered.
define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12
; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4
; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8
; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4
; CHECK: SU([[SU1]]): STW renamable $r[[REG]], -8
; CHECK: SU([[SU2]]): STW renamable $r[[REG]], -12
; CHECK: SU([[SU3]]): STW renamable $r[[REG]], -16
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
  store i32 %v, i32* %arrayidx3
  ret void
}

; f64 stores (DFSTOREf64 pre-RA / STFD post-RA) at offsets 24/8/16/32 should
; be clustered in adjacent pairs.
define void @store_double(double* nocapture %P, double %v)  {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_double:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24
; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8
; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16
; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_double:%bb.0
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8
; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16
; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24
; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32
  %arrayidx = getelementptr inbounds double, double* %P, i64 3
  store double %v, double* %arrayidx
  %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
  store double %v, double* %arrayidx1
  %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
  store double %v, double* %arrayidx2
  %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
  store double %v, double* %arrayidx3
  ret void
}

; f32 stores: the CHECK-NOT lines assert that no clustering happens for these
; float stores; the stores themselves must still all be present.
; NOTE(review): the SU/REG variables are defined locally here; the original
; relied on values captured in a previous function's section, coupling this
; test to unrelated SUnit numbering.
define void @store_float(float* nocapture %P, float %v)  {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_float:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU2:[0-9]+]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12
; CHECK: SU([[SU3:[0-9]+]]): DFSTOREf32 %[[REG]]:vssrc, 4
; CHECK: SU([[SU4:[0-9]+]]): DFSTOREf32 %[[REG]]:vssrc, 8
; CHECK: SU([[SU5:[0-9]+]]): DFSTOREf32 %[[REG]]:vssrc, 16
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_float:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU0:[0-9]+]]): STFS renamable $f[[REG:[0-9]+]], 12
; CHECK: SU([[SU1:[0-9]+]]): STFS renamable $f[[REG]], 4
; CHECK: SU([[SU2:[0-9]+]]): STFS renamable $f[[REG]], 8
; CHECK: SU([[SU3:[0-9]+]]): STFS renamable $f[[REG]], 16
  %arrayidx = getelementptr inbounds float, float* %P, i64 3
  store float %v, float* %arrayidx
  %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
  store float %v, float* %arrayidx1
  %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
  store float %v, float* %arrayidx2
  %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
  store float %v, float* %arrayidx3
  ret void
}

; Cannot fuse the stores if they are volatile: CHECK-NOT asserts no clustering,
; while the individual stores must still appear.
; NOTE(review): SU/REG variables are defined locally here; the original used
; them undefined, silently reusing captures from an earlier function.
define i64 @store_volatile(i64* nocapture %P, i64 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_volatile:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU2:[0-9]+]]): STD %[[REG:[0-9]+]]:g8rc, 24
; CHECK: SU([[SU3:[0-9]+]]): STD %[[REG]]:g8rc, 16
; CHECK: SU([[SU4:[0-9]+]]): STD %[[REG]]:g8rc, 8
; CHECK: SU([[SU5:[0-9]+]]): STD %[[REG]]:g8rc, 32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_volatile:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU0:[0-9]+]]): STD renamable $x[[REG:[0-9]+]], 24
; CHECK: SU([[SU1:[0-9]+]]): STD renamable $x[[REG]], 16
; CHECK: SU([[SU2:[0-9]+]]): STD renamable $x[[REG]], 8
; CHECK: SU([[SU3:[0-9]+]]): STD renamable $x[[REG]], 32
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store volatile i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store volatile i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store volatile i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store volatile i64 %v, i64* %arrayidx3
  ret i64 %v
}

; Global array used by the STW/STW8 mixed-opcode clustering tests below.
@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4

; Mixed STW8/STW stores to @p: the STW8 at offset 24 and the STW at offset 20
; should be clustered in both scheduling passes.
define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n)  {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]])
; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24
; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]])
; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24
; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20
  store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4
  store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4
  %add = add nsw i32 %n, %m
  store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4
  ret void
}

; Two STW8 stores to adjacent slots of @p (offsets 24 and 28) should be
; clustered in both scheduling passes.
define void @store_i32_stw8(i32 signext %m, i32 signext %n)  {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24
; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24
; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28
  store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4
  store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4
  ret void
}

; External callee that keeps %buf alive in store_frame_index below.
declare void @bar(i64*)

; Stores relative to a frame index (%stack.0.buf) should also be clustered.
define void @store_frame_index(i32 %a, i32 %b) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_frame_index:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf
; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf
  %buf = alloca [8 x i64], align 8
  %0 = bitcast [8 x i64]* %buf to i8*
  %conv = zext i32 %a to i64
  %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0
  store i64 %conv, i64* %arrayidx, align 8
  %conv1 = zext i32 %b to i64
  %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1
  store i64 %conv1, i64* %arrayidx2, align 8
  call void @bar(i64* nonnull %arrayidx)
  ret void
}