; REQUIRES: asserts
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s

; stp_i64_scale: four i64 stores of %v through %P at element indices 3, 2, 1
; and 4, in that program order. The checks below expect the machine-scheduler
; debug output for %bb.0 to report two ld/st clusters, SU(4)-SU(3) and
; SU(2)-SU(5), and then to list the four STRXui stores with immediates
; 1, 2, 3 and 4, so each cluster joins the two stores with consecutive
; immediates.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_i64_scale:%bb.0
; CHECK:Cluster ld/st SU(4) - SU(3)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(4):   STRXui %1:gpr64, %0:gpr64common, 1
; CHECK:SU(3):   STRXui %1:gpr64, %0:gpr64common, 2
; CHECK:SU(2):   STRXui %1:gpr64, %0:gpr64common, 3
; CHECK:SU(5):   STRXui %1:gpr64, %0:gpr64common, 4
define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store i64 %v, i64* %arrayidx3
  ret i64 %v
}

; stp_i32_scale: same shape as the i64 scaled case but with i32 stores: %v is
; stored through %P at element indices 3, 2, 1 and 4, in that program order.
; The checks below expect clusters SU(4)-SU(3) and SU(2)-SU(5) in the
; scheduler debug output, followed by the four STRWui stores with immediates
; 1, 2, 3 and 4.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_i32_scale:%bb.0
; CHECK:Cluster ld/st SU(4) - SU(3)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(4):   STRWui %1:gpr32, %0:gpr64common, 1
; CHECK:SU(3):   STRWui %1:gpr32, %0:gpr64common, 2
; CHECK:SU(2):   STRWui %1:gpr32, %0:gpr64common, 3
; CHECK:SU(5):   STRWui %1:gpr32, %0:gpr64common, 4
define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 3
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4
  store i32 %v, i32* %arrayidx3
  ret i32 %v
}

; stp_i64_unscale: four i64 stores of %v through %P at negative element
; indices -3, -1, -2 and -4, in that program order. The checks below expect
; the stores to appear as STURXi with immediates -32, -24, -16 and -8, and
; expect the scheduler debug output to report clusters SU(5)-SU(2)
; (-32 with -24) and SU(4)-SU(3) (-16 with -8).
; NOTE(review): '#0' names an attribute group that is not defined in the
; lines visible here - confirm that this is intentional.
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_i64_unscale:%bb.0 entry
; CHECK:Cluster ld/st SU(5) - SU(2)
; CHECK:Cluster ld/st SU(4) - SU(3)
; CHECK:SU(5):   STURXi %1:gpr64, %0:gpr64common, -32
; CHECK:SU(2):   STURXi %1:gpr64, %0:gpr64common, -24
; CHECK:SU(4):   STURXi %1:gpr64, %0:gpr64common, -16
; CHECK:SU(3):   STURXi %1:gpr64, %0:gpr64common, -8
define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
  store i64 %v, i64* %arrayidx3
  ret void
}

; stp_i32_unscale: four i32 stores of %v through %P at negative element
; indices -3, -1, -2 and -4, in that program order. The checks below expect
; the stores to appear as STURWi with immediates -16, -12, -8 and -4, and
; expect the scheduler debug output to report clusters SU(5)-SU(2)
; (-16 with -12) and SU(4)-SU(3) (-8 with -4).
; NOTE(review): '#0' names an attribute group that is not defined in the
; lines visible here - confirm that this is intentional.
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_i32_unscale:%bb.0 entry
; CHECK:Cluster ld/st SU(5) - SU(2)
; CHECK:Cluster ld/st SU(4) - SU(3)
; CHECK:SU(5):   STURWi %1:gpr32, %0:gpr64common, -16
; CHECK:SU(2):   STURWi %1:gpr32, %0:gpr64common, -12
; CHECK:SU(4):   STURWi %1:gpr32, %0:gpr64common, -8
; CHECK:SU(3):   STURWi %1:gpr32, %0:gpr64common, -4
define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
  store i32 %v, i32* %arrayidx3
  ret void
}

; stp_double: four double stores of %v through %P at element indices 3, 1, 2
; and 4, in that program order. The checks below expect the scheduler debug
; output to report clusters SU(3)-SU(4) and SU(2)-SU(5), followed by the four
; STRDui stores (value in an fpr64 register) with immediates 1, 2, 3 and 4.
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_double:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(3):   STRDui %1:fpr64, %0:gpr64common, 1
; CHECK:SU(4):   STRDui %1:fpr64, %0:gpr64common, 2
; CHECK:SU(2):   STRDui %1:fpr64, %0:gpr64common, 3
; CHECK:SU(5):   STRDui %1:fpr64, %0:gpr64common, 4
define void @stp_double(double* nocapture %P, double %v)  {
entry:
  %arrayidx = getelementptr inbounds double, double* %P, i64 3
  store double %v, double* %arrayidx
  %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
  store double %v, double* %arrayidx1
  %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
  store double %v, double* %arrayidx2
  %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
  store double %v, double* %arrayidx3
  ret void
}

; stp_float: four float stores of %v through %P at element indices 3, 1, 2
; and 4, in that program order. The checks below expect the scheduler debug
; output to report clusters SU(3)-SU(4) and SU(2)-SU(5), followed by the four
; STRSui stores (value in an fpr32 register) with immediates 1, 2, 3 and 4.
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_float:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(3):   STRSui %1:fpr32, %0:gpr64common, 1
; CHECK:SU(4):   STRSui %1:fpr32, %0:gpr64common, 2
; CHECK:SU(2):   STRSui %1:fpr32, %0:gpr64common, 3
; CHECK:SU(5):   STRSui %1:fpr32, %0:gpr64common, 4
define void @stp_float(float* nocapture %P, float %v)  {
entry:
  %arrayidx = getelementptr inbounds float, float* %P, i64 3
  store float %v, float* %arrayidx
  %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
  store float %v, float* %arrayidx1
  %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
  store float %v, float* %arrayidx2
  %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
  store float %v, float* %arrayidx3
  ret void
}

; stp_volatile: negative test. Same address pattern as the i64 scaled case
; (element indices 3, 2, 1 and 4, in that program order), but every store is
; volatile. The checks below expect no "Cluster ld/st" line between the
; function's label line and the SU(2) listing, and expect the four STRXui
; stores to be listed with a memory operand that starts with "(volatile".
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_volatile:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK:SU(2):   STRXui %1:gpr64, %0:gpr64common, 3 :: (volatile
; CHECK:SU(3):   STRXui %1:gpr64, %0:gpr64common, 2 :: (volatile
; CHECK:SU(4):   STRXui %1:gpr64, %0:gpr64common, 1 :: (volatile
; CHECK:SU(5):   STRXui %1:gpr64, %0:gpr64common, 4 :: (volatile
define i64 @stp_volatile(i64* nocapture %P, i64 %v) {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store volatile i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store volatile i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store volatile i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store volatile i64 %v, i64* %arrayidx3
  ret i64 %v
}
