1; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -enable-misched -pre-RA-sched=source -scheditins=false | FileCheck %s
2;
3; Test that MI-Sched supports latency-based stalls on an in-order pipeline
4; using the new machine model.
5
; Module data layout string. The leading "e" selects little-endian and
; "p:32:32:32" declares 32-bit pointers with 32-bit alignment; the remaining
; fields give per-type ABI/preferred alignments, native integer width (n32)
; and natural stack alignment (S32).
6target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
7
8; Don't be too strict with the top of the schedule, but most of it
9; should be nicely pipelined.
10;
11; CHECK: saxpy10:
12; CHECK: vldr
13; CHECK: vldr
14; CHECK: vldr
15; CHECK: vldr
16; CHECK: vldr
17; CHECK-NEXT: vldr
18; CHECK-NEXT: vmul
19; CHECK-NEXT: vadd
20; CHECK-NEXT: vadd
21; CHECK-NEXT: vldr
22; CHECK-NEXT: vldr
23; CHECK-NEXT: vadd
24; CHECK-NEXT: vadd
25; CHECK-NEXT: vmul
26; CHECK-NEXT: vldr
27; CHECK-NEXT: vadd
28; CHECK-NEXT: vadd
29; CHECK-NEXT: vldr
30; CHECK-NEXT: vmul
31; CHECK-NEXT: vldr
32; CHECK-NEXT: vadd
33; CHECK-NEXT: vldr
34; CHECK-NEXT: vadd
35; CHECK-NEXT: vldr
36; CHECK-NEXT: vmul
37; CHECK-NEXT: vadd
38; CHECK-NEXT: vldr
39; CHECK-NEXT: vadd
40; CHECK-NEXT: vldr
41; CHECK-NEXT: vmul
42; CHECK-NEXT: vadd
43; CHECK-NEXT: vldr
44; CHECK-NEXT: vadd
45; CHECK-NEXT: vldr
46; CHECK-NEXT: vmul
47; CHECK-NEXT: vadd
48; CHECK-NEXT: vldr
49; CHECK-NEXT: vadd
50; CHECK-NEXT: vldr
51; CHECK-NEXT: vmul
52; CHECK-NEXT: vadd
53; CHECK-NEXT: vldr
54; CHECK-NEXT: vmul
55; CHECK-NEXT: vadd
56; CHECK-NEXT: vldr
57; CHECK-NEXT: vadd
58; CHECK-NEXT: vadd
59; CHECK-NEXT: vadd
60; CHECK-NEXT: vmov
61; CHECK-NEXT: bx
62;
63; This accumulates a sum rather than storing each result.
; saxpy10: fully unrolled, 10-element "a*x + y" reduction.
;
; Parameters:
;   %data1 - pointer to the x values (read-only, not captured); elements 0..9 are loaded.
;   %data2 - pointer to the y values (read-only, not captured); elements 0..9 are loaded.
;   %a     - scalar multiplier applied to every %data1 element.
; Returns:
;   the float sum over i = 0..9 of (%data1[i] * %a + %data2[i]).
;
; Each unrolled step is: load x, fmul by %a, load y, fadd (the term), then a
; second fadd folding the term into the running sum. The running sum
; (%add2, %add2.1, ... %add2.9) is a serial dependency chain; the loads and
; multiplies of different steps are independent of each other.
64define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
65entry:
; i = 0: term = data1[0] * a + data2[0]; the running sum is seeded as term + 0.0.
66  %0 = load float, float* %data1, align 4
67  %mul = fmul float %0, %a
68  %1 = load float, float* %data2, align 4
69  %add = fadd float %mul, %1
70  %add2 = fadd float %add, 0.000000e+00
; i = 1: sum += data1[1] * a + data2[1]
71  %arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1
72  %2 = load float, float* %arrayidx.1, align 4
73  %mul.1 = fmul float %2, %a
74  %arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1
75  %3 = load float, float* %arrayidx1.1, align 4
76  %add.1 = fadd float %mul.1, %3
77  %add2.1 = fadd float %add2, %add.1
; i = 2: sum += data1[2] * a + data2[2]
78  %arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2
79  %4 = load float, float* %arrayidx.2, align 4
80  %mul.2 = fmul float %4, %a
81  %arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2
82  %5 = load float, float* %arrayidx1.2, align 4
83  %add.2 = fadd float %mul.2, %5
84  %add2.2 = fadd float %add2.1, %add.2
; i = 3: sum += data1[3] * a + data2[3]
85  %arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3
86  %6 = load float, float* %arrayidx.3, align 4
87  %mul.3 = fmul float %6, %a
88  %arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3
89  %7 = load float, float* %arrayidx1.3, align 4
90  %add.3 = fadd float %mul.3, %7
91  %add2.3 = fadd float %add2.2, %add.3
; i = 4: sum += data1[4] * a + data2[4]
92  %arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4
93  %8 = load float, float* %arrayidx.4, align 4
94  %mul.4 = fmul float %8, %a
95  %arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4
96  %9 = load float, float* %arrayidx1.4, align 4
97  %add.4 = fadd float %mul.4, %9
98  %add2.4 = fadd float %add2.3, %add.4
; i = 5: sum += data1[5] * a + data2[5]
99  %arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5
100  %10 = load float, float* %arrayidx.5, align 4
101  %mul.5 = fmul float %10, %a
102  %arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5
103  %11 = load float, float* %arrayidx1.5, align 4
104  %add.5 = fadd float %mul.5, %11
105  %add2.5 = fadd float %add2.4, %add.5
; i = 6: sum += data1[6] * a + data2[6]
106  %arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6
107  %12 = load float, float* %arrayidx.6, align 4
108  %mul.6 = fmul float %12, %a
109  %arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6
110  %13 = load float, float* %arrayidx1.6, align 4
111  %add.6 = fadd float %mul.6, %13
112  %add2.6 = fadd float %add2.5, %add.6
; i = 7: sum += data1[7] * a + data2[7]
113  %arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7
114  %14 = load float, float* %arrayidx.7, align 4
115  %mul.7 = fmul float %14, %a
116  %arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7
117  %15 = load float, float* %arrayidx1.7, align 4
118  %add.7 = fadd float %mul.7, %15
119  %add2.7 = fadd float %add2.6, %add.7
; i = 8: sum += data1[8] * a + data2[8]
120  %arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8
121  %16 = load float, float* %arrayidx.8, align 4
122  %mul.8 = fmul float %16, %a
123  %arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8
124  %17 = load float, float* %arrayidx1.8, align 4
125  %add.8 = fadd float %mul.8, %17
126  %add2.8 = fadd float %add2.7, %add.8
; i = 9: sum += data1[9] * a + data2[9]
127  %arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9
128  %18 = load float, float* %arrayidx.9, align 4
129  %mul.9 = fmul float %18, %a
130  %arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9
131  %19 = load float, float* %arrayidx1.9, align 4
132  %add.9 = fadd float %mul.9, %19
133  %add2.9 = fadd float %add2.8, %add.9
; Final accumulated sum of all ten terms.
134  ret float %add2.9
135}
136