1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s
3
; add_i8: cost-model queries for a plain integer add-reduction
; (llvm.vector.reduce.add) of i8 vectors with 1, 2, 4, 8 and 16 lanes.
; All operands are undef and the results are unused; only the cost printed
; for each instruction is verified.
; The autogenerated expectations below record a cost of 1 for the single-lane
; case, 28 for <2 x i8>, and 2 for the 4-, 8- and 16-lane cases.
; Regenerate the expectations with utils/update_analyze_test_checks.py rather
; than editing the numbers by hand.
4define void @add_i8() {
5; CHECK-LABEL: 'add_i8'
6; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
7; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
8; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
9; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
10; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
11; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
12;
13  %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
14
15  %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
16
17  %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
18
19  %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
20
21  %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
22
23  ret void
24}
25
; add_i16: cost-model queries for add-reductions producing an i16 result.
; For each lane count (1, 2, 4, 8, 16) three input forms are exercised:
;   %aNz  - reduction of a zext from i8 to i16
;   %aNs  - reduction of a sext from i8 to i16
;   %a5..%a9 - reduction of a plain (undef) i16 vector
; The extend and the reduction are reported as separate instructions, each
; with its own expected cost. In the expectations below the reduction cost is
; the same for the extended and plain forms of a given lane count
; (1, 28, 2, 2 and 4 for 1, 2, 4, 8 and 16 lanes respectively).
; Regenerate the expectations with utils/update_analyze_test_checks.py rather
; than editing the numbers by hand.
26define void @add_i16() {
27; CHECK-LABEL: 'add_i16'
28; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
29; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
30; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
31; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
32; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
33; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
34; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
35; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
36; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
37; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
38; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
39; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
40; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
41; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
42; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
43; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
44; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
45; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
46; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
47; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
48; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
49; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
50; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
51; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
52; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
53; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
54;
55  %a0za = zext <1 x i8> undef to <1 x i16>
56  %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
57
58  %a0sa = sext <1 x i8> undef to <1 x i16>
59  %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
60
61  %a1za = zext <2 x i8> undef to <2 x i16>
62  %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
63
64  %a1sa = sext <2 x i8> undef to <2 x i16>
65  %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
66
67  %a2za = zext <4 x i8> undef to <4 x i16>
68  %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
69
70  %a2sa = sext <4 x i8> undef to <4 x i16>
71  %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
72
73  %a3za = zext <8 x i8> undef to <8 x i16>
74  %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
75
76  %a3sa = sext <8 x i8> undef to <8 x i16>
77  %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
78
79  %a4za = zext <16 x i8> undef to <16 x i16>
80  %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
81
82  %a4sa = sext <16 x i8> undef to <16 x i16>
83  %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
84
85  %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
86
87  %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
88
89  %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
90
91  %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
92
93  %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
94
95  ret void
96}
97
; add_i32: cost-model queries for add-reductions producing an i32 result.
; For each lane count (1, 2, 4, 8, 16) the reduction input is, in turn:
;   %a0..%a4   - zext / sext from i8 to i32
;   %a5..%a9   - zext / sext from i16 to i32
;   %a10..%a14 - a plain (undef) i32 vector
; The extend and the reduction are reported as separate instructions, each
; with its own expected cost. In the expectations below the reduction cost
; depends only on the lane count (1, 28, 2, 4 and 8 for 1, 2, 4, 8 and 16
; lanes), regardless of which extend feeds it.
; Regenerate the expectations with utils/update_analyze_test_checks.py rather
; than editing the numbers by hand.
98define void @add_i32() {
99; CHECK-LABEL: 'add_i32'
100; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
101; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
102; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
103; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
104; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
105; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
106; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
107; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
108; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
109; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
110; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
111; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
112; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
113; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
114; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
115; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
116; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
117; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
118; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
119; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
120; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
121; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
122; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
123; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
124; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
125; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
126; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
127; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
128; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
129; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
130; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
131; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
132; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
133; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
134; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
135; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
136; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
137; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
138; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
139; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
140; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
141; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
142; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
143; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
144; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
145; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
146;
147  %a0za = zext <1 x i8> undef to <1 x i32>
148  %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
149
150  %a0sa = sext <1 x i8> undef to <1 x i32>
151  %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
152
153  %a1za = zext <2 x i8> undef to <2 x i32>
154  %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
155
156  %a1sa = sext <2 x i8> undef to <2 x i32>
157  %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
158
159  %a2za = zext <4 x i8> undef to <4 x i32>
160  %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
161
162  %a2sa = sext <4 x i8> undef to <4 x i32>
163  %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
164
165  %a3za = zext <8 x i8> undef to <8 x i32>
166  %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
167
168  %a3sa = sext <8 x i8> undef to <8 x i32>
169  %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
170
171  %a4za = zext <16 x i8> undef to <16 x i32>
172  %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
173
174  %a4sa = sext <16 x i8> undef to <16 x i32>
175  %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
176
177  %a5za = zext <1 x i16> undef to <1 x i32>
178  %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
179
180  %a5sa = sext <1 x i16> undef to <1 x i32>
181  %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
182
183  %a6za = zext <2 x i16> undef to <2 x i32>
184  %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
185
186  %a6sa = sext <2 x i16> undef to <2 x i32>
187  %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
188
189  %a7za = zext <4 x i16> undef to <4 x i32>
190  %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
191
192  %a7sa = sext <4 x i16> undef to <4 x i32>
193  %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
194
195  %a8za = zext <8 x i16> undef to <8 x i32>
196  %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
197
198  %a8sa = sext <8 x i16> undef to <8 x i32>
199  %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
200
201  %a9za = zext <16 x i16> undef to <16 x i32>
202  %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
203
204  %a9sa = sext <16 x i16> undef to <16 x i32>
205  %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
206
207  %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
208
209  %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
210
211  %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
212
213  %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
214
215  %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
216
217  ret void
218}
219
; add_i64: cost-model queries for add-reductions producing an i64 result.
; For each lane count (1, 2, 4, 8, 16) the reduction input is, in turn:
;   %a0..%a4   - zext / sext from i8 to i64
;   %a5..%a9   - zext / sext from i16 to i64
;   %a10..%a14 - zext / sext from i32 to i64
;   %a15..%a19 - a plain (undef) i64 vector
; The extend and the reduction are reported as separate instructions, each
; with its own expected cost. In the expectations below the reduction cost
; depends only on the lane count (1, 30, 66, 202 and 730 for 1, 2, 4, 8 and
; 16 lanes), regardless of which extend feeds it.
; Regenerate the expectations with utils/update_analyze_test_checks.py rather
; than editing the numbers by hand.
220define void @add_i64() {
221; CHECK-LABEL: 'add_i64'
222; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
223; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
224; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
225; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
226; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
227; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
228; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
229; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
230; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
231; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
232; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
233; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
234; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
235; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
236; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
237; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
238; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
239; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
240; CHECK-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
241; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
242; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
243; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
244; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
245; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
246; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
247; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
248; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
249; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
250; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
251; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
252; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
253; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
254; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
255; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
256; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
257; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
258; CHECK-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
259; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
260; CHECK-NEXT:  Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
261; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
262; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
263; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
264; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
265; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
266; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
267; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
268; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
269; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
270; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
271; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
272; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
273; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
274; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
275; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
276; CHECK-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
277; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
278; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
279; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
280; CHECK-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
281; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
282; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
283; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
284; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
285; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
286; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
287; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
288;
289  %a0za = zext <1 x i8> undef to <1 x i64>
290  %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
291
292  %a0sa = sext <1 x i8> undef to <1 x i64>
293  %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
294
295  %a1za = zext <2 x i8> undef to <2 x i64>
296  %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
297
298  %a1sa = sext <2 x i8> undef to <2 x i64>
299  %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
300
301  %a2za = zext <4 x i8> undef to <4 x i64>
302  %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
303
304  %a2sa = sext <4 x i8> undef to <4 x i64>
305  %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
306
307  %a3za = zext <8 x i8> undef to <8 x i64>
308  %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
309
310  %a3sa = sext <8 x i8> undef to <8 x i64>
311  %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
312
313  %a4za = zext <16 x i8> undef to <16 x i64>
314  %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
315
316  %a4sa = sext <16 x i8> undef to <16 x i64>
317  %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
318
319  %a5za = zext <1 x i16> undef to <1 x i64>
320  %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
321
322  %a5sa = sext <1 x i16> undef to <1 x i64>
323  %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
324
325  %a6za = zext <2 x i16> undef to <2 x i64>
326  %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
327
328  %a6sa = sext <2 x i16> undef to <2 x i64>
329  %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
330
331  %a7za = zext <4 x i16> undef to <4 x i64>
332  %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
333
334  %a7sa = sext <4 x i16> undef to <4 x i64>
335  %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
336
337  %a8za = zext <8 x i16> undef to <8 x i64>
338  %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
339
340  %a8sa = sext <8 x i16> undef to <8 x i64>
341  %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
342
343  %a9za = zext <16 x i16> undef to <16 x i64>
344  %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
345
346  %a9sa = sext <16 x i16> undef to <16 x i64>
347  %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
348
349  %a10za = zext <1 x i32> undef to <1 x i64>
350  %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
351
352  %a10sa = sext <1 x i32> undef to <1 x i64>
353  %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
354
355  %a11za = zext <2 x i32> undef to <2 x i64>
356  %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
357
358  %a11sa = sext <2 x i32> undef to <2 x i64>
359  %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
360
361  %a12za = zext <4 x i32> undef to <4 x i64>
362  %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
363
364  %a12sa = sext <4 x i32> undef to <4 x i64>
365  %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
366
367  %a13za = zext <8 x i32> undef to <8 x i64>
368  %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
369
370  %a13sa = sext <8 x i32> undef to <8 x i64>
371  %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
372
373  %a14za = zext <16 x i32> undef to <16 x i64>
374  %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
375
376  %a14sa = sext <16 x i32> undef to <16 x i64>
377  %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
378
379  %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
380
381  %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
382
383  %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
384
385  %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
386
387  %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
388
389  ret void
390}
391
; mla_i8: cost-model queries for a multiply feeding an add-reduction
; (mul followed by llvm.vector.reduce.add) on i8 vectors with 1, 2, 4, 8 and
; 16 lanes. No extends are involved; both mul operands are undef.
; The mul and the reduction are reported as separate instructions. The
; expectations below record mul costs of 2, 6, 2, 2, 2 and reduction costs of
; 1, 28, 2, 2, 2 for 1, 2, 4, 8 and 16 lanes respectively.
; Regenerate the expectations with utils/update_analyze_test_checks.py rather
; than editing the numbers by hand.
392define void @mla_i8() {
393; CHECK-LABEL: 'mla_i8'
394; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef
395; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
396; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a1m = mul <2 x i8> undef, undef
397; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
398; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef
399; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
400; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef
401; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
402; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef
403; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
404; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
405;
406  %a0m = mul <1 x i8> undef, undef
407  %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
408
409  %a1m = mul <2 x i8> undef, undef
410  %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
411
412  %a2m = mul <4 x i8> undef, undef
413  %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
414
415  %a3m = mul <8 x i8> undef, undef
416  %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
417
418  %a4m = mul <16 x i8> undef, undef
419  %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
420
421  ret void
422}
423
424define void @mla_i16() {
425; CHECK-LABEL: 'mla_i16'
426; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
427; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16>
428; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb
429; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
430; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
431; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16>
432; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb
433; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
434; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
435; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16>
436; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb
437; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
438; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
439; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16>
440; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb
441; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
442; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
443; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16>
444; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb
445; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
446; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
447; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16>
448; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb
449; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
450; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
451; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16>
452; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb
453; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
454; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
455; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16>
456; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb
457; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
458; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
459; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16>
460; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb
461; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
462; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
463; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16>
464; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb
465; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
466; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef
467; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
468; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a6m = mul <2 x i16> undef, undef
469; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
470; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef
471; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
472; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef
473; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
474; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef
475; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
476; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
477;
478  %a0za = zext <1 x i8> undef to <1 x i16>
479  %a0zb = zext <1 x i8> undef to <1 x i16>
480  %a0zm = mul <1 x i16> %a0za, %a0zb
481  %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
482
483  %a0sa = sext <1 x i8> undef to <1 x i16>
484  %a0sb = sext <1 x i8> undef to <1 x i16>
485  %a0sm = mul <1 x i16> %a0sa, %a0sb
486  %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
487
488  %a1za = zext <2 x i8> undef to <2 x i16>
489  %a1zb = zext <2 x i8> undef to <2 x i16>
490  %a1zm = mul <2 x i16> %a1za, %a1zb
491  %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
492
493  %a1sa = sext <2 x i8> undef to <2 x i16>
494  %a1sb = sext <2 x i8> undef to <2 x i16>
495  %a1sm = mul <2 x i16> %a1sa, %a1sb
496  %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
497
498  %a2za = zext <4 x i8> undef to <4 x i16>
499  %a2zb = zext <4 x i8> undef to <4 x i16>
500  %a2zm = mul <4 x i16> %a2za, %a2zb
501  %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
502
503  %a2sa = sext <4 x i8> undef to <4 x i16>
504  %a2sb = sext <4 x i8> undef to <4 x i16>
505  %a2sm = mul <4 x i16> %a2sa, %a2sb
506  %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
507
508  %a3za = zext <8 x i8> undef to <8 x i16>
509  %a3zb = zext <8 x i8> undef to <8 x i16>
510  %a3zm = mul <8 x i16> %a3za, %a3zb
511  %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
512
513  %a3sa = sext <8 x i8> undef to <8 x i16>
514  %a3sb = sext <8 x i8> undef to <8 x i16>
515  %a3sm = mul <8 x i16> %a3sa, %a3sb
516  %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
517
518  %a4za = zext <16 x i8> undef to <16 x i16>
519  %a4zb = zext <16 x i8> undef to <16 x i16>
520  %a4zm = mul <16 x i16> %a4za, %a4zb
521  %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
522
523  %a4sa = sext <16 x i8> undef to <16 x i16>
524  %a4sb = sext <16 x i8> undef to <16 x i16>
525  %a4sm = mul <16 x i16> %a4sa, %a4sb
526  %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
527
528  %a5m = mul <1 x i16> undef, undef
529  %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
530
531  %a6m = mul <2 x i16> undef, undef
532  %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
533
534  %a7m = mul <4 x i16> undef, undef
535  %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
536
537  %a8m = mul <8 x i16> undef, undef
538  %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
539
540  %a9m = mul <16 x i16> undef, undef
541  %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
542
543  ret void
544}
545
; mla_i32: cost-model coverage for a vector multiply feeding
; llvm.vector.reduce.add with an i32 result, at vector widths 1, 2, 4, 8 and 16.
; Three operand flavours are exercised:
;   a0-a4   : both multiplicands extended from i8  (z = zext, s = sext)
;   a5-a9   : both multiplicands extended from i16 (z = zext, s = sext)
;   a10-a14 : multiplicands already i32 (undef operands, no extension)
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
; NOTE(review): the "mla" name suggests these model multiply-accumulate
; reduction patterns - confirm against the target cost-model code.
546define void @mla_i32() {
547; CHECK-LABEL: 'mla_i32'
548; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
549; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32>
550; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb
551; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
552; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
553; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32>
554; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb
555; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
556; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
557; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32>
558; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb
559; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
560; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
561; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i32>
562; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb
563; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
564; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
565; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32>
566; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb
567; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
568; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
569; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32>
570; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb
571; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
572; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
573; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32>
574; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb
575; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
576; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
577; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32>
578; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb
579; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
580; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
581; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32>
582; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb
583; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
584; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
585; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32>
586; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb
587; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
588; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
589; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32>
590; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb
591; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
592; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
593; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32>
594; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sm = mul <1 x i32> %a5sa, %a5sb
595; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
596; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
597; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32>
598; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb
599; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
600; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
601; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32>
602; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb
603; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
604; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
605; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32>
606; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb
607; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
608; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
609; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32>
610; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb
611; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
612; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
613; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32>
614; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb
615; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
616; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
617; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32>
618; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb
619; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
620; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
621; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32>
622; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb
623; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
624; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
625; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32>
626; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb
627; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
628; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef
629; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
630; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a11m = mul <2 x i32> undef, undef
631; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
632; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef
633; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
634; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef
635; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
636; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef
637; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
638; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
639;
; --- i8 sources widened to i32: widths 1, 2, 4, 8, 16; zext group then sext group per width ---
640  %a0za = zext <1 x i8> undef to <1 x i32>
641  %a0zb = zext <1 x i8> undef to <1 x i32>
642  %a0zm = mul <1 x i32> %a0za, %a0zb
643  %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
644
645  %a0sa = sext <1 x i8> undef to <1 x i32>
646  %a0sb = sext <1 x i8> undef to <1 x i32>
647  %a0sm = mul <1 x i32> %a0sa, %a0sb
648  %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
649
650  %a1za = zext <2 x i8> undef to <2 x i32>
651  %a1zb = zext <2 x i8> undef to <2 x i32>
652  %a1zm = mul <2 x i32> %a1za, %a1zb
653  %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
654
655  %a1sa = sext <2 x i8> undef to <2 x i32>
656  %a1sb = sext <2 x i8> undef to <2 x i32>
657  %a1sm = mul <2 x i32> %a1sa, %a1sb
658  %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
659
660  %a2za = zext <4 x i8> undef to <4 x i32>
661  %a2zb = zext <4 x i8> undef to <4 x i32>
662  %a2zm = mul <4 x i32> %a2za, %a2zb
663  %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
664
665  %a2sa = sext <4 x i8> undef to <4 x i32>
666  %a2sb = sext <4 x i8> undef to <4 x i32>
667  %a2sm = mul <4 x i32> %a2sa, %a2sb
668  %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
669
670  %a3za = zext <8 x i8> undef to <8 x i32>
671  %a3zb = zext <8 x i8> undef to <8 x i32>
672  %a3zm = mul <8 x i32> %a3za, %a3zb
673  %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
674
675  %a3sa = sext <8 x i8> undef to <8 x i32>
676  %a3sb = sext <8 x i8> undef to <8 x i32>
677  %a3sm = mul <8 x i32> %a3sa, %a3sb
678  %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
679
680  %a4za = zext <16 x i8> undef to <16 x i32>
681  %a4zb = zext <16 x i8> undef to <16 x i32>
682  %a4zm = mul <16 x i32> %a4za, %a4zb
683  %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
684
685  %a4sa = sext <16 x i8> undef to <16 x i32>
686  %a4sb = sext <16 x i8> undef to <16 x i32>
687  %a4sm = mul <16 x i32> %a4sa, %a4sb
688  %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
689
; --- i16 sources widened to i32: same width/extension sweep as above ---
690  %a5za = zext <1 x i16> undef to <1 x i32>
691  %a5zb = zext <1 x i16> undef to <1 x i32>
692  %a5zm = mul <1 x i32> %a5za, %a5zb
693  %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
694
695  %a5sa = sext <1 x i16> undef to <1 x i32>
696  %a5sb = sext <1 x i16> undef to <1 x i32>
697  %a5sm = mul <1 x i32> %a5sa, %a5sb
698  %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
699
700  %a6za = zext <2 x i16> undef to <2 x i32>
701  %a6zb = zext <2 x i16> undef to <2 x i32>
702  %a6zm = mul <2 x i32> %a6za, %a6zb
703  %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
704
705  %a6sa = sext <2 x i16> undef to <2 x i32>
706  %a6sb = sext <2 x i16> undef to <2 x i32>
707  %a6sm = mul <2 x i32> %a6sa, %a6sb
708  %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
709
710  %a7za = zext <4 x i16> undef to <4 x i32>
711  %a7zb = zext <4 x i16> undef to <4 x i32>
712  %a7zm = mul <4 x i32> %a7za, %a7zb
713  %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
714
715  %a7sa = sext <4 x i16> undef to <4 x i32>
716  %a7sb = sext <4 x i16> undef to <4 x i32>
717  %a7sm = mul <4 x i32> %a7sa, %a7sb
718  %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
719
720  %a8za = zext <8 x i16> undef to <8 x i32>
721  %a8zb = zext <8 x i16> undef to <8 x i32>
722  %a8zm = mul <8 x i32> %a8za, %a8zb
723  %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
724
725  %a8sa = sext <8 x i16> undef to <8 x i32>
726  %a8sb = sext <8 x i16> undef to <8 x i32>
727  %a8sm = mul <8 x i32> %a8sa, %a8sb
728  %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
729
730  %a9za = zext <16 x i16> undef to <16 x i32>
731  %a9zb = zext <16 x i16> undef to <16 x i32>
732  %a9zm = mul <16 x i32> %a9za, %a9zb
733  %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
734
735  %a9sa = sext <16 x i16> undef to <16 x i32>
736  %a9sb = sext <16 x i16> undef to <16 x i32>
737  %a9sm = mul <16 x i32> %a9sa, %a9sb
738  %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
739
; --- native i32 multiplicands (no extension): widths 1, 2, 4, 8, 16 ---
740  %a10m = mul <1 x i32> undef, undef
741  %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
742
743  %a11m = mul <2 x i32> undef, undef
744  %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
745
746  %a12m = mul <4 x i32> undef, undef
747  %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
748
749  %a13m = mul <8 x i32> undef, undef
750  %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
751
752  %a14m = mul <16 x i32> undef, undef
753  %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
754
755  ret void
756}
757
758define void @mla_i64() {
759; CHECK-LABEL: 'mla_i64'
760; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
761; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64>
762; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb
763; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
764; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
765; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64>
766; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb
767; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
768; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
769; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64>
770; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb
771; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
772; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
773; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64>
774; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb
775; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
776; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
777; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64>
778; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb
779; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
780; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
781; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64>
782; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb
783; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
784; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
785; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64>
786; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb
787; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
788; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
789; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64>
790; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb
791; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
792; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
793; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %a4zb = zext <16 x i8> undef to <16 x i64>
794; CHECK-NEXT:  Cost Model: Found an estimated cost of 800 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb
795; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
796; CHECK-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
797; CHECK-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64>
798; CHECK-NEXT:  Cost Model: Found an estimated cost of 800 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb
799; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
800; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
801; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64>
802; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb
803; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
804; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
805; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64>
806; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb
807; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
808; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
809; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 x i64>
810; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb
811; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
812; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
813; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64>
814; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb
815; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
816; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
817; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64>
818; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb
819; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
820; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
821; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64>
822; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb
823; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
824; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
825; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64>
826; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb
827; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
828; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
829; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64>
830; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb
831; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
832; CHECK-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
833; CHECK-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64>
834; CHECK-NEXT:  Cost Model: Found an estimated cost of 800 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb
835; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
836; CHECK-NEXT:  Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
837; CHECK-NEXT:  Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64>
838; CHECK-NEXT:  Cost Model: Found an estimated cost of 800 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb
839; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
840; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
841; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64>
842; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a10zm = mul <1 x i64> %a10za, %a10zb
843; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
844; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
845; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64>
846; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb
847; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
848; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
849; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64>
850; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb
851; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
852; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
853; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64>
854; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb
855; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
856; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
857; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a12zb = zext <4 x i32> undef to <4 x i64>
858; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb
859; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
860; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
861; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64>
862; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb
863; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
864; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
865; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64>
866; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb
867; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
868; CHECK-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
869; CHECK-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64>
870; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb
871; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
872; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
873; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a14zb = zext <16 x i32> undef to <16 x i64>
874; CHECK-NEXT:  Cost Model: Found an estimated cost of 800 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb
875; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
876; CHECK-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
877; CHECK-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64>
878; CHECK-NEXT:  Cost Model: Found an estimated cost of 800 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb
879; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
880; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef
881; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
882; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a16m = mul <2 x i64> undef, undef
883; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
884; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %a17m = mul <4 x i64> undef, undef
885; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
886; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef
887; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
888; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a19m = mul <16 x i64> undef, undef
889; CHECK-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
890; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
891;
892  %a0za = zext <1 x i8> undef to <1 x i64>
893  %a0zb = zext <1 x i8> undef to <1 x i64>
894  %a0zm = mul <1 x i64> %a0za, %a0zb
895  %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
896
897  %a0sa = sext <1 x i8> undef to <1 x i64>
898  %a0sb = sext <1 x i8> undef to <1 x i64>
899  %a0sm = mul <1 x i64> %a0sa, %a0sb
900  %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
901
902  %a1za = zext <2 x i8> undef to <2 x i64>
903  %a1zb = zext <2 x i8> undef to <2 x i64>
904  %a1zm = mul <2 x i64> %a1za, %a1zb
905  %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
906
907  %a1sa = sext <2 x i8> undef to <2 x i64>
908  %a1sb = sext <2 x i8> undef to <2 x i64>
909  %a1sm = mul <2 x i64> %a1sa, %a1sb
910  %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
911
912  %a2za = zext <4 x i8> undef to <4 x i64>
913  %a2zb = zext <4 x i8> undef to <4 x i64>
914  %a2zm = mul <4 x i64> %a2za, %a2zb
915  %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
916
917  %a2sa = sext <4 x i8> undef to <4 x i64>
918  %a2sb = sext <4 x i8> undef to <4 x i64>
919  %a2sm = mul <4 x i64> %a2sa, %a2sb
920  %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
921
922  %a3za = zext <8 x i8> undef to <8 x i64>
923  %a3zb = zext <8 x i8> undef to <8 x i64>
924  %a3zm = mul <8 x i64> %a3za, %a3zb
925  %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
926
927  %a3sa = sext <8 x i8> undef to <8 x i64>
928  %a3sb = sext <8 x i8> undef to <8 x i64>
929  %a3sm = mul <8 x i64> %a3sa, %a3sb
930  %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
931
932  %a4za = zext <16 x i8> undef to <16 x i64>
933  %a4zb = zext <16 x i8> undef to <16 x i64>
934  %a4zm = mul <16 x i64> %a4za, %a4zb
935  %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
936
937  %a4sa = sext <16 x i8> undef to <16 x i64>
938  %a4sb = sext <16 x i8> undef to <16 x i64>
939  %a4sm = mul <16 x i64> %a4sa, %a4sb
940  %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
941
942  %a5za = zext <1 x i16> undef to <1 x i64>
943  %a5zb = zext <1 x i16> undef to <1 x i64>
944  %a5zm = mul <1 x i64> %a5za, %a5zb
945  %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
946
947  %a5sa = sext <1 x i16> undef to <1 x i64>
948  %a5sb = sext <1 x i16> undef to <1 x i64>
949  %a5sm = mul <1 x i64> %a5sa, %a5sb
950  %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
951
952  %a6za = zext <2 x i16> undef to <2 x i64>
953  %a6zb = zext <2 x i16> undef to <2 x i64>
954  %a6zm = mul <2 x i64> %a6za, %a6zb
955  %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
956
957  %a6sa = sext <2 x i16> undef to <2 x i64>
958  %a6sb = sext <2 x i16> undef to <2 x i64>
959  %a6sm = mul <2 x i64> %a6sa, %a6sb
960  %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
961
962  %a7za = zext <4 x i16> undef to <4 x i64>
963  %a7zb = zext <4 x i16> undef to <4 x i64>
964  %a7zm = mul <4 x i64> %a7za, %a7zb
965  %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
966
967  %a7sa = sext <4 x i16> undef to <4 x i64>
968  %a7sb = sext <4 x i16> undef to <4 x i64>
969  %a7sm = mul <4 x i64> %a7sa, %a7sb
970  %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
971
972  %a8za = zext <8 x i16> undef to <8 x i64>
973  %a8zb = zext <8 x i16> undef to <8 x i64>
974  %a8zm = mul <8 x i64> %a8za, %a8zb
975  %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
976
977  %a8sa = sext <8 x i16> undef to <8 x i64>
978  %a8sb = sext <8 x i16> undef to <8 x i64>
979  %a8sm = mul <8 x i64> %a8sa, %a8sb
980  %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
981
982  %a9za = zext <16 x i16> undef to <16 x i64>
983  %a9zb = zext <16 x i16> undef to <16 x i64>
984  %a9zm = mul <16 x i64> %a9za, %a9zb
985  %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
986
987  %a9sa = sext <16 x i16> undef to <16 x i64>
988  %a9sb = sext <16 x i16> undef to <16 x i64>
989  %a9sm = mul <16 x i64> %a9sa, %a9sb
990  %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
991
992  %a10za = zext <1 x i32> undef to <1 x i64>
993  %a10zb = zext <1 x i32> undef to <1 x i64>
994  %a10zm = mul <1 x i64> %a10za, %a10zb
995  %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
996
997  %a10sa = sext <1 x i32> undef to <1 x i64>
998  %a10sb = sext <1 x i32> undef to <1 x i64>
999  %a10sm = mul <1 x i64> %a10sa, %a10sb
1000  %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
1001
1002  %a11za = zext <2 x i32> undef to <2 x i64>
1003  %a11zb = zext <2 x i32> undef to <2 x i64>
1004  %a11zm = mul <2 x i64> %a11za, %a11zb
1005  %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
1006
1007  %a11sa = sext <2 x i32> undef to <2 x i64>
1008  %a11sb = sext <2 x i32> undef to <2 x i64>
1009  %a11sm = mul <2 x i64> %a11sa, %a11sb
1010  %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
1011
1012  %a12za = zext <4 x i32> undef to <4 x i64>
1013  %a12zb = zext <4 x i32> undef to <4 x i64>
1014  %a12zm = mul <4 x i64> %a12za, %a12zb
1015  %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
1016
1017  %a12sa = sext <4 x i32> undef to <4 x i64>
1018  %a12sb = sext <4 x i32> undef to <4 x i64>
1019  %a12sm = mul <4 x i64> %a12sa, %a12sb
1020  %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
1021
1022  %a13za = zext <8 x i32> undef to <8 x i64>
1023  %a13zb = zext <8 x i32> undef to <8 x i64>
1024  %a13zm = mul <8 x i64> %a13za, %a13zb
1025  %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
1026
1027  %a13sa = sext <8 x i32> undef to <8 x i64>
1028  %a13sb = sext <8 x i32> undef to <8 x i64>
1029  %a13sm = mul <8 x i64> %a13sa, %a13sb
1030  %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
1031
1032  %a14za = zext <16 x i32> undef to <16 x i64>
1033  %a14zb = zext <16 x i32> undef to <16 x i64>
1034  %a14zm = mul <16 x i64> %a14za, %a14zb
1035  %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
1036
1037  %a14sa = sext <16 x i32> undef to <16 x i64>
1038  %a14sb = sext <16 x i32> undef to <16 x i64>
1039  %a14sm = mul <16 x i64> %a14sa, %a14sb
1040  %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
1041
1042  %a15m = mul <1 x i64> undef, undef
1043  %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
1044
1045  %a16m = mul <2 x i64> undef, undef
1046  %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
1047
1048  %a17m = mul <4 x i64> undef, undef
1049  %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
1050
1051  %a18m = mul <8 x i64> undef, undef
1052  %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
1053
1054  %a19m = mul <16 x i64> undef, undef
1055  %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
1056
1057  ret void
1058}
1059
; Declarations of the @llvm.vector.reduce.add intrinsic overloads: one per
; element type (i8, i16, i32, i64) and lane count (1, 2, 4, 8, 16). Each takes
; a single vector operand and returns a scalar of the vector's element type.
; Entries are listed in lexicographic order of the declaration text, which is
; why the v16 variants precede v1 and the i8 group comes last.
1060declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
1061declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>)
1062declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
1063declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
1064declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
1065declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
1066declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
1067declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
1068declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
1069declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
1070declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
1071declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
1072declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
1073declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
1074declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
1075declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
1076declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
1077declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
1078declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
1079declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
1080