1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
3declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
4
5declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
6
7declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
8
9declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
10
11declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
12
13declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
14
15declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
16
17declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
18
19declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
20
21declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
22
23declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
24
25declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
26
27declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
28
29declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
30
31declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
32
33declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
34
35declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
36
37declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
38
39declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
40
41declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
42
43declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
44
; Multiply-accumulate by element: %b times lane 3 of the 64-bit %v, added to
; %a, must be selected as a single indexed MLA.
define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
55
; 128-bit multiply-accumulate by element: lane 3 of the 64-bit %v is widened
; to eight lanes, multiplied by %b and added to %a.
define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
66
; Multiply-accumulate by element on 2 x i32: %b times lane 1 of %v, plus %a.
define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
77
; 128-bit multiply-accumulate by element: lane 1 of the 64-bit %v is widened
; to four lanes, multiplied by %b and added to %a.
define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
88
; Multiply-accumulate by element with the lane taken from a 128-bit vector:
; lane 7 of %v is narrowed to four lanes, multiplied by %b and added to %a.
define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to the four result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
99
; 128-bit multiply-accumulate by element: %b times lane 7 of the 128-bit %v,
; plus %a.
define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to every lane.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
110
; Multiply-accumulate by element with the lane taken from a 128-bit vector:
; lane 3 of %v is narrowed to two lanes, multiplied by %b and added to %a.
define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to both result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
121
; 128-bit multiply-accumulate by element: %b times lane 3 of the 128-bit %v,
; plus %a.
define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
132
; Multiply-subtract by element: %b times lane 3 of the 64-bit %v is
; subtracted from %a and must be selected as a single indexed MLS.
define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
143
; 128-bit multiply-subtract by element: lane 3 of the 64-bit %v is widened to
; eight lanes, multiplied by %b and subtracted from %a.
define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
154
; Multiply-subtract by element on 2 x i32: %a minus %b times lane 1 of %v.
define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
165
; 128-bit multiply-subtract by element: lane 1 of the 64-bit %v is widened to
; four lanes, multiplied by %b and subtracted from %a.
define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
176
; Multiply-subtract by element with the lane taken from a 128-bit vector:
; lane 7 of %v is narrowed to four lanes, multiplied by %b and subtracted
; from %a.
define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to the four result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
187
; 128-bit multiply-subtract by element: %a minus %b times lane 7 of the
; 128-bit %v.
define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to every lane.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
198
; Multiply-subtract by element with the lane taken from a 128-bit vector:
; lane 3 of %v is narrowed to two lanes, multiplied by %b and subtracted
; from %a.
define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to both result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
209
; 128-bit multiply-subtract by element: %a minus %b times lane 3 of the
; 128-bit %v.
define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
220
; Multiply by element: %a times lane 3 of the 64-bit %v must be selected as
; a single indexed MUL.
define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
230
; 128-bit multiply by element: lane 3 of the 64-bit %v is widened to eight
; lanes and multiplied by %a.
define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
240
; Multiply by element on 2 x i32: %a times lane 1 of %v.
define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
250
; 128-bit multiply by element: lane 1 of the 64-bit %v is widened to four
; lanes and multiplied by %a.
define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
260
; Multiply by element, u16-named variant: the IR is the same sign-agnostic
; mul by lane 3 of the 64-bit %v as the s16 test.
define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
270
; 128-bit multiply by element, u16-named variant: lane 3 of the 64-bit %v is
; widened to eight lanes and multiplied by %a.
define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
280
; Multiply by element, u32-named variant: %a times lane 1 of %v.
define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
290
; 128-bit multiply by element, u32-named variant: lane 1 of the 64-bit %v is
; widened to four lanes and multiplied by %a.
define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
300
; Multiply by element with the lane taken from a 128-bit vector: lane 7 of
; %v is narrowed to four lanes and multiplied by %a.
define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to the four result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
310
; 128-bit multiply by element: %a times lane 7 of the 128-bit %v.
define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to every lane.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
320
; Multiply by element with the lane taken from a 128-bit vector: lane 3 of
; %v is narrowed to two lanes and multiplied by %a.
define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to both result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
330
; 128-bit multiply by element: %a times lane 3 of the 128-bit %v.
define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
340
; Multiply by element, u16-named variant with the lane taken from a 128-bit
; vector: lane 7 of %v is narrowed to four lanes and multiplied by %a.
define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to the four result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
350
; 128-bit multiply by element, u16-named variant: %a times lane 7 of the
; 128-bit %v.
define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to every lane.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
360
; Multiply by element, u32-named variant with the lane taken from a 128-bit
; vector: lane 3 of %v is narrowed to two lanes and multiplied by %a.
define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to both result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
370
; 128-bit multiply by element, u32-named variant: %a times lane 3 of the
; 128-bit %v.
define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
380
; Fused multiply-add by element: llvm.fma of a splat of lane 1 of %v, %b and
; the addend %a must be selected as a single indexed FMLA.
define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}
390
391declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
392
; 128-bit fused multiply-add by element: lane 1 of the 64-bit %v is widened
; to four lanes and fed to llvm.fma with %b and the addend %a.
define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to all four result lanes.
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}
402
403declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
404
; Fused multiply-add by element with the lane taken from a 128-bit vector:
; lane 3 of %v is narrowed to two lanes and fed to llvm.fma with %b and %a.
define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to both result lanes.
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}
414
; 128-bit fused multiply-add by element: llvm.fma of a splat of lane 3 of the
; 128-bit %v, %b and the addend %a.
define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}
424
; Fused multiply-subtract by element: %v is negated, lane 1 is splatted and
; fed to llvm.fma with %b and %a; a single indexed FMLS is expected.
define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Negate %v, written as a subtraction from -0.0.
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  ; Broadcast lane 1 of the negated vector to both lanes.
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}
435
; 128-bit fused multiply-subtract by element: the 64-bit %v is negated, lane
; 1 is widened to four lanes and fed to llvm.fma with %b and %a.
define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Negate %v, written as a subtraction from -0.0.
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  ; Broadcast lane 1 of the negated vector to all four result lanes.
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}
446
; Fused multiply-subtract by element with the lane taken from a 128-bit
; vector: %v is negated, lane 3 is narrowed to two lanes and fed to llvm.fma
; with %b and %a.
define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Negate %v, written as a subtraction from -0.0.
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  ; Broadcast lane 3 of the negated vector to both result lanes.
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}
457
; 128-bit fused multiply-subtract by element: the 128-bit %v is negated, lane
; 3 is splatted and fed to llvm.fma with %b and %a.
define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Negate %v, written as a subtraction from -0.0.
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  ; Broadcast lane 3 of the negated vector to every lane.
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}
468
; Double-precision fused multiply-add by element: the only element of the
; <1 x double> %v is widened to two lanes and fed to llvm.fma with %b and %a.
define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; An all-zero mask selects element 0 of %v for both result lanes.
  %splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %res
}
478
479declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
480
; Double-precision fused multiply-add by element: llvm.fma of a splat of lane
; 1 of the 128-bit %v, %b and the addend %a.
define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %res
}
490
; Double-precision fused multiply-subtract by element: the <1 x double> %v is
; negated, its only element is widened to two lanes and fed to llvm.fma with
; %b and %a.
define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; Negate %v, written as a subtraction from -0.0.
  %neg = fsub <1 x double> <double -0.000000e+00>, %v
  ; An all-zero mask selects element 0 of the negated vector for both lanes.
  %splat = shufflevector <1 x double> %neg, <1 x double> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %res
}
501
; Double-precision fused multiply-subtract by element: the 128-bit %v is
; negated, lane 1 is splatted and fed to llvm.fma with %b and %a.
define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  ; Negate %v, written as a subtraction from -0.0.
  %neg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  ; Broadcast lane 1 of the negated vector to both lanes.
  %splat = shufflevector <2 x double> %neg, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %res
}
512
; Scalar fused multiply-add by element: lane 3 of %v is extracted and fed to
; the scalar llvm.fma with %b and the addend %a; the indexed scalar FMLA form
; is expected. The label pattern ends in ':' so it can only match the
; function's own label line.
define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmas_laneq_f32:
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %extract = extractelement <4 x float> %v, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}
522
523declare float @llvm.fma.f32(float, float, float)
524
; Scalar fused multiply-subtract with a <1 x double> source: element 0 of %v
; is extracted, negated and fed to the scalar llvm.fma with %b and %a. The
; plain scalar FMSUB is expected here rather than an indexed form. The label
; pattern ends in ':' so it cannot match the longer
; test_vfmsd_lane_f64_0 symbol or an assembler directive.
define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <1 x double> %v, i32 0
  ; Negation written as a subtraction from -0.0.
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}
535
536declare double @llvm.fma.f64(double, double, double)
537
; Scalar fused multiply-subtract by element: lane 1 of the 64-bit %v is
; extracted, negated and fed to the scalar llvm.fma with %b and %a; the
; indexed scalar FMLS form is expected. The label pattern ends in ':' so it
; cannot match the longer test_vfmss_lane_f32_0 symbol.
define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmss_lane_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <2 x float> %v, i32 1
  ; Negation written as a subtraction from -0.0.
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}
548
; Scalar fused multiply-subtract by element: lane 3 of the 128-bit %v is
; extracted, negated and fed to the scalar llvm.fma with %b and %a; the
; indexed scalar FMLS form is expected. The label pattern ends in ':' so it
; cannot match the longer test_vfmss_laneq_f32_0 symbol.
define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <4 x float> %v, i32 3
  ; Negation written as a subtraction from -0.0.
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}
559
; Scalar double-precision fused multiply-subtract by element: lane 1 of the
; 128-bit %v is extracted, negated and fed to the scalar llvm.fma with %b and
; %a; the indexed scalar FMLS form is expected. The label pattern ends in ':'
; so it cannot match the longer test_vfmsd_laneq_f64_0 symbol.
define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <2 x double> %v, i32 1
  ; Negation written as a subtraction from -0.0.
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}
570
; Variant of test_vfmsd_lane_f64 where the negation is applied to the whole
; <1 x double> vector before element 0 is extracted. The directives below
; were previously misspelled and therefore ignored by the checker; they are
; now spelled correctly so this function is actually verified.
define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64_0:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  ; Vector negation written as a subtraction from -0.0.
  %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
  %tmp1 = extractelement <1 x double> %tmp0, i32 0
  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %0
}
581
; Variant of test_vfmss_lane_f32 where the negation is applied to the whole
; 64-bit vector before lane 1 is extracted; the indexed scalar FMLS form is
; still expected. The label pattern ends in ':' so it can only match the
; function's own label line.
define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmss_lane_f32_0:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Vector negation written as a subtraction from -0.0.
  %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %tmp1 = extractelement <2 x float> %tmp0, i32 1
  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %0
}
592
; Variant of test_vfmss_laneq_f32 where the negation is applied to the whole
; 128-bit vector before lane 3 is extracted; the indexed scalar FMLS form is
; still expected. The label pattern ends in ':' so it can only match the
; function's own label line.
define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32_0:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Vector negation written as a subtraction from -0.0.
  %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %tmp1 = extractelement <4 x float> %tmp0, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %0
}
603
; Variant of test_vfmsd_laneq_f64 where the negation is applied to the whole
; 128-bit vector before lane 1 is extracted; the indexed scalar FMLS form is
; still expected. The label pattern ends in ':' so it can only match the
; function's own label line.
define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64_0:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  ; Vector negation written as a subtraction from -0.0.
  %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v
  %tmp1 = extractelement <2 x double> %tmp0, i32 1
  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %0
}
614
; Signed widening multiply-accumulate by element: the smull intrinsic of %b
; and a splat of lane 3 of %v is added to %a. The full signed mnemonic is
; matched so that an unsigned UMLAL cannot satisfy the pattern.
define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
625
; Signed widening multiply-accumulate by element: the smull intrinsic of %b
; and a splat of lane 1 of %v is added to %a. The full signed mnemonic is
; matched so that an unsigned UMLAL cannot satisfy the pattern.
define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
636
; Signed widening multiply-accumulate by element with the lane taken from a
; 128-bit vector: smull of %b and a splat of lane 7 of %v, added to %a. The
; full signed mnemonic is matched so that an unsigned UMLAL cannot satisfy
; the pattern.
define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to the four multiplier lanes.
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
647
; Signed widening multiply-accumulate by element with the lane taken from a
; 128-bit vector: smull of %b and a splat of lane 3 of %v, added to %a. The
; full signed mnemonic is matched so that an unsigned UMLAL cannot satisfy
; the pattern.
define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to the two multiplier lanes.
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
658
; Signed widening multiply-accumulate on the upper half of %b: lanes 4-7 of
; %b are multiplied (smull) by a splat of lane 3 of %v and added to %a. The
; full signed mnemonic is matched so that an unsigned UMLAL2 cannot satisfy
; the pattern.
define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Upper four lanes of %b.
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast lane 3 of %v to every lane.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
670
; Signed widening multiply-accumulate on the upper half of %b: lanes 2-3 of
; %b are multiplied (smull) by a splat of lane 1 of %v and added to %a. The
; full signed mnemonic is matched so that an unsigned UMLAL2 cannot satisfy
; the pattern.
define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Upper two lanes of %b.
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast lane 1 of %v to both lanes.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
682
; Signed widening multiply-accumulate on the upper half of %b with the lane
; taken from a 128-bit vector: lanes 4-7 of %b times (smull) a splat of lane
; 7 of %v, added to %a. The full signed mnemonic is matched so that an
; unsigned UMLAL2 cannot satisfy the pattern.
define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Upper four lanes of %b.
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast lane 7 of %v to the four multiplier lanes.
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
694
; Signed widening multiply-accumulate on the upper half of %b with the lane
; taken from a 128-bit vector: lanes 2-3 of %b times (smull) a splat of lane
; 3 of %v, added to %a. The full signed mnemonic is matched so that an
; unsigned UMLAL2 cannot satisfy the pattern.
define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Upper two lanes of %b.
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast lane 3 of %v to the two multiplier lanes.
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
706
; Signed widening multiply-subtract by element: the smull intrinsic of %b and
; a splat of lane 3 of %v is subtracted from %a. The full signed mnemonic is
; matched so that an unsigned UMLSL cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to every lane.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
717
; Signed widening multiply-subtract by element: the smull intrinsic of %b and
; a splat of lane 1 of %v is subtracted from %a. The full signed mnemonic is
; matched so that an unsigned UMLSL cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v to both lanes.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
728
; Signed widening multiply-subtract by element with the lane taken from a
; 128-bit vector: smull of %b and a splat of lane 7 of %v, subtracted from
; %a. The full signed mnemonic is matched so that an unsigned UMLSL cannot
; satisfy the pattern.
define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v to the four multiplier lanes.
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
739
; Signed widening multiply-subtract by element with the lane taken from a
; 128-bit vector: smull of %b and a splat of lane 3 of %v, subtracted from
; %a. The full signed mnemonic is matched so that an unsigned UMLSL cannot
; satisfy the pattern.
define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v to the two multiplier lanes.
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
750
; Signed widening multiply-subtract on the upper half of %b: lanes 4-7 of %b
; are multiplied (smull) by a splat of lane 3 of %v and subtracted from %a.
; The full signed mnemonic is matched so that an unsigned UMLSL2 cannot
; satisfy the pattern.
define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Upper four lanes of %b.
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast lane 3 of %v to every lane.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
762
; Signed widening multiply-subtract on the upper half of %b: lanes 2-3 of %b
; are multiplied (smull) by a splat of lane 1 of %v and subtracted from %a.
; The full signed mnemonic is matched so that an unsigned UMLSL2 cannot
; satisfy the pattern.
define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Upper two lanes of %b.
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast lane 1 of %v to both lanes.
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
774
; Elements 4-7 of %b (%shuffle.i) are multiplied with a splat of lane 7 of %v
; through the smull intrinsic and the product is subtracted from %a.
; The full mnemonic is matched so the unsigned umlsl2 form cannot pass.
define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
786
; Elements 2-3 of %b (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the smull intrinsic and the product is subtracted from %a.
; The full mnemonic is matched so the unsigned umlsl2 form cannot pass.
define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
798
; Lane 3 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is added to %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlal form cannot pass.
define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
809
; Lane 1 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is added to %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlal form cannot pass.
define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
820
; Lane 7 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is added to %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlal form cannot pass.
define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
831
; Lane 3 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is added to %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlal form cannot pass.
define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
842
; Elements 4-7 of %b (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the umull intrinsic and the product is added to %a.
; The full mnemonic is matched so the signed smlal2 form cannot pass.
define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
854
; Elements 2-3 of %b (%shuffle.i) are multiplied with a splat of lane 1 of %v
; through the umull intrinsic and the product is added to %a.
; The full mnemonic is matched so the signed smlal2 form cannot pass.
define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
866
; Elements 4-7 of %b (%shuffle.i) are multiplied with a splat of lane 7 of %v
; through the umull intrinsic and the product is added to %a.
; The full mnemonic is matched so the signed smlal2 form cannot pass.
define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
878
; Elements 2-3 of %b (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the umull intrinsic and the product is added to %a.
; The full mnemonic is matched so the signed smlal2 form cannot pass.
define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
890
; Lane 3 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is subtracted from %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlsl form cannot pass.
define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
901
; Lane 1 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is subtracted from %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlsl form cannot pass.
define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
912
; Lane 7 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is subtracted from %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlsl form cannot pass.
define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
923
; Lane 3 of %v is splatted, multiplied with %b through the umull intrinsic and
; the product is subtracted from %a; one by-element instruction is expected.
; The full mnemonic is matched so the signed smlsl form cannot pass.
define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
934
; Elements 4-7 of %b (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the umull intrinsic and the product is subtracted from %a.
; The full mnemonic is matched so the signed smlsl2 form cannot pass.
define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
946
; Elements 2-3 of %b (%shuffle.i) are multiplied with a splat of lane 1 of %v
; through the umull intrinsic and the product is subtracted from %a.
; The full mnemonic is matched so the signed smlsl2 form cannot pass.
define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
958
; Elements 4-7 of %b (%shuffle.i) are multiplied with a splat of lane 7 of %v
; through the umull intrinsic and the product is subtracted from %a.
; The full mnemonic is matched so the signed smlsl2 form cannot pass.
define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
970
; Elements 2-3 of %b (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the umull intrinsic and the product is subtracted from %a.
; The full mnemonic is matched so the signed smlsl2 form cannot pass.
define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
982
; %a is multiplied with a splat of lane 3 of %v through the smull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; unsigned umull form cannot pass.
define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
992
; %a is multiplied with a splat of lane 1 of %v through the smull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; unsigned umull form cannot pass.
define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1002
; %a is multiplied with a splat of lane 3 of %v through the umull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; signed smull form cannot pass.
define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1012
; %a is multiplied with a splat of lane 1 of %v through the umull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; signed smull form cannot pass.
define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1022
; Elements 4-7 of %a (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the smull intrinsic. The full mnemonic is matched so the unsigned
; umull2 form cannot pass.
define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1033
; Elements 2-3 of %a (%shuffle.i) are multiplied with a splat of lane 1 of %v
; through the smull intrinsic. The full mnemonic is matched so the unsigned
; umull2 form cannot pass.
define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1044
; Elements 4-7 of %a (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the umull intrinsic. The full mnemonic is matched so the signed
; smull2 form cannot pass.
define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1055
; Elements 2-3 of %a (%shuffle.i) are multiplied with a splat of lane 1 of %v
; through the umull intrinsic. The full mnemonic is matched so the signed
; smull2 form cannot pass.
define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1066
; %a is multiplied with a splat of lane 7 of %v through the smull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; unsigned umull form cannot pass.
define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1076
; %a is multiplied with a splat of lane 3 of %v through the smull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; unsigned umull form cannot pass.
define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1086
; %a is multiplied with a splat of lane 7 of %v through the umull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; signed smull form cannot pass.
define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1096
; %a is multiplied with a splat of lane 3 of %v through the umull intrinsic;
; one by-element instruction is expected. The full mnemonic is matched so the
; signed smull form cannot pass.
define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1106
; Elements 4-7 of %a (%shuffle.i) are multiplied with a splat of lane 7 of %v
; through the smull intrinsic. The full mnemonic is matched so the unsigned
; umull2 form cannot pass.
define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1117
; Elements 2-3 of %a (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the smull intrinsic. The full mnemonic is matched so the unsigned
; umull2 form cannot pass.
define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1128
; Elements 4-7 of %a (%shuffle.i) are multiplied with a splat of lane 7 of %v
; through the umull intrinsic. The full mnemonic is matched so the signed
; smull2 form cannot pass.
define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1139
; Elements 2-3 of %a (%shuffle.i) are multiplied with a splat of lane 3 of %v
; through the umull intrinsic. The full mnemonic is matched so the signed
; smull2 form cannot pass.
define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1150
; The sqdmull intrinsic is applied to %b and a splat of lane 3 of %v, and the
; result is combined with %a by the sqadd intrinsic; both calls are expected to
; fold into one by-element instruction. The mnemonic is matched in full rather
; than by its suffix.
define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}
1161
; The sqdmull intrinsic is applied to %b and a splat of lane 1 of %v, and the
; result is combined with %a by the sqadd intrinsic; both calls are expected to
; fold into one by-element instruction. The mnemonic is matched in full rather
; than by its suffix.
define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}
1172
; The sqdmull intrinsic is applied to elements 4-7 of %b (%shuffle.i) and a
; splat of lane 3 of %v, then combined with %a by the sqadd intrinsic. The
; mnemonic is matched in full rather than by its suffix.
define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}
1184
; The sqdmull intrinsic is applied to elements 2-3 of %b (%shuffle.i) and a
; splat of lane 1 of %v, then combined with %a by the sqadd intrinsic. The
; mnemonic is matched in full rather than by its suffix.
define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}
1196
; The sqdmull intrinsic is applied to %b and a splat of lane 3 of %v, and the
; result is combined with %a by the sqsub intrinsic; both calls are expected to
; fold into one by-element instruction. The mnemonic is matched in full rather
; than by its suffix.
define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}
1207
; The sqdmull intrinsic is applied to %b and a splat of lane 1 of %v, and the
; result is combined with %a by the sqsub intrinsic; both calls are expected to
; fold into one by-element instruction. The mnemonic is matched in full rather
; than by its suffix.
define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}
1218
; The sqdmull intrinsic is applied to elements 4-7 of %b (%shuffle.i) and a
; splat of lane 3 of %v, then combined with %a by the sqsub intrinsic. The
; mnemonic is matched in full rather than by its suffix.
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}
1230
; The sqdmull intrinsic is applied to elements 2-3 of %b (%shuffle.i) and a
; splat of lane 1 of %v, then combined with %a by the sqsub intrinsic. The
; mnemonic is matched in full rather than by its suffix.
define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}
1242
; The sqdmull intrinsic is applied to %a and a splat of lane 3 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1252
; The sqdmull intrinsic is applied to %a and a splat of lane 1 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1262
; The sqdmull intrinsic is applied to %a and a splat of lane 7 of the
; 8-element %v. Lane 7 is used because a lane in 0-3 could equally be supplied
; by a 4-element source and would not distinguish the wide-source form. The
; mnemonic is matched in full rather than by its suffix.
define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1272
; The sqdmull intrinsic is applied to %a and a splat of lane 3 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1282
; The sqdmull intrinsic is applied to elements 4-7 of %a (%shuffle.i) and a
; splat of lane 3 of %v. The mnemonic is matched in full rather than by its
; suffix.
define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1293
; The sqdmull intrinsic is applied to elements 2-3 of %a (%shuffle.i) and a
; splat of lane 1 of %v. The mnemonic is matched in full rather than by its
; suffix.
define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1304
; The sqdmull intrinsic is applied to elements 4-7 of %a (%shuffle.i) and a
; splat of lane 7 of %v. The mnemonic is matched in full rather than by its
; suffix.
define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1315
; The sqdmull intrinsic is applied to elements 2-3 of %a (%shuffle.i) and a
; splat of lane 3 of %v. The mnemonic is matched in full rather than by its
; suffix.
define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1326
; The sqdmulh intrinsic is applied to %a and a splat of lane 3 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}
1336
; The sqdmulh intrinsic is applied to %a and lane 3 of the 4-element %v
; splatted across 8 lanes; one by-element instruction is expected. The
; mnemonic is matched in full rather than by its suffix.
define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}
1346
; The sqdmulh intrinsic is applied to %a and a splat of lane 1 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}
1356
; The sqdmulh intrinsic is applied to %a and lane 1 of the 2-element %v
; splatted across 4 lanes; one by-element instruction is expected. The
; mnemonic is matched in full rather than by its suffix.
define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}
1366
; The sqrdmulh intrinsic is applied to %a and a splat of lane 3 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}
1376
; The sqrdmulh intrinsic is applied to %a and lane 3 of the 4-element %v
; splatted across 8 lanes; one by-element instruction is expected. The
; mnemonic is matched in full rather than by its suffix.
define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}
1386
; The sqrdmulh intrinsic is applied to %a and a splat of lane 1 of %v; one
; by-element instruction is expected. The mnemonic is matched in full rather
; than by its suffix.
define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}
1396
; The sqrdmulh intrinsic is applied to %a and lane 1 of the 2-element %v
; splatted across 4 lanes; one by-element instruction is expected. The
; mnemonic is matched in full rather than by its suffix.
define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}
1406
; Float multiply by element: lane 1 of %v is broadcast to both lanes and
; multiplied with %a; a single indexed fmul followed by ret is expected.
define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane.splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %product = fmul <2 x float> %lane.splat, %a
  ret <2 x float> %product
}
1416
; Scalar f64 multiply: %a is reinterpreted as a double through an <8 x i8>
; round-trip, multiplied by element 0 of %v, and re-wrapped as <1 x double>.
; A plain scalar fmul on d registers is expected.
define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmul_lane_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %v.elt = extractelement <1 x double> %v, i32 0
  %prod = fmul double %a.scalar, %v.elt
  %res = insertelement <1 x double> undef, double %prod, i32 0
  ret <1 x double> %res
}
1429
; Multiplies %a by lane 1 of the 64-bit %v broadcast to four float lanes; the
; broadcast is expected to fold into the by-element form of fmul.
define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane.dup = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = fmul <4 x float> %lane.dup, %a
  ret <4 x float> %prod
}
1439
; Multiplies %a by the single element of %v broadcast to two double lanes; the
; broadcast is expected to fold into the by-element form of fmul (lane 0).
define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulq_lane_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane.dup = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> <i32 0, i32 0>
  %prod = fmul <2 x double> %lane.dup, %a
  ret <2 x double> %prod
}
1449
; Multiplies %a by lane 3 of the 128-bit %v broadcast to two float lanes; the
; broadcast is expected to fold into the by-element form of fmul.
define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane.dup = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %prod = fmul <2 x float> %lane.dup, %a
  ret <2 x float> %prod
}
1459
; Scalar f64 multiply by a vector element: %a is reinterpreted as a double
; through an <8 x i8> round-trip and multiplied by element 1 of %v. The scalar
; by-element form of fmul (d, d, v.d[1]) is expected.
define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %v.elt = extractelement <2 x double> %v, i32 1
  %prod = fmul double %a.scalar, %v.elt
  %res = insertelement <1 x double> undef, double %prod, i32 0
  ret <1 x double> %res
}
1472
; Multiplies %a by lane 3 of %v broadcast to four float lanes; the broadcast
; is expected to fold into the by-element form of fmul.
define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane.dup = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = fmul <4 x float> %lane.dup, %a
  ret <4 x float> %prod
}
1482
; Multiplies %a by lane 1 of %v broadcast to two double lanes; the broadcast
; is expected to fold into the by-element form of fmul.
define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %lane.dup = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x double> %lane.dup, %a
  ret <2 x double> %prod
}
1492
; Broadcasts lane 1 of %v to both float lanes and passes it to the fmulx
; intrinsic; the broadcast is expected to fold into the by-element encoding.
; The complete "fmulx" mnemonic is matched instead of a trailing substring.
define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}
1502
; Broadcasts lane 1 of the 64-bit %v to four float lanes and passes it to the
; fmulx intrinsic; the broadcast is expected to fold into the by-element
; encoding. The complete "fmulx" mnemonic is matched.
define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}
1512
; Broadcasts the single element of %v to two double lanes and passes it to the
; fmulx intrinsic; the broadcast is expected to fold into the by-element
; encoding (lane 0). The complete "fmulx" mnemonic is matched.
define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
1522
; Broadcasts lane 3 of the 128-bit %v to two float lanes and passes it to the
; fmulx intrinsic; the broadcast is expected to fold into the by-element
; encoding. The complete "fmulx" mnemonic is matched.
define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}
1532
; Broadcasts lane 3 of %v to four float lanes and passes it to the fmulx
; intrinsic; the broadcast is expected to fold into the by-element encoding.
; The complete "fmulx" mnemonic is matched.
define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}
1542
; Broadcasts lane 1 of %v to two double lanes and passes it to the fmulx
; intrinsic; the broadcast is expected to fold into the by-element encoding.
; The complete "fmulx" mnemonic is matched.
define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
1552
; Computes %a + %b * splat(%v[0]) on <4 x i16>; the lane-0 broadcast is
; expected to fold into a by-element mla.
define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16_0:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
1563
; Computes %a + %b * splat(%v[0]) on <8 x i16> with a 64-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mla.
define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16_0:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1574
; Computes %a + %b * splat(%v[0]) on <2 x i32>; the lane-0 broadcast is
; expected to fold into a by-element mla.
define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32_0:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
1585
; Computes %a + %b * splat(%v[0]) on <4 x i32> with a 64-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mla.
define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32_0:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1596
; Computes %a + %b * splat(%v[0]) on <4 x i16> with a 128-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mla.
define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16_0:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
1607
; Computes %a + %b * splat(%v[0]) on <8 x i16>; the lane-0 broadcast is
; expected to fold into a by-element mla.
define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16_0:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1618
; Computes %a + %b * splat(%v[0]) on <2 x i32> with a 128-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mla.
define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32_0:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
1629
; Computes %a + %b * splat(%v[0]) on <4 x i32>; the lane-0 broadcast is
; expected to fold into a by-element mla.
define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32_0:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1640
; Computes %a - %b * splat(%v[0]) on <4 x i16>; the lane-0 broadcast is
; expected to fold into a by-element mls.
define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16_0:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
1651
; Computes %a - %b * splat(%v[0]) on <8 x i16> with a 64-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mls.
define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1662
; Computes %a - %b * splat(%v[0]) on <2 x i32>; the lane-0 broadcast is
; expected to fold into a by-element mls.
define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
1673
; Computes %a - %b * splat(%v[0]) on <4 x i32> with a 64-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mls.
define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1684
; Computes %a - %b * splat(%v[0]) on <4 x i16> with a 128-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mls.
define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
1695
; Computes %a - %b * splat(%v[0]) on <8 x i16>; the lane-0 broadcast is
; expected to fold into a by-element mls.
define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1706
; Computes %a - %b * splat(%v[0]) on <2 x i32> with a 128-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element mls.
define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
1717
; Computes %a - %b * splat(%v[0]) on <4 x i32>; the lane-0 broadcast is
; expected to fold into a by-element mls.
define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1728
; Multiplies %a by lane 0 of %v broadcast to four i16 lanes; the broadcast is
; expected to fold into a by-element mul.
define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1738
; Multiplies %a by lane 0 of the 64-bit %v broadcast to eight i16 lanes; the
; broadcast is expected to fold into a by-element mul.
define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1748
; Multiplies %a by lane 0 of %v broadcast to two i32 lanes; the broadcast is
; expected to fold into a by-element mul.
define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1758
; Multiplies %a by lane 0 of the 64-bit %v broadcast to four i32 lanes; the
; broadcast is expected to fold into a by-element mul.
define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1768
; Unsigned-named variant: multiplies %a by lane 0 of %v broadcast to four i16
; lanes. The IR mul carries no signedness, so the same by-element mul is
; expected.
define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1778
; Unsigned-named variant: multiplies %a by lane 0 of the 64-bit %v broadcast
; to eight i16 lanes. The IR mul carries no signedness, so the same by-element
; mul is expected.
define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1788
; Unsigned-named variant: multiplies %a by lane 0 of %v broadcast to two i32
; lanes. The IR mul carries no signedness, so the same by-element mul is
; expected.
define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1798
; Unsigned-named variant: multiplies %a by lane 0 of the 64-bit %v broadcast
; to four i32 lanes. The IR mul carries no signedness, so the same by-element
; mul is expected.
define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1808
; Multiplies %a by lane 0 of the 128-bit %v broadcast to four i16 lanes; the
; broadcast is expected to fold into a by-element mul.
define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1818
; Multiplies %a by lane 0 of %v broadcast to eight i16 lanes; the broadcast is
; expected to fold into a by-element mul.
define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1828
; Multiplies %a by lane 0 of the 128-bit %v broadcast to two i32 lanes; the
; broadcast is expected to fold into a by-element mul.
define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1838
; Multiplies %a by lane 0 of %v broadcast to four i32 lanes; the broadcast is
; expected to fold into a by-element mul.
define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1848
; Unsigned-named variant: multiplies %a by lane 0 of the 128-bit %v broadcast
; to four i16 lanes. The IR mul carries no signedness, so the same by-element
; mul is expected.
define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1858
; Unsigned-named variant: multiplies %a by lane 0 of %v broadcast to eight i16
; lanes. The IR mul carries no signedness, so the same by-element mul is
; expected.
define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1868
; Unsigned-named variant: multiplies %a by lane 0 of the 128-bit %v broadcast
; to two i32 lanes. The IR mul carries no signedness, so the same by-element
; mul is expected.
define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1878
; Unsigned-named variant: multiplies %a by lane 0 of %v broadcast to four i32
; lanes. The IR mul carries no signedness, so the same by-element mul is
; expected.
define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1888
; fma(splat(%v[0]), %b, %a) on <2 x float>; the lane-0 broadcast is expected
; to fold into a by-element fmla.
define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 0, i32 0>
  %fused = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fused
}
1898
; fma(splat(%v[0]), %b, %a) on <4 x float> with a 64-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element fmla.
define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fused = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fused
}
1908
; fma(splat(%v[0]), %b, %a) on <2 x float> with a 128-bit lane source; the
; lane-0 broadcast is expected to fold into a by-element fmla.
define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 0, i32 0>
  %fused = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fused
}
1918
; fma(splat(%v[0]), %b, %a) on <4 x float>; the lane-0 broadcast is expected
; to fold into a by-element fmla.
define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fused = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fused
}
1928
; Negates %v (as -0.0 - %v), broadcasts lane 0, and feeds it to fma with %b and
; %a; the negation and broadcast are expected to fold into a by-element fmls.
define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> <i32 0, i32 0>
  %fused = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fused
}
1939
; Negates the 64-bit %v (as -0.0 - %v), broadcasts lane 0 to four lanes, and
; feeds it to fma with %b and %a; a by-element fmls is expected.
define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fused = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fused
}
1950
; Negates the 128-bit %v (as -0.0 - %v), broadcasts lane 0 to two lanes, and
; feeds it to fma with %b and %a; a by-element fmls is expected.
define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <2 x i32> <i32 0, i32 0>
  %fused = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fused
}
1961
; Negates %v (as -0.0 - %v), broadcasts lane 0 to four lanes, and feeds it to
; fma with %b and %a; a by-element fmls is expected.
define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fused = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fused
}
1972
; fma(splat(%v[0]), %b, %a) on <2 x double>; the lane-0 broadcast is expected
; to fold into a by-element fmla.
define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64_0:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %fused = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fused
}
1982
; Negates %v (as -0.0 - %v), broadcasts lane 0 to two double lanes, and feeds
; it to fma with %b and %a; a by-element fmls is expected.
define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64_0:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %splat = shufflevector <2 x double> %neg, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %fused = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fused
}
1993
; Signed widening multiply of %b by lane 0 of %v (smull intrinsic), then
; accumulated into %a. The signed "smlal" mnemonic is matched in full so that
; an unsigned umlal cannot satisfy the pattern.
define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2004
; Signed widening multiply of %b by lane 0 of %v (smull intrinsic), then
; accumulated into %a. The signed "smlal" mnemonic is matched in full so that
; an unsigned umlal cannot satisfy the pattern.
define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2015
; Signed widening multiply of %b by lane 0 of the 128-bit %v (smull
; intrinsic), then accumulated into %a. The signed "smlal" mnemonic is matched
; in full so that an unsigned umlal cannot satisfy the pattern.
define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2026
; Signed widening multiply of %b by lane 0 of the 128-bit %v (smull
; intrinsic), then accumulated into %a. The signed "smlal" mnemonic is matched
; in full so that an unsigned umlal cannot satisfy the pattern.
define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2037
; Signed widening multiply of the upper four lanes of %b by lane 0 of %v
; (smull intrinsic), then accumulated into %a. The signed "smlal2" mnemonic is
; matched in full so that an unsigned umlal2 cannot satisfy the pattern.
define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2049
; Signed widening multiply of the upper two lanes of %b by lane 0 of %v
; (smull intrinsic), then accumulated into %a. The signed "smlal2" mnemonic is
; matched in full so that an unsigned umlal2 cannot satisfy the pattern.
define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2061
; Signed widening multiply of the upper four lanes of %b by lane 0 of the
; 128-bit %v (smull intrinsic), then accumulated into %a. The signed "smlal2"
; mnemonic is matched in full so that an unsigned umlal2 cannot pass.
define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2073
; Signed widening multiply of the upper two lanes of %b by lane 0 of the
; 128-bit %v (smull intrinsic), then accumulated into %a. The signed "smlal2"
; mnemonic is matched in full so that an unsigned umlal2 cannot pass.
define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2085
; Signed widening multiply of %b by lane 0 of %v (smull intrinsic), then
; subtracted from %a. The signed "smlsl" mnemonic is matched in full so that
; an unsigned umlsl cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2096
; Signed widening multiply of %b by lane 0 of %v (smull intrinsic), then
; subtracted from %a. The signed "smlsl" mnemonic is matched in full so that
; an unsigned umlsl cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2107
; Signed widening multiply of %b by lane 0 of the 128-bit %v (smull
; intrinsic), then subtracted from %a. The signed "smlsl" mnemonic is matched
; in full so that an unsigned umlsl cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2118
; Signed widening multiply of %b by lane 0 of the 128-bit %v (smull
; intrinsic), then subtracted from %a. The signed "smlsl" mnemonic is matched
; in full so that an unsigned umlsl cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2129
; Signed widening multiply of the upper four lanes of %b by lane 0 of %v
; (smull intrinsic), then subtracted from %a. The signed "smlsl2" mnemonic is
; matched in full so that an unsigned umlsl2 cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2141
; Signed widening multiply of the upper two lanes of %b by lane 0 of %v
; (smull intrinsic), then subtracted from %a. The signed "smlsl2" mnemonic is
; matched in full so that an unsigned umlsl2 cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2153
; Signed widening multiply of the upper four lanes of %b by lane 0 of the
; 128-bit %v (smull intrinsic), then subtracted from %a. The signed "smlsl2"
; mnemonic is matched in full so that an unsigned umlsl2 cannot pass.
define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2165
; Signed widening multiply of the high half of %b by a splat of lane 0 of the
; 128-bit %v, subtracted from %a. The mnemonic is spelled in full (smlsl2) so
; that an unsigned umlsl2 selection cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2177
; Unsigned widening multiply of %b by a splat of lane 0 of %v, added to %a.
; The mnemonic is spelled in full (umlal) so that a signed smlal selection
; cannot satisfy the pattern.
define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2188
; Unsigned widening multiply of %b by a splat of lane 0 of %v, added to %a.
; The mnemonic is spelled in full (umlal) so that a signed smlal selection
; cannot satisfy the pattern.
define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2199
; Unsigned widening multiply of %b by a splat of lane 0 of the 128-bit %v,
; added to %a. The mnemonic is spelled in full (umlal) so that a signed smlal
; selection cannot satisfy the pattern.
define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2210
; Unsigned widening multiply of %b by a splat of lane 0 of the 128-bit %v,
; added to %a. The mnemonic is spelled in full (umlal) so that a signed smlal
; selection cannot satisfy the pattern.
define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2221
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; %v, added to %a. The mnemonic is spelled in full (umlal2) so that a signed
; smlal2 selection cannot satisfy the pattern.
define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2233
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; %v, added to %a. The mnemonic is spelled in full (umlal2) so that a signed
; smlal2 selection cannot satisfy the pattern.
define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2245
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; the 128-bit %v, added to %a. The mnemonic is spelled in full (umlal2) so
; that a signed smlal2 selection cannot satisfy the pattern.
define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2257
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; the 128-bit %v, added to %a. The mnemonic is spelled in full (umlal2) so
; that a signed smlal2 selection cannot satisfy the pattern.
define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2269
; Unsigned widening multiply of %b by a splat of lane 0 of %v, subtracted
; from %a. The mnemonic is spelled in full (umlsl) so that a signed smlsl
; selection cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2280
; Unsigned widening multiply of %b by a splat of lane 0 of %v, subtracted
; from %a. The mnemonic is spelled in full (umlsl) so that a signed smlsl
; selection cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2291
; Unsigned widening multiply of %b by a splat of lane 0 of the 128-bit %v,
; subtracted from %a. The mnemonic is spelled in full (umlsl) so that a signed
; smlsl selection cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2302
; Unsigned widening multiply of %b by a splat of lane 0 of the 128-bit %v,
; subtracted from %a. The mnemonic is spelled in full (umlsl) so that a signed
; smlsl selection cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2313
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; %v, subtracted from %a. The mnemonic is spelled in full (umlsl2) so that a
; signed smlsl2 selection cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2325
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; %v, subtracted from %a. The mnemonic is spelled in full (umlsl2) so that a
; signed smlsl2 selection cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2337
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; the 128-bit %v, subtracted from %a. The mnemonic is spelled in full (umlsl2)
; so that a signed smlsl2 selection cannot satisfy the pattern.
define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2349
; Unsigned widening multiply of the high half of %b by a splat of lane 0 of
; the 128-bit %v, subtracted from %a. The mnemonic is spelled in full (umlsl2)
; so that a signed smlsl2 selection cannot satisfy the pattern.
define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2361
; Signed widening multiply of %a by a splat of lane 0 of %v. The mnemonic is
; spelled in full (smull) so that umull or pmull cannot satisfy the pattern.
define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2371
; Signed widening multiply of %a by a splat of lane 0 of %v. The mnemonic is
; spelled in full (smull) so that umull or pmull cannot satisfy the pattern.
define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2381
; Unsigned widening multiply of %a by a splat of lane 0 of %v. The mnemonic is
; spelled in full (umull) so that smull or pmull cannot satisfy the pattern.
define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2391
; Unsigned widening multiply of %a by a splat of lane 0 of %v. The mnemonic is
; spelled in full (umull) so that smull or pmull cannot satisfy the pattern.
define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2401
; Signed widening multiply of the high half of %a by a splat of lane 0 of %v.
; The mnemonic is spelled in full (smull2) so that umull2 or pmull2 cannot
; satisfy the pattern.
define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2412
; Signed widening multiply of the high half of %a by a splat of lane 0 of %v.
; The mnemonic is spelled in full (smull2) so that umull2 or pmull2 cannot
; satisfy the pattern.
define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2423
; Unsigned widening multiply of the high half of %a by a splat of lane 0 of
; %v. The mnemonic is spelled in full (umull2) so that smull2 or pmull2 cannot
; satisfy the pattern.
define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2434
; Unsigned widening multiply of the high half of %a by a splat of lane 0 of
; %v. The mnemonic is spelled in full (umull2) so that smull2 or pmull2 cannot
; satisfy the pattern.
define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2445
; Signed widening multiply of %a by a splat of lane 0 of the 128-bit %v. The
; mnemonic is spelled in full (smull) so that umull or pmull cannot satisfy
; the pattern.
define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2455
; Signed widening multiply of %a by a splat of lane 0 of the 128-bit %v. The
; mnemonic is spelled in full (smull) so that umull or pmull cannot satisfy
; the pattern.
define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2465
; Unsigned widening multiply of %a by a splat of lane 0 of the 128-bit %v. The
; mnemonic is spelled in full (umull) so that smull or pmull cannot satisfy
; the pattern.
define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2475
; Unsigned widening multiply of %a by a splat of lane 0 of the 128-bit %v. The
; mnemonic is spelled in full (umull) so that smull or pmull cannot satisfy
; the pattern.
define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2485
; Signed widening multiply of the high half of %a by a splat of lane 0 of the
; 128-bit %v. The mnemonic is spelled in full (smull2) so that umull2 or
; pmull2 cannot satisfy the pattern.
define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2496
; Signed widening multiply of the high half of %a by a splat of lane 0 of the
; 128-bit %v. The mnemonic is spelled in full (smull2) so that umull2 or
; pmull2 cannot satisfy the pattern.
define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2507
; Unsigned widening multiply of the high half of %a by a splat of lane 0 of
; the 128-bit %v. The mnemonic is spelled in full (umull2) so that smull2 or
; pmull2 cannot satisfy the pattern.
define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2518
; Unsigned widening multiply of the high half of %a by a splat of lane 0 of
; the 128-bit %v. The mnemonic is spelled in full (umull2) so that smull2 or
; pmull2 cannot satisfy the pattern.
define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2529
; sqdmull of %b with a lane-0 splat of %v, fed into sqadd with %a; the pair is
; expected to be selected as one by-element instruction.
define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16_0:
; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v across all four lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %prod)
  ret <4 x i32> %acc
}
2540
; sqdmull of %b with a lane-0 splat of %v, fed into sqadd with %a; the pair is
; expected to be selected as one by-element instruction.
define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32_0:
; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v across both lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %prod)
  ret <2 x i64> %acc
}
2551
; sqdmull of the upper half of %b with a lane-0 splat of %v, fed into sqadd
; with %a; expected to be selected as one "2" (high-half) by-element
; instruction.
define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %b.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast element 0 of %v.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %acc = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %prod)
  ret <4 x i32> %acc
}
2563
; sqdmull of the upper half of %b with a lane-0 splat of %v, fed into sqadd
; with %a; expected to be selected as one "2" (high-half) by-element
; instruction.
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %b.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  %acc = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %prod)
  ret <2 x i64> %acc
}
2575
; sqdmull of %b with a lane-0 splat of %v, fed into sqsub from %a; the pair is
; expected to be selected as one by-element instruction.
define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v across all four lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %diff = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %prod)
  ret <4 x i32> %diff
}
2586
; sqdmull of %b with a lane-0 splat of %v, fed into sqsub from %a; the pair is
; expected to be selected as one by-element instruction.
define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v across both lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %diff = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %prod)
  ret <2 x i64> %diff
}
2597
; sqdmull of the upper half of %b with a lane-0 splat of %v, fed into sqsub
; from %a; expected to be selected as one "2" (high-half) by-element
; instruction.
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %b.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast element 0 of %v.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %diff = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %prod)
  ret <4 x i32> %diff
}
2609
; sqdmull of the upper half of %b with a lane-0 splat of %v, fed into sqsub
; from %a; expected to be selected as one "2" (high-half) by-element
; instruction.
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %b.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  %diff = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %prod)
  ret <2 x i64> %diff
}
2621
; sqdmull of %a with a lane-0 splat of %v; expected to be selected as the
; by-element form.
define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16_0:
; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i32> %prod
}
2631
; sqdmull of %a with a lane-0 splat of %v; expected to be selected as the
; by-element form.
define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32_0:
; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %splat)
  ret <2 x i64> %prod
}
2641
; sqdmull of %a with a lane-0 splat taken from the 128-bit %v; expected to be
; selected as the by-element form.
define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16_0:
; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into a four-lane vector.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i32> %prod
}
2651
; sqdmull of %a with a lane-0 splat taken from the 128-bit %v; expected to be
; selected as the by-element form.
define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32_0:
; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into a two-lane vector.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %splat)
  ret <2 x i64> %prod
}
2661
; sqdmull of the upper half of %a with a lane-0 splat of %v; expected to be
; selected as the "2" (high-half) by-element form.
define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %a.
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast element 0 of %v.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %prod
}
2672
; sqdmull of the upper half of %a with a lane-0 splat of %v; expected to be
; selected as the "2" (high-half) by-element form.
define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %a.
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %prod
}
2683
; sqdmull of the upper half of %a with a lane-0 splat taken from the 128-bit
; %v; expected to be selected as the "2" (high-half) by-element form.
define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %a.
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast element 0 of %v into a four-lane vector.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %prod
}
2694
; sqdmull of the upper half of %a with a lane-0 splat taken from the 128-bit
; %v; expected to be selected as the "2" (high-half) by-element form.
define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %a.
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast element 0 of %v into a two-lane vector.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %prod
}
2705
; sqdmulh of %a with a lane-0 splat of %v; expected to be selected as the
; by-element form.
define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16_0:
; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i16> %res
}
2715
; sqdmulh of the 128-bit %a with lane 0 of the 64-bit %v widened to an
; eight-lane splat; expected to be selected as the by-element form.
define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into eight lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %res = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %splat)
  ret <8 x i16> %res
}
2725
; sqdmulh of %a with a lane-0 splat of %v; expected to be selected as the
; by-element form.
define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32_0:
; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %splat)
  ret <2 x i32> %res
}
2735
; sqdmulh of the 128-bit %a with lane 0 of the 64-bit %v widened to a
; four-lane splat; expected to be selected as the by-element form.
define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into four lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %splat)
  ret <4 x i32> %res
}
2745
; sqrdmulh of %a with a lane-0 splat of %v; expected to be selected as the
; by-element form.
define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i16> %res
}
2755
; sqrdmulh of the 128-bit %a with lane 0 of the 64-bit %v widened to an
; eight-lane splat; expected to be selected as the by-element form.
define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into eight lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %res = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %splat)
  ret <8 x i16> %res
}
2765
; sqrdmulh of %a with a lane-0 splat of %v; expected to be selected as the
; by-element form.
define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %splat)
  ret <2 x i32> %res
}
2775
; sqrdmulh of the 128-bit %a with lane 0 of the 64-bit %v widened to a
; four-lane splat; expected to be selected as the by-element form.
define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into four lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %splat)
  ret <4 x i32> %res
}
2785
; Plain IR fmul of a lane-0 splat of %v with %a; expected to be selected as a
; by-element fmul.
define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v.
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %res = fmul <2 x float> %splat, %a
  ret <2 x float> %res
}
2795
; Plain IR fmul of the 128-bit %a with lane 0 of the 64-bit %v widened to a
; four-lane splat; expected to be selected as a by-element fmul.
define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into four lanes.
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %res = fmul <4 x float> %splat, %a
  ret <4 x float> %res
}
2805
; Plain IR fmul of %a with a two-lane splat of lane 0 of the 128-bit %v;
; expected to be selected as a by-element fmul.
define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; Broadcast element 0 of %v into two lanes.
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %res = fmul <2 x float> %splat, %a
  ret <2 x float> %res
}
2815
; Scalar double multiply: %a is reinterpreted as a double via two bitcasts and
; multiplied by element 0 extracted from %v; the product is placed back into a
; one-element vector. Expected to be selected as a scalar by-element fmul.
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; <1 x double> -> <8 x i8> -> double round trip.
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %lane0 = extractelement <2 x double> %v, i32 0
  %prod = fmul double %a.scalar, %lane0
  %res = insertelement <1 x double> undef, double %prod, i32 0
  ret <1 x double> %res
}
2828
; Lane-0 variant: multiply %a by a four-lane broadcast of element 0 of %v.
; The expected output is a single by-element fmul on .4s operands reading
; lane s[0], followed directly by the return.
define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; An all-zero shuffle mask selects element 0 for every result lane.
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %prod = fmul <4 x float> %splat, %a
  ret <4 x float> %prod
}
2838
; Lane-0 variant: multiply %a by a two-lane broadcast of element 0 of %v.
; The expected output is a single by-element fmul on .2d operands reading
; lane d[0], followed directly by the return.
define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; An all-zero shuffle mask selects element 0 for every result lane.
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %prod = fmul <2 x double> %splat, %a
  ret <2 x double> %prod
}
2848
; Lane-0 variant: element 0 of %v is broadcast to two lanes and passed as the
; second operand of the v2f32 fmulx intrinsic. The expected output is one
; by-element instruction on .2s operands reading lane s[0], then the return.
; The complete mnemonic is matched rather than the bare "mulx" suffix.
define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; An all-zero shuffle mask selects element 0 for every result lane.
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}
2858
; Lane-0 variant: element 0 of the two-element vector %v is broadcast to four
; lanes and passed as the second operand of the v4f32 fmulx intrinsic. The
; expected output is one by-element instruction on .4s operands reading lane
; s[0], then the return. The complete mnemonic is matched rather than the
; bare "mulx" suffix.
define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; The mask has four zero indices, so the result is four copies of %v[0].
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}
2868
; Lane-0 variant with a one-element lane source: the single element of %v is
; broadcast to two lanes and passed as the second operand of the v2f64 fmulx
; intrinsic. The expected output is one by-element instruction on .2d
; operands reading lane d[0], then the return. The complete mnemonic is
; matched rather than the bare "mulx" suffix.
define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; The mask has two zero indices, so the result is two copies of %v[0].
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
2878
; Lane-0 variant with a four-element lane source: element 0 of %v is
; broadcast to two lanes and passed as the second operand of the v2f32 fmulx
; intrinsic. The expected output is one by-element instruction on .2s
; operands reading lane s[0], then the return. The complete mnemonic is
; matched rather than the bare "mulx" suffix.
define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; The mask has two zero indices, so the result is two copies of %v[0].
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}
2888
; Lane-0 variant: element 0 of %v is broadcast to four lanes and passed as
; the second operand of the v4f32 fmulx intrinsic. The expected output is one
; by-element instruction on .4s operands reading lane s[0], then the return.
; The complete mnemonic is matched rather than the bare "mulx" suffix.
define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  ; An all-zero shuffle mask selects element 0 for every result lane.
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}
2898
; Lane-0 variant: element 0 of %v is broadcast to two lanes and passed as the
; second operand of the v2f64 fmulx intrinsic. The expected output is one
; by-element instruction on .2d operands reading lane d[0], then the return.
; The complete mnemonic is matched rather than the bare "mulx" suffix.
define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; An all-zero shuffle mask selects element 0 for every result lane.
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
2908
2909