1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
2
; Declarations of the AArch64 NEON intrinsics referenced by this test file,
; grouped by operation. Only the raddhn intrinsics are called in the tests
; directly below; the others are presumably used further down the file.

; Polynomial multiply long.
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)

; Signed / unsigned multiply long.
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

; Signed saturating doubling multiply long.
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

; Signed saturating add / subtract.
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

; Signed / unsigned absolute difference.
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)

; Rounding add / subtract returning the narrowed high half.
declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)
declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
52
; Widening signed add: both <8 x i8> inputs are sign-extended to <8 x i16>
; and summed. Expected to select saddl.
define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_s8:
; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.wide = sext <8 x i8> %a to <8 x i16>
  %b.wide = sext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %a.wide, %b.wide
  ret <8 x i16> %sum
}
62
; Widening signed add: both <4 x i16> inputs are sign-extended to <4 x i32>
; and summed. Expected to select saddl.
define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_s16:
; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.wide = sext <4 x i16> %a to <4 x i32>
  %b.wide = sext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %a.wide, %b.wide
  ret <4 x i32> %sum
}
72
; Widening signed add: both <2 x i32> inputs are sign-extended to <2 x i64>
; and summed. Expected to select saddl.
define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_s32:
; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.wide = sext <2 x i32> %a to <2 x i64>
  %b.wide = sext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %a.wide, %b.wide
  ret <2 x i64> %sum
}
82
; Widening unsigned add: both <8 x i8> inputs are zero-extended to <8 x i16>
; and summed. Expected to select uaddl.
define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_u8:
; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.wide = zext <8 x i8> %a to <8 x i16>
  %b.wide = zext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %a.wide, %b.wide
  ret <8 x i16> %sum
}
92
; Widening unsigned add: both <4 x i16> inputs are zero-extended to <4 x i32>
; and summed. Expected to select uaddl.
define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_u16:
; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.wide = zext <4 x i16> %a to <4 x i32>
  %b.wide = zext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %a.wide, %b.wide
  ret <4 x i32> %sum
}
102
; Widening unsigned add: both <2 x i32> inputs are zero-extended to <2 x i64>
; and summed. Expected to select uaddl.
define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_u32:
; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.wide = zext <2 x i32> %a to <2 x i64>
  %b.wide = zext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %a.wide, %b.wide
  ret <2 x i64> %sum
}
112
; Widening signed add of the upper halves: lanes 8..15 of each <16 x i8> input
; are extracted, sign-extended to i16 and summed. Expected to select saddl2.
define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_s8:
; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.wide = sext <8 x i8> %a.hi to <8 x i16>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = sext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %a.hi.wide, %b.hi.wide
  ret <8 x i16> %sum
}
124
; Widening signed add of the upper halves: lanes 4..7 of each <8 x i16> input
; are extracted, sign-extended to i32 and summed. Expected to select saddl2.
define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_s16:
; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a.hi.wide = sext <4 x i16> %a.hi to <4 x i32>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = sext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %a.hi.wide, %b.hi.wide
  ret <4 x i32> %sum
}
136
; Widening signed add of the upper halves: lanes 2..3 of each <4 x i32> input
; are extracted, sign-extended to i64 and summed. Expected to select saddl2.
define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_s32:
; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %a.hi.wide = sext <2 x i32> %a.hi to <2 x i64>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = sext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %a.hi.wide, %b.hi.wide
  ret <2 x i64> %sum
}
148
; Widening unsigned add of the upper halves: lanes 8..15 of each <16 x i8>
; input are extracted, zero-extended to i16 and summed. Expected to select
; uaddl2.
define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_u8:
; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.wide = zext <8 x i8> %a.hi to <8 x i16>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = zext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %a.hi.wide, %b.hi.wide
  ret <8 x i16> %sum
}
160
; Widening unsigned add of the upper halves: lanes 4..7 of each <8 x i16>
; input are extracted, zero-extended to i32 and summed. Expected to select
; uaddl2.
define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_u16:
; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a.hi.wide = zext <4 x i16> %a.hi to <4 x i32>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = zext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %a.hi.wide, %b.hi.wide
  ret <4 x i32> %sum
}
172
; Widening unsigned add of the upper halves: lanes 2..3 of each <4 x i32>
; input are extracted, zero-extended to i64 and summed. Expected to select
; uaddl2.
define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_u32:
; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %a.hi.wide = zext <2 x i32> %a.hi to <2 x i64>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = zext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %a.hi.wide, %b.hi.wide
  ret <2 x i64> %sum
}
184
; Wide signed add: the <8 x i8> operand %b is sign-extended to i16 and added
; to the already-wide %a. Expected to select saddw.
define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_s8:
; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %b.wide = sext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %b.wide, %a
  ret <8 x i16> %sum
}
193
; Wide signed add: the <4 x i16> operand %b is sign-extended to i32 and added
; to the already-wide %a. Expected to select saddw.
define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_s16:
; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %b.wide = sext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %b.wide, %a
  ret <4 x i32> %sum
}
202
; Wide signed add: the <2 x i32> operand %b is sign-extended to i64 and added
; to the already-wide %a. Expected to select saddw.
define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_s32:
; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %b.wide = sext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %b.wide, %a
  ret <2 x i64> %sum
}
211
; Wide unsigned add: the <8 x i8> operand %b is zero-extended to i16 and added
; to the already-wide %a. Expected to select uaddw.
define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_u8:
; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %b.wide = zext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %b.wide, %a
  ret <8 x i16> %sum
}
220
; Wide unsigned add: the <4 x i16> operand %b is zero-extended to i32 and
; added to the already-wide %a. Expected to select uaddw.
define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_u16:
; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %b.wide = zext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %b.wide, %a
  ret <4 x i32> %sum
}
229
; Wide unsigned add: the <2 x i32> operand %b is zero-extended to i64 and
; added to the already-wide %a. Expected to select uaddw.
define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_u32:
; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %b.wide = zext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %b.wide, %a
  ret <2 x i64> %sum
}
238
; Wide signed add using the upper half of %b: lanes 8..15 are extracted,
; sign-extended to i16 and added to %a. Expected to select saddw2.
define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_s8:
; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = sext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %b.hi.wide, %a
  ret <8 x i16> %sum
}
248
; Wide signed add using the upper half of %b: lanes 4..7 are extracted,
; sign-extended to i32 and added to %a. Expected to select saddw2.
define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_s16:
; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = sext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %b.hi.wide, %a
  ret <4 x i32> %sum
}
258
; Wide signed add using the upper half of %b: lanes 2..3 are extracted,
; sign-extended to i64 and added to %a. Expected to select saddw2.
define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_s32:
; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = sext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %b.hi.wide, %a
  ret <2 x i64> %sum
}
268
; Wide unsigned add using the upper half of %b: lanes 8..15 are extracted,
; zero-extended to i16 and added to %a. Expected to select uaddw2.
define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_u8:
; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = zext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %b.hi.wide, %a
  ret <8 x i16> %sum
}
278
; Wide unsigned add using the upper half of %b: lanes 4..7 are extracted,
; zero-extended to i32 and added to %a. Expected to select uaddw2.
define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_u16:
; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = zext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %b.hi.wide, %a
  ret <4 x i32> %sum
}
288
; Wide unsigned add using the upper half of %b: lanes 2..3 are extracted,
; zero-extended to i64 and added to %a. Expected to select uaddw2.
define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_u32:
; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = zext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %b.hi.wide, %a
  ret <2 x i64> %sum
}
298
; Widening signed subtract: both <8 x i8> inputs are sign-extended to
; <8 x i16>, then %b is subtracted from %a. Expected to select ssubl.
define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_s8:
; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.wide = sext <8 x i8> %a to <8 x i16>
  %b.wide = sext <8 x i8> %b to <8 x i16>
  %diff = sub <8 x i16> %a.wide, %b.wide
  ret <8 x i16> %diff
}
308
; Widening signed subtract: both <4 x i16> inputs are sign-extended to
; <4 x i32>, then %b is subtracted from %a. Expected to select ssubl.
define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_s16:
; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.wide = sext <4 x i16> %a to <4 x i32>
  %b.wide = sext <4 x i16> %b to <4 x i32>
  %diff = sub <4 x i32> %a.wide, %b.wide
  ret <4 x i32> %diff
}
318
; Widening signed subtract: both <2 x i32> inputs are sign-extended to
; <2 x i64>, then %b is subtracted from %a. Expected to select ssubl.
define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_s32:
; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.wide = sext <2 x i32> %a to <2 x i64>
  %b.wide = sext <2 x i32> %b to <2 x i64>
  %diff = sub <2 x i64> %a.wide, %b.wide
  ret <2 x i64> %diff
}
328
; Widening unsigned subtract: both <8 x i8> inputs are zero-extended to
; <8 x i16>, then %b is subtracted from %a. Expected to select usubl.
define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_u8:
; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.wide = zext <8 x i8> %a to <8 x i16>
  %b.wide = zext <8 x i8> %b to <8 x i16>
  %diff = sub <8 x i16> %a.wide, %b.wide
  ret <8 x i16> %diff
}
338
; Widening unsigned subtract: both <4 x i16> inputs are zero-extended to
; <4 x i32>, then %b is subtracted from %a. Expected to select usubl.
define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_u16:
; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.wide = zext <4 x i16> %a to <4 x i32>
  %b.wide = zext <4 x i16> %b to <4 x i32>
  %diff = sub <4 x i32> %a.wide, %b.wide
  ret <4 x i32> %diff
}
348
; Widening unsigned subtract: both <2 x i32> inputs are zero-extended to
; <2 x i64>, then %b is subtracted from %a. Expected to select usubl.
define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_u32:
; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.wide = zext <2 x i32> %a to <2 x i64>
  %b.wide = zext <2 x i32> %b to <2 x i64>
  %diff = sub <2 x i64> %a.wide, %b.wide
  ret <2 x i64> %diff
}
358
; Widening signed subtract of the upper halves: lanes 8..15 of each
; <16 x i8> input are extracted and sign-extended to i16, then %b's half is
; subtracted from %a's. Expected to select ssubl2.
define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_s8:
; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.wide = sext <8 x i8> %a.hi to <8 x i16>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = sext <8 x i8> %b.hi to <8 x i16>
  %diff = sub <8 x i16> %a.hi.wide, %b.hi.wide
  ret <8 x i16> %diff
}
370
; Widening signed subtract of the upper halves: lanes 4..7 of each
; <8 x i16> input are extracted and sign-extended to i32, then %b's half is
; subtracted from %a's. Expected to select ssubl2.
define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_s16:
; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a.hi.wide = sext <4 x i16> %a.hi to <4 x i32>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = sext <4 x i16> %b.hi to <4 x i32>
  %diff = sub <4 x i32> %a.hi.wide, %b.hi.wide
  ret <4 x i32> %diff
}
382
; Widening signed subtract of the upper halves: lanes 2..3 of each
; <4 x i32> input are extracted and sign-extended to i64, then %b's half is
; subtracted from %a's. Expected to select ssubl2.
define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_s32:
; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %a.hi.wide = sext <2 x i32> %a.hi to <2 x i64>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = sext <2 x i32> %b.hi to <2 x i64>
  %diff = sub <2 x i64> %a.hi.wide, %b.hi.wide
  ret <2 x i64> %diff
}
394
; Widening unsigned subtract of the upper halves: lanes 8..15 of each
; <16 x i8> input are extracted and zero-extended to i16, then %b's half is
; subtracted from %a's. Expected to select usubl2.
define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_u8:
; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.wide = zext <8 x i8> %a.hi to <8 x i16>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = zext <8 x i8> %b.hi to <8 x i16>
  %diff = sub <8 x i16> %a.hi.wide, %b.hi.wide
  ret <8 x i16> %diff
}
406
; Widening unsigned subtract of the upper halves: lanes 4..7 of each
; <8 x i16> input are extracted and zero-extended to i32, then %b's half is
; subtracted from %a's. Expected to select usubl2.
define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_u16:
; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a.hi.wide = zext <4 x i16> %a.hi to <4 x i32>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = zext <4 x i16> %b.hi to <4 x i32>
  %diff = sub <4 x i32> %a.hi.wide, %b.hi.wide
  ret <4 x i32> %diff
}
418
; Widening unsigned subtract of the upper halves: lanes 2..3 of each
; <4 x i32> input are extracted and zero-extended to i64, then %b's half is
; subtracted from %a's. Expected to select usubl2.
define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_u32:
; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %a.hi.wide = zext <2 x i32> %a.hi to <2 x i64>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = zext <2 x i32> %b.hi to <2 x i64>
  %diff = sub <2 x i64> %a.hi.wide, %b.hi.wide
  ret <2 x i64> %diff
}
430
; Wide signed subtract: the <8 x i8> operand %b is sign-extended to i16 and
; subtracted from the already-wide %a. Expected to select ssubw.
define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_s8:
; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %b.wide = sext <8 x i8> %b to <8 x i16>
  %diff = sub <8 x i16> %a, %b.wide
  ret <8 x i16> %diff
}
439
; Wide signed subtract: the <4 x i16> operand %b is sign-extended to i32 and
; subtracted from the already-wide %a. Expected to select ssubw.
define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_s16:
; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %b.wide = sext <4 x i16> %b to <4 x i32>
  %diff = sub <4 x i32> %a, %b.wide
  ret <4 x i32> %diff
}
448
; Wide signed subtract: the <2 x i32> operand %b is sign-extended to i64 and
; subtracted from the already-wide %a. Expected to select ssubw.
define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_s32:
; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %b.wide = sext <2 x i32> %b to <2 x i64>
  %diff = sub <2 x i64> %a, %b.wide
  ret <2 x i64> %diff
}
457
; Wide unsigned subtract: the <8 x i8> operand %b is zero-extended to i16 and
; subtracted from the already-wide %a. Expected to select usubw.
define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_u8:
; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %b.wide = zext <8 x i8> %b to <8 x i16>
  %diff = sub <8 x i16> %a, %b.wide
  ret <8 x i16> %diff
}
466
; Wide unsigned subtract: the <4 x i16> operand %b is zero-extended to i32
; and subtracted from the already-wide %a. Expected to select usubw.
define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_u16:
; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %b.wide = zext <4 x i16> %b to <4 x i32>
  %diff = sub <4 x i32> %a, %b.wide
  ret <4 x i32> %diff
}
475
; Wide unsigned subtract: the <2 x i32> operand %b is zero-extended to i64
; and subtracted from the already-wide %a. Expected to select usubw.
define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_u32:
; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %b.wide = zext <2 x i32> %b to <2 x i64>
  %diff = sub <2 x i64> %a, %b.wide
  ret <2 x i64> %diff
}
484
; Wide signed subtract using the upper half of %b: lanes 8..15 are extracted,
; sign-extended to i16 and subtracted from %a. Expected to select ssubw2.
define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_s8:
; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = sext <8 x i8> %b.hi to <8 x i16>
  %diff = sub <8 x i16> %a, %b.hi.wide
  ret <8 x i16> %diff
}
494
; Wide signed subtract using the upper half of %b: lanes 4..7 are extracted,
; sign-extended to i32 and subtracted from %a. Expected to select ssubw2.
define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_s16:
; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = sext <4 x i16> %b.hi to <4 x i32>
  %diff = sub <4 x i32> %a, %b.hi.wide
  ret <4 x i32> %diff
}
504
; Wide signed subtract using the upper half of %b: lanes 2..3 are extracted,
; sign-extended to i64 and subtracted from %a. Expected to select ssubw2.
define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_s32:
; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = sext <2 x i32> %b.hi to <2 x i64>
  %diff = sub <2 x i64> %a, %b.hi.wide
  ret <2 x i64> %diff
}
514
; Wide unsigned subtract using the upper half of %b: lanes 8..15 are
; extracted, zero-extended to i16 and subtracted from %a. Expected to select
; usubw2.
define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_u8:
; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.wide = zext <8 x i8> %b.hi to <8 x i16>
  %diff = sub <8 x i16> %a, %b.hi.wide
  ret <8 x i16> %diff
}
524
; Wide unsigned subtract using the upper half of %b: lanes 4..7 are
; extracted, zero-extended to i32 and subtracted from %a. Expected to select
; usubw2.
define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_u16:
; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.wide = zext <4 x i16> %b.hi to <4 x i32>
  %diff = sub <4 x i32> %a, %b.hi.wide
  ret <4 x i32> %diff
}
534
; Wide unsigned subtract using the upper half of %b: lanes 2..3 are
; extracted, zero-extended to i64 and subtracted from %a. Expected to select
; usubw2.
define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_u32:
; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.wide = zext <2 x i32> %b.hi to <2 x i64>
  %diff = sub <2 x i64> %a, %b.hi.wide
  ret <2 x i64> %diff
}
544
; Add returning the narrowed high half: %a + %b is shifted right by 8 and
; truncated to i8 per lane. Expected to select addhn.
define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_s16:
; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %sum = add <8 x i16> %a, %b
  %sum.hi = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <8 x i16> %sum.hi to <8 x i8>
  ret <8 x i8> %narrow
}
554
; Add returning the narrowed high half: %a + %b is shifted right by 16 and
; truncated to i16 per lane. Expected to select addhn.
define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_s32:
; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %sum = add <4 x i32> %a, %b
  %sum.hi = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %sum.hi to <4 x i16>
  ret <4 x i16> %narrow
}
564
; Add returning the narrowed high half: %a + %b is shifted right by 32 and
; truncated to i32 per lane. Expected to select addhn.
define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_s64:
; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %sum = add <2 x i64> %a, %b
  %sum.hi = lshr <2 x i64> %sum, <i64 32, i64 32>
  %narrow = trunc <2 x i64> %sum.hi to <2 x i32>
  ret <2 x i32> %narrow
}
574
; Add returning the narrowed high half (u16-named variant): %a + %b is
; shifted right by 8 and truncated to i8 per lane. Expected to select addhn.
define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_u16:
; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %sum = add <8 x i16> %a, %b
  %sum.hi = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <8 x i16> %sum.hi to <8 x i8>
  ret <8 x i8> %narrow
}
584
; Add returning the narrowed high half (u32-named variant): %a + %b is
; shifted right by 16 and truncated to i16 per lane. Expected to select addhn.
define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_u32:
; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %sum = add <4 x i32> %a, %b
  %sum.hi = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %sum.hi to <4 x i16>
  ret <4 x i16> %narrow
}
594
; Add returning the narrowed high half (u64-named variant): %a + %b is
; shifted right by 32 and truncated to i32 per lane. Expected to select addhn.
define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_u64:
; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %sum = add <2 x i64> %a, %b
  %sum.hi = lshr <2 x i64> %sum, <i64 32, i64 32>
  %narrow = trunc <2 x i64> %sum.hi to <2 x i32>
  ret <2 x i32> %narrow
}
604
; Narrowing add into the upper half: the high 8 bits of each lane of %a + %b
; are packed as the second 64-bit element of the result, with %r as the
; first. Expected to select addhn2.
define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_high_s16:
; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %sum = add <8 x i16> %a, %b
  %sum.hi = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <8 x i16> %sum.hi to <8 x i8>
  %lo64 = bitcast <8 x i8> %r to <1 x i64>
  %hi64 = bitcast <8 x i8> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <16 x i8>
  ret <16 x i8> %result
}
618
; Narrowing add into the upper half: the high 16 bits of each lane of %a + %b
; are packed as the second 64-bit element of the result, with %r as the
; first. Expected to select addhn2.
define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_high_s32:
; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %sum = add <4 x i32> %a, %b
  %sum.hi = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %sum.hi to <4 x i16>
  %lo64 = bitcast <4 x i16> %r to <1 x i64>
  %hi64 = bitcast <4 x i16> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <8 x i16>
  ret <8 x i16> %result
}
632
; Narrowing add into the upper half: the high 32 bits of each lane of %a + %b
; are packed as the second 64-bit element of the result, with %r as the
; first. Expected to select addhn2.
define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_high_s64:
; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %sum = add <2 x i64> %a, %b
  %sum.hi = lshr <2 x i64> %sum, <i64 32, i64 32>
  %narrow = trunc <2 x i64> %sum.hi to <2 x i32>
  %lo64 = bitcast <2 x i32> %r to <1 x i64>
  %hi64 = bitcast <2 x i32> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <4 x i32>
  ret <4 x i32> %result
}
646
; Narrowing add into the upper half (u16-named variant): the high 8 bits of
; each lane of %a + %b are packed as the second 64-bit element of the result,
; with %r as the first. Expected to select addhn2.
define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_high_u16:
; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %sum = add <8 x i16> %a, %b
  %sum.hi = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <8 x i16> %sum.hi to <8 x i8>
  %lo64 = bitcast <8 x i8> %r to <1 x i64>
  %hi64 = bitcast <8 x i8> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <16 x i8>
  ret <16 x i8> %result
}
660
; Narrowing add into the upper half (u32-named variant): the high 16 bits of
; each lane of %a + %b are packed as the second 64-bit element of the result,
; with %r as the first. Expected to select addhn2.
define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_high_u32:
; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %sum = add <4 x i32> %a, %b
  %sum.hi = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %sum.hi to <4 x i16>
  %lo64 = bitcast <4 x i16> %r to <1 x i64>
  %hi64 = bitcast <4 x i16> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <8 x i16>
  ret <8 x i16> %result
}
674
; Narrowing add into the upper half (u64-named variant): the high 32 bits of
; each lane of %a + %b are packed as the second 64-bit element of the result,
; with %r as the first. Expected to select addhn2.
define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_high_u64:
; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %sum = add <2 x i64> %a, %b
  %sum.hi = lshr <2 x i64> %sum, <i64 32, i64 32>
  %narrow = trunc <2 x i64> %sum.hi to <2 x i32>
  %lo64 = bitcast <2 x i32> %r to <1 x i64>
  %hi64 = bitcast <2 x i32> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <4 x i32>
  ret <4 x i32> %result
}
688
; Calls the raddhn intrinsic on <8 x i16> operands and returns its <8 x i8>
; result. Expected to select raddhn.
define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_s16:
; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %narrow = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %narrow
}
696
; Calls the raddhn intrinsic on <4 x i32> operands and returns its <4 x i16>
; result. Expected to select raddhn.
define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_s32:
; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %narrow = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i16> %narrow
}
704
; Calls the raddhn intrinsic on <2 x i64> operands and returns its <2 x i32>
; result. Expected to select raddhn.
define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_s64:
; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %narrow = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i32> %narrow
}
712
; u16-named variant: calls the raddhn intrinsic on <8 x i16> operands and
; returns its <8 x i8> result. Expected to select raddhn.
define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_u16:
; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %narrow = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %narrow
}
720
; u32-named variant: calls the raddhn intrinsic on <4 x i32> operands and
; returns its <4 x i16> result. Expected to select raddhn.
define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_u32:
; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %narrow = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i16> %narrow
}
728
; u64-named variant: calls the raddhn intrinsic on <2 x i64> operands and
; returns its <2 x i32> result. Expected to select raddhn.
define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_u64:
; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %narrow = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i32> %narrow
}
736
; raddhn into the upper half: the <8 x i8> intrinsic result is packed as the
; second 64-bit element of the return value, with %r as the first. Expected
; to select raddhn2.
define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_high_s16:
; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %narrow = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  %lo64 = bitcast <8 x i8> %r to <1 x i64>
  %hi64 = bitcast <8 x i8> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <16 x i8>
  ret <16 x i8> %result
}
748
; raddhn into the upper half: the <4 x i16> intrinsic result is packed as the
; second 64-bit element of the return value, with %r as the first. Expected
; to select raddhn2.
define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_high_s32:
; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %narrow = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  %lo64 = bitcast <4 x i16> %r to <1 x i64>
  %hi64 = bitcast <4 x i16> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <8 x i16>
  ret <8 x i16> %result
}
760
; raddhn into the upper half: the <2 x i32> intrinsic result is packed as the
; second 64-bit element of the return value, with %r as the first. Expected
; to select raddhn2.
define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_high_s64:
; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %narrow = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  %lo64 = bitcast <2 x i32> %r to <1 x i64>
  %hi64 = bitcast <2 x i32> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <4 x i32>
  ret <4 x i32> %result
}
772
; raddhn into the upper half (u16-named variant): the <8 x i8> intrinsic
; result is packed as the second 64-bit element of the return value, with %r
; as the first. Expected to select raddhn2.
define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_high_u16:
; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %narrow = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  %lo64 = bitcast <8 x i8> %r to <1 x i64>
  %hi64 = bitcast <8 x i8> %narrow to <1 x i64>
  %packed = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
  %result = bitcast <2 x i64> %packed to <16 x i8>
  ret <16 x i8> %result
}
784
; "u32" variant: the <4 x i16> result of llvm.aarch64.neon.raddhn.v4i16 is
; bitcast to <1 x i64> and concatenated behind %r (lane 0 = %r, lane 1 =
; narrowed value). llc is expected to select one raddhn2 (.8h <- .4s, .4s).
define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_high_u32:
; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}
796
; "u64" variant: the <2 x i32> result of llvm.aarch64.neon.raddhn.v2i32 is
; bitcast to <1 x i64> and concatenated behind %r (lane 0 = %r, lane 1 =
; narrowed value). llc is expected to select one raddhn2 (.4s <- .2d, .2d).
define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_high_u64:
; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}
808
; Plain-IR pattern (no intrinsic): sub, lshr by 8, trunc to i8 keeps bits 8..15
; of every i16 difference. llc is expected to match it to subhn (.8b <- .8h).
define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_s16:
; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vsubhn.i = sub <8 x i16> %a, %b
  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
  ret <8 x i8> %vsubhn2.i
}
818
; Plain-IR pattern (no intrinsic): sub, lshr by 16, trunc to i16 keeps bits
; 16..31 of every i32 difference. llc is expected to match it to subhn
; (.4h <- .4s).
define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_s32:
; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vsubhn.i = sub <4 x i32> %a, %b
  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
  ret <4 x i16> %vsubhn2.i
}
828
; Plain-IR pattern (no intrinsic): sub, lshr by 32, trunc to i32 keeps bits
; 32..63 of every i64 difference. llc is expected to match it to subhn
; (.2s <- .2d).
define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_s64:
; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vsubhn.i = sub <2 x i64> %a, %b
  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
  ret <2 x i32> %vsubhn2.i
}
838
; "u16" variant of the plain-IR pattern: sub, lshr by 8, trunc to i8 keeps bits
; 8..15 of every i16 difference. llc is expected to match it to subhn
; (.8b <- .8h).
define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_u16:
; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vsubhn.i = sub <8 x i16> %a, %b
  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
  ret <8 x i8> %vsubhn2.i
}
848
; "u32" variant of the plain-IR pattern: sub, lshr by 16, trunc to i16 keeps
; bits 16..31 of every i32 difference. llc is expected to match it to subhn
; (.4h <- .4s).
define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_u32:
; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vsubhn.i = sub <4 x i32> %a, %b
  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
  ret <4 x i16> %vsubhn2.i
}
858
; "u64" variant of the plain-IR pattern: sub, lshr by 32, trunc to i32 keeps
; bits 32..63 of every i64 difference. llc is expected to match it to subhn
; (.2s <- .2d).
define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_u64:
; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vsubhn.i = sub <2 x i64> %a, %b
  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
  ret <2 x i32> %vsubhn2.i
}
868
; sub / lshr 8 / trunc keeps bits 8..15 of every i16 difference; the <8 x i8>
; result is then concatenated behind %r (lane 0 = %r, lane 1 = narrowed value)
; via <1 x i64> bitcasts. llc is expected to select subhn2 (.16b <- .8h).
define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_high_s16:
; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vsubhn.i.i = sub <8 x i16> %a, %b
  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}
882
; sub / lshr 16 / trunc keeps bits 16..31 of every i32 difference; the
; <4 x i16> result is then concatenated behind %r (lane 0 = %r, lane 1 =
; narrowed value) via <1 x i64> bitcasts. llc is expected to select subhn2
; (.8h <- .4s).
define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_high_s32:
; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vsubhn.i.i = sub <4 x i32> %a, %b
  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}
896
; sub / lshr 32 / trunc keeps bits 32..63 of every i64 difference; the
; <2 x i32> result is then concatenated behind %r (lane 0 = %r, lane 1 =
; narrowed value) via <1 x i64> bitcasts. llc is expected to select subhn2
; (.4s <- .2d).
define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_high_s64:
; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vsubhn.i.i = sub <2 x i64> %a, %b
  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}
910
; "u16" variant: sub / lshr 8 / trunc keeps bits 8..15 of every i16 difference;
; the <8 x i8> result is concatenated behind %r (lane 0 = %r, lane 1 = narrowed
; value) via <1 x i64> bitcasts. llc is expected to select subhn2
; (.16b <- .8h).
define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_high_u16:
; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vsubhn.i.i = sub <8 x i16> %a, %b
  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}
924
; "u32" variant: sub / lshr 16 / trunc keeps bits 16..31 of every i32
; difference; the <4 x i16> result is concatenated behind %r (lane 0 = %r,
; lane 1 = narrowed value) via <1 x i64> bitcasts. llc is expected to select
; subhn2 (.8h <- .4s).
define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_high_u32:
; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vsubhn.i.i = sub <4 x i32> %a, %b
  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}
938
; "u64" variant: sub / lshr 32 / trunc keeps bits 32..63 of every i64
; difference; the <2 x i32> result is concatenated behind %r (lane 0 = %r,
; lane 1 = narrowed value) via <1 x i64> bitcasts. llc is expected to select
; subhn2 (.4s <- .2d).
define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_high_u64:
; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vsubhn.i.i = sub <2 x i64> %a, %b
  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}
952
; Direct call to llvm.aarch64.neon.rsubhn.v8i8; llc is expected to select
; rsubhn (.8b <- .8h, .8h).
define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_s16:
; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %vrsubhn2.i
}
960
; Direct call to llvm.aarch64.neon.rsubhn.v4i16; llc is expected to select
; rsubhn (.4h <- .4s, .4s).
define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_s32:
; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i16> %vrsubhn2.i
}
968
; Direct call to llvm.aarch64.neon.rsubhn.v2i32; llc is expected to select
; rsubhn (.2s <- .2d, .2d).
define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_s64:
; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i32> %vrsubhn2.i
}
976
; "u16" variant: direct call to llvm.aarch64.neon.rsubhn.v8i8; llc is expected
; to select rsubhn (.8b <- .8h, .8h).
define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_u16:
; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %vrsubhn2.i
}
984
; "u32" variant: direct call to llvm.aarch64.neon.rsubhn.v4i16; llc is expected
; to select rsubhn (.4h <- .4s, .4s).
define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_u32:
; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i16> %vrsubhn2.i
}
992
; "u64" variant: direct call to llvm.aarch64.neon.rsubhn.v2i32; llc is expected
; to select rsubhn (.2s <- .2d, .2d).
define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_u64:
; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i32> %vrsubhn2.i
}
1000
; The <8 x i8> result of llvm.aarch64.neon.rsubhn.v8i8 is bitcast to <1 x i64>
; and concatenated behind %r (lane 0 = %r, lane 1 = narrowed value). llc is
; expected to fold the whole sequence into one rsubhn2 (.16b <- .8h, .8h).
define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_high_s16:
; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}
1012
; The <4 x i16> result of llvm.aarch64.neon.rsubhn.v4i16 is bitcast to <1 x i64>
; and concatenated behind %r (lane 0 = %r, lane 1 = narrowed value). llc is
; expected to fold the whole sequence into one rsubhn2 (.8h <- .4s, .4s).
define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_high_s32:
; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}
1024
; The <2 x i32> result of llvm.aarch64.neon.rsubhn.v2i32 is bitcast to <1 x i64>
; and concatenated behind %r (lane 0 = %r, lane 1 = narrowed value). llc is
; expected to fold the whole sequence into one rsubhn2 (.4s <- .2d, .2d).
define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_high_s64:
; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}
1036
; "u16" variant: the <8 x i8> result of llvm.aarch64.neon.rsubhn.v8i8 is bitcast
; to <1 x i64> and concatenated behind %r (lane 0 = %r, lane 1 = narrowed
; value). llc is expected to select one rsubhn2 (.16b <- .8h, .8h).
define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_high_u16:
; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}
1048
; "u32" variant: the <4 x i16> result of llvm.aarch64.neon.rsubhn.v4i16 is
; bitcast to <1 x i64> and concatenated behind %r (lane 0 = %r, lane 1 =
; narrowed value). llc is expected to select one rsubhn2 (.8h <- .4s, .4s).
define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_high_u32:
; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}
1060
; "u64" variant: the <2 x i32> result of llvm.aarch64.neon.rsubhn.v2i32 is
; bitcast to <1 x i64> and concatenated behind %r (lane 0 = %r, lane 1 =
; narrowed value). llc is expected to select one rsubhn2 (.4s <- .2d, .2d).
define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_high_u64:
; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}
1072
; llvm.aarch64.neon.sabd.v8i8 followed by zext to <8 x i16> (note: zext, not
; sext, even for the signed variant). llc is expected to fuse both into sabdl
; (.8h <- .8b, .8b).
define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vabdl_s8:
; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
  ret <8 x i16> %vmovl.i.i
}
1081
; llvm.aarch64.neon.sabd.v4i16 followed by zext to <4 x i32> (note: zext, not
; sext, even for the signed variant). llc is expected to fuse both into sabdl
; (.4s <- .4h, .4h).
define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vabdl_s16:
; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
  ret <4 x i32> %vmovl.i.i
}
1090
; llvm.aarch64.neon.sabd.v2i32 followed by zext to <2 x i64> (note: zext, not
; sext, even for the signed variant). llc is expected to fuse both into sabdl
; (.2d <- .2s, .2s).
define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vabdl_s32:
; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
  ret <2 x i64> %vmovl.i.i
}
1099
; llvm.aarch64.neon.uabd.v8i8 followed by zext to <8 x i16>. llc is expected to
; fuse both into uabdl (.8h <- .8b, .8b).
define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vabdl_u8:
; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
  ret <8 x i16> %vmovl.i.i
}
1108
; llvm.aarch64.neon.uabd.v4i16 followed by zext to <4 x i32>. llc is expected
; to fuse both into uabdl (.4s <- .4h, .4h).
define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vabdl_u16:
; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
  ret <4 x i32> %vmovl.i.i
}
1117
; llvm.aarch64.neon.uabd.v2i32 followed by zext to <2 x i64>. llc is expected
; to fuse both into uabdl (.2d <- .2s, .2s).
define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vabdl_u32:
; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
  ret <2 x i64> %vmovl.i.i
}
1126
; llvm.aarch64.neon.sabd.v8i8 on %b/%c, zext to <8 x i16>, then add to the
; accumulator %a. llc is expected to fuse all three into sabal
; (.8h <- .8b, .8b).
define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vabal_s8:
; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i.i, %a
  ret <8 x i16> %add.i
}
1136
; llvm.aarch64.neon.sabd.v4i16 on %b/%c, zext to <4 x i32>, then add to the
; accumulator %a. llc is expected to fuse all three into sabal
; (.4s <- .4h, .4h).
define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vabal_s16:
; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i.i, %a
  ret <4 x i32> %add.i
}
1146
; llvm.aarch64.neon.sabd.v2i32 on %b/%c, zext to <2 x i64>, then add to the
; accumulator %a. llc is expected to fuse all three into sabal
; (.2d <- .2s, .2s).
define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vabal_s32:
; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i.i, %a
  ret <2 x i64> %add.i
}
1156
; llvm.aarch64.neon.uabd.v8i8 on %b/%c, zext to <8 x i16>, then add to the
; accumulator %a. llc is expected to fuse all three into uabal
; (.8h <- .8b, .8b).
define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vabal_u8:
; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i.i, %a
  ret <8 x i16> %add.i
}
1166
; llvm.aarch64.neon.uabd.v4i16 on %b/%c, zext to <4 x i32>, then add to the
; accumulator %a. llc is expected to fuse all three into uabal
; (.4s <- .4h, .4h).
define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vabal_u16:
; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i.i, %a
  ret <4 x i32> %add.i
}
1176
; llvm.aarch64.neon.uabd.v2i32 on %b/%c, zext to <2 x i64>, then add to the
; accumulator %a. llc is expected to fuse all three into uabal
; (.2d <- .2s, .2s).
define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vabal_u32:
; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i.i, %a
  ret <2 x i64> %add.i
}
1186
; Lanes 8..15 of %a and %b are extracted with shufflevector, fed to
; llvm.aarch64.neon.sabd.v8i8 and zero-extended. llc is expected to absorb the
; extracts and select sabdl2 on the full .16b registers.
define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vabdl_high_s8:
; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
  ret <8 x i16> %vmovl.i.i.i
}
1197
; Lanes 4..7 of %a and %b are extracted with shufflevector, fed to
; llvm.aarch64.neon.sabd.v4i16 and zero-extended. llc is expected to absorb the
; extracts and select sabdl2 on the full .8h registers.
define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vabdl_high_s16:
; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
  ret <4 x i32> %vmovl.i.i.i
}
1208
; Lanes 2..3 of %a and %b are extracted with shufflevector, fed to
; llvm.aarch64.neon.sabd.v2i32 and zero-extended. llc is expected to absorb the
; extracts and select sabdl2 on the full .4s registers.
define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vabdl_high_s32:
; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
  ret <2 x i64> %vmovl.i.i.i
}
1219
; Lanes 8..15 of %a and %b are extracted with shufflevector, fed to
; llvm.aarch64.neon.uabd.v8i8 and zero-extended. llc is expected to absorb the
; extracts and select uabdl2 on the full .16b registers.
define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vabdl_high_u8:
; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
  ret <8 x i16> %vmovl.i.i.i
}
1230
; Lanes 4..7 of %a and %b are extracted with shufflevector, fed to
; llvm.aarch64.neon.uabd.v4i16 and zero-extended. llc is expected to absorb the
; extracts and select uabdl2 on the full .8h registers.
define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vabdl_high_u16:
; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
  ret <4 x i32> %vmovl.i.i.i
}
1241
; Lanes 2..3 of %a and %b are extracted with shufflevector, fed to
; llvm.aarch64.neon.uabd.v2i32 and zero-extended. llc is expected to absorb the
; extracts and select uabdl2 on the full .4s registers.
define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vabdl_high_u32:
; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
  ret <2 x i64> %vmovl.i.i.i
}
1252
; Lanes 8..15 of %b and %c are extracted, passed to
; llvm.aarch64.neon.sabd.v8i8, zero-extended and added to the accumulator %a.
; llc is expected to select a single sabal2 (.8h <- .16b, .16b).
define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vabal_high_s8:
; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
  ret <8 x i16> %add.i.i
}
1264
; Lanes 4..7 of %b and %c are extracted, passed to
; llvm.aarch64.neon.sabd.v4i16, zero-extended and added to the accumulator %a.
; llc is expected to select a single sabal2 (.4s <- .8h, .8h).
define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vabal_high_s16:
; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
  ret <4 x i32> %add.i.i
}
1276
; Lanes 2..3 of %b and %c are extracted, passed to
; llvm.aarch64.neon.sabd.v2i32, zero-extended and added to the accumulator %a.
; llc is expected to select a single sabal2 (.2d <- .4s, .4s).
define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vabal_high_s32:
; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
  ret <2 x i64> %add.i.i
}
1288
; Lanes 8..15 of %b and %c are extracted, passed to
; llvm.aarch64.neon.uabd.v8i8, zero-extended and added to the accumulator %a.
; llc is expected to select a single uabal2 (.8h <- .16b, .16b).
define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vabal_high_u8:
; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
  ret <8 x i16> %add.i.i
}
1300
; Lanes 4..7 of %b and %c are extracted, passed to
; llvm.aarch64.neon.uabd.v4i16, zero-extended and added to the accumulator %a.
; llc is expected to select a single uabal2 (.4s <- .8h, .8h).
define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vabal_high_u16:
; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
  ret <4 x i32> %add.i.i
}
1312
; Lanes 2..3 of %b and %c are extracted, passed to
; llvm.aarch64.neon.uabd.v2i32, zero-extended and added to the accumulator %a.
; llc is expected to select a single uabal2 (.2d <- .4s, .4s).
define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vabal_high_u32:
; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
  ret <2 x i64> %add.i.i
}
1324
; Direct call to llvm.aarch64.neon.smull.v8i16; llc is expected to select
; smull (.8h <- .8b, .8b).
define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_s8:
; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %vmull.i
}
1332
; Direct call to llvm.aarch64.neon.smull.v4i32; llc is expected to select
; smull (.4s <- .4h, .4h).
define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vmull_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %vmull2.i
}
1340
; Direct call to llvm.aarch64.neon.smull.v2i64; llc is expected to select
; smull (.2d <- .2s, .2s).
define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vmull_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %vmull2.i
}
1348
; Direct call to llvm.aarch64.neon.umull.v8i16; llc is expected to select
; umull (.8h <- .8b, .8b).
define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_u8:
; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %vmull.i
}
1356
; Direct call to llvm.aarch64.neon.umull.v4i32; llc is expected to select
; umull (.4s <- .4h, .4h).
define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vmull_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %vmull2.i
}
1364
; Direct call to llvm.aarch64.neon.umull.v2i64; llc is expected to select
; umull (.2d <- .2s, .2s).
define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vmull_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %vmull2.i
}
1372
; Lanes 8..15 of %a and %b are extracted with shufflevector and passed to
; llvm.aarch64.neon.smull.v8i16. llc is expected to absorb the extracts and
; select smull2 (.8h <- .16b, .16b).
define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_s8:
; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  ret <8 x i16> %vmull.i.i
}
1382
; Lanes 4..7 of %a and %b are extracted with shufflevector and passed to
; llvm.aarch64.neon.smull.v4i32. llc is expected to absorb the extracts and
; select smull2 (.4s <- .8h, .8h).
define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  ret <4 x i32> %vmull2.i.i
}
1392
; Lanes 2..3 of %a and %b are extracted with shufflevector and passed to
; llvm.aarch64.neon.smull.v2i64. llc is expected to absorb the extracts and
; select smull2 (.2d <- .4s, .4s).
define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  ret <2 x i64> %vmull2.i.i
}
1402
; Lanes 8..15 of %a and %b are extracted with shufflevector and passed to
; llvm.aarch64.neon.umull.v8i16. llc is expected to absorb the extracts and
; select umull2 (.8h <- .16b, .16b).
define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_u8:
; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  ret <8 x i16> %vmull.i.i
}
1412
; Lanes 4..7 of %a and %b are extracted with shufflevector and passed to
; llvm.aarch64.neon.umull.v4i32. llc is expected to absorb the extracts and
; select umull2 (.4s <- .8h, .8h).
define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  ret <4 x i32> %vmull2.i.i
}
1422
; Lanes 2..3 of %a and %b are extracted with shufflevector and passed to
; llvm.aarch64.neon.umull.v2i64. llc is expected to absorb the extracts and
; select umull2 (.2d <- .4s, .4s).
define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  ret <2 x i64> %vmull2.i.i
}
1432
; llvm.aarch64.neon.smull.v8i16 on %b/%c followed by add with the accumulator
; %a. llc is expected to fuse both into smlal (.8h <- .8b, .8b).
define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_s8:
; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %add.i = add <8 x i16> %vmull.i.i, %a
  ret <8 x i16> %add.i
}
1441
; llvm.aarch64.neon.smull.v4i32 on %b/%c followed by add with the accumulator
; %a. llc is expected to fuse both into smlal (.4s <- .4h, .4h).
define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_s16:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %add.i = add <4 x i32> %vmull2.i.i, %a
  ret <4 x i32> %add.i
}
1450
; Adds %a to the smull product of %b and %c; the expected output contains an
; smlal with .2s sources.
define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}
1459
; Adds %a to the umull product of %b and %c; the expected output contains a
; umlal with .8b sources.
define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_u8:
; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1468
; Adds %a to the umull product of %b and %c; the expected output contains a
; umlal with .4h sources.
define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1477
; Adds %a to the umull product of %b and %c; the expected output contains a
; umlal with .2s sources.
define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}
1486
; Extracts lanes 8-15 of %b and %c, multiplies them with the smull intrinsic
; and adds %a; the expected output contains an smlal2 with .16b sources.
define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_s8:
; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1497
; Extracts lanes 4-7 of %b and %c, multiplies them with the smull intrinsic
; and adds %a; the expected output contains an smlal2 with .8h sources.
define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1508
; Extracts lanes 2-3 of %b and %c, multiplies them with the smull intrinsic
; and adds %a; the expected output contains an smlal2 with .4s sources.
define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}
1519
; Extracts lanes 8-15 of %b and %c, multiplies them with the umull intrinsic
; and adds %a; the expected output contains a umlal2 with .16b sources.
define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_u8:
; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1530
; Extracts lanes 4-7 of %b and %c, multiplies them with the umull intrinsic
; and adds %a; the expected output contains a umlal2 with .8h sources.
define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1541
; Extracts lanes 2-3 of %b and %c, multiplies them with the umull intrinsic
; and adds %a; the expected output contains a umlal2 with .4s sources.
define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}
1552
; Subtracts the smull product of %b and %c from %a; the expected output
; contains an smlsl with .8b sources.
define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_s8:
; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1561
; Subtracts the smull product of %b and %c from %a; the expected output
; contains an smlsl with .4h sources.
define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1570
; Subtracts the smull product of %b and %c from %a; the expected output
; contains an smlsl with .2s sources.
define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}
1579
; Subtracts the umull product of %b and %c from %a; the expected output
; contains a umlsl with .8b sources.
define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_u8:
; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1588
; Subtracts the umull product of %b and %c from %a; the expected output
; contains a umlsl with .4h sources.
define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1597
; Subtracts the umull product of %b and %c from %a; the expected output
; contains a umlsl with .2s sources.
define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}
1606
; Extracts lanes 8-15 of %b and %c, multiplies them with the smull intrinsic
; and subtracts the product from %a; the expected output contains an smlsl2.
define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_s8:
; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1617
; Extracts lanes 4-7 of %b and %c, multiplies them with the smull intrinsic
; and subtracts the product from %a; the expected output contains an smlsl2.
define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1628
; Extracts lanes 2-3 of %b and %c, multiplies them with the smull intrinsic
; and subtracts the product from %a; the expected output contains an smlsl2.
define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}
1639
; Extracts lanes 8-15 of %b and %c, multiplies them with the umull intrinsic
; and subtracts the product from %a; the expected output contains a umlsl2.
define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_u8:
; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1650
; Extracts lanes 4-7 of %b and %c, multiplies them with the umull intrinsic
; and subtracts the product from %a; the expected output contains a umlsl2.
define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1661
; Extracts lanes 2-3 of %b and %c, multiplies them with the umull intrinsic
; and subtracts the product from %a; the expected output contains a umlsl2.
define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}
1672
; Calls the sqdmull intrinsic directly on the two <4 x i16> arguments; the
; expected output contains an sqdmull with .4h sources.
define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vqdmull_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %dprod
}
1680
; Calls the sqdmull intrinsic directly on the two <2 x i32> arguments; the
; expected output contains an sqdmull with .2s sources.
define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vqdmull_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %dprod
}
1688
; Feeds the sqdmull result of %b and %c into sqadd together with %a; the
; expected output contains a fused sqdmlal with .4h sources.
define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %sat.acc = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %dprod)
  ret <4 x i32> %sat.acc
}
1697
; Feeds the sqdmull result of %b and %c into sqadd together with %a; the
; expected output contains a fused sqdmlal with .2s sources.
define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %sat.acc = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %dprod)
  ret <2 x i64> %sat.acc
}
1706
; Feeds the sqdmull result of %b and %c into sqsub as the subtrahend of %a;
; the expected output contains a fused sqdmlsl with .4h sources.
define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %sat.diff = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %dprod)
  ret <4 x i32> %sat.diff
}
1715
; Feeds the sqdmull result of %b and %c into sqsub as the subtrahend of %a;
; the expected output contains a fused sqdmlsl with .2s sources.
define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %sat.diff = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %dprod)
  ret <2 x i64> %sat.diff
}
1724
; Takes lanes 4-7 of both <8 x i16> arguments and passes them to the sqdmull
; intrinsic; the expected output contains an sqdmull2 with .8h sources.
define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmull_high_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a.hi, <4 x i16> %b.hi)
  ret <4 x i32> %dprod
}
1734
; Takes lanes 2-3 of both <4 x i32> arguments and passes them to the sqdmull
; intrinsic; the expected output contains an sqdmull2 with .4s sources.
define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmull_high_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a.hi, <2 x i32> %b.hi)
  ret <2 x i64> %dprod
}
1744
; Extracts lanes 4-7 of %b and %c, runs sqdmull on them and combines the
; result with %a through sqadd; the expected output contains an sqdmlal2.
define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_high_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %sat.acc = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %dprod)
  ret <4 x i32> %sat.acc
}
1755
; Extracts lanes 2-3 of %b and %c, runs sqdmull on them and combines the
; result with %a through sqadd; the expected output contains an sqdmlal2.
define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_high_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %sat.acc = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %dprod)
  ret <2 x i64> %sat.acc
}
1766
; Extracts lanes 4-7 of %b and %c, runs sqdmull on them and subtracts the
; result from %a through sqsub; the expected output contains an sqdmlsl2.
define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %sat.diff = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %dprod)
  ret <4 x i32> %sat.diff
}
1777
; Extracts lanes 2-3 of %b and %c, runs sqdmull on them and subtracts the
; result from %a through sqsub; the expected output contains an sqdmlsl2.
define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %sat.diff = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %dprod)
  ret <2 x i64> %sat.diff
}
1788
; Calls the pmull intrinsic directly on the two <8 x i8> arguments; the
; expected output contains a pmull with .8b sources and an .8h destination.
define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_p8:
; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %pprod = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %pprod
}
1796
; Takes lanes 8-15 of both <16 x i8> arguments and passes them to the pmull
; intrinsic; the expected output contains a pmull2 with .16b sources.
define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_p8:
; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %pprod = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a.hi, <8 x i8> %b.hi)
  ret <8 x i16> %pprod
}
1806
; Passes two scalar i64 values to the pmull64 intrinsic and returns the
; <16 x i8> result reinterpreted as i128; the expected output contains a
; pmull with .1d sources and a .1q destination.
; The label pattern below ends with ':' so that it anchors on the function's
; label definition, matching the other tests in this file.
; NOTE(review): attribute group #4 is not defined in the visible part of this
; file - confirm it exists or is intentionally left dangling.
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
; CHECK-LABEL: test_vmull_p64:
; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
entry:
  %pprod = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
  %pprod.i128 = bitcast <16 x i8> %pprod to i128
  ret i128 %pprod.i128
}
1815
; Extracts element 1 of each <2 x i64> argument, passes the two scalars to
; the pmull64 intrinsic and returns the <16 x i8> result reinterpreted as
; i128; the expected output contains a pmull2 with .2d sources.
; The label pattern below ends with ':' so that it anchors on the function's
; label definition, matching the other tests in this file.
; NOTE(review): attribute groups #4 and #1 are not defined in the visible
; part of this file - confirm they exist or are intentionally left dangling.
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
; CHECK-LABEL: test_vmull_high_p64:
; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %a.hi = extractelement <2 x i64> %a, i32 1
  %b.hi = extractelement <2 x i64> %b, i32 1
  %pprod = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a.hi, i64 %b.hi) #1
  %pprod.i128 = bitcast <16 x i8> %pprod to i128
  ret i128 %pprod.i128
}
1826
1827declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
1828
1829
1830