1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
2
; Loads two <8 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v8i8, and expects "sqadd.8b" in the llc output.
define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: sqadd8b:
; CHECK: sqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
11
; Loads two <4 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v4i16, and expects "sqadd.4h" in the llc output.
define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: sqadd4h:
; CHECK: sqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
20
; Loads two <2 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v2i32, and expects "sqadd.2s" in the llc output.
define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: sqadd2s:
; CHECK: sqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
29
; Loads two <8 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v8i8, and expects "uqadd.8b" in the llc output.
define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: uqadd8b:
; CHECK: uqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
38
; Loads two <4 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v4i16, and expects "uqadd.4h" in the llc output.
define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: uqadd4h:
; CHECK: uqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
47
; Loads two <2 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v2i32, and expects "uqadd.2s" in the llc output.
define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: uqadd2s:
; CHECK: uqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
56
; Loads two <16 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v16i8, and expects "sqadd.16b" in the llc output.
define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: sqadd16b:
; CHECK: sqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
65
; Loads two <8 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v8i16, and expects "sqadd.8h" in the llc output.
define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: sqadd8h:
; CHECK: sqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
74
; Loads two <4 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v4i32, and expects "sqadd.4s" in the llc output.
define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: sqadd4s:
; CHECK: sqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
83
; Loads two <2 x i64> operands through %A and %B, passes them to
; @llvm.aarch64.neon.sqadd.v2i64, and expects "sqadd.2d" in the llc output.
define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: sqadd2d:
; CHECK: sqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
92
; Loads two <16 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v16i8, and expects "uqadd.16b" in the llc output.
define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: uqadd16b:
; CHECK: uqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
101
; Loads two <8 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v8i16, and expects "uqadd.8h" in the llc output.
define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: uqadd8h:
; CHECK: uqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
110
; Loads two <4 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v4i32, and expects "uqadd.4s" in the llc output.
define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: uqadd4s:
; CHECK: uqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
119
; Loads two <2 x i64> operands through %A and %B, passes them to
; @llvm.aarch64.neon.uqadd.v2i64, and expects "uqadd.2d" in the llc output.
define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: uqadd2d:
; CHECK: uqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
128
; Declarations of the sqadd / uqadd intrinsics, grouped per intrinsic and
; ordered by vector type.
; NOTE(review): the v1i64 variants are declared but no function in the visible
; part of this file calls them.

; sqadd
declare <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; uqadd
declare <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
148
; Loads two <8 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v8i8, and expects "usqadd.8b" in the llc output.
define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: usqadd8b:
; CHECK: usqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
157
; Loads two <4 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v4i16, and expects "usqadd.4h" in the llc output.
define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: usqadd4h:
; CHECK: usqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
166
; Loads two <2 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v2i32, and expects "usqadd.2s" in the llc output.
define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: usqadd2s:
; CHECK: usqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
175
; Loads two <16 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v16i8, and expects "usqadd.16b" in the llc output.
define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: usqadd16b:
; CHECK: usqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
184
; Loads two <8 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v8i16, and expects "usqadd.8h" in the llc output.
define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: usqadd8h:
; CHECK: usqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
193
; Loads two <4 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v4i32, and expects "usqadd.4s" in the llc output.
define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: usqadd4s:
; CHECK: usqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
202
; Loads two <2 x i64> operands through %A and %B, passes them to
; @llvm.aarch64.neon.usqadd.v2i64, and expects "usqadd.2d" in the llc output.
define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: usqadd2d:
; CHECK: usqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
211
; Scalar i64 case: forwards both arguments to @llvm.aarch64.neon.usqadd.i64
; and expects a "usqadd" whose two printed operands are d registers.
define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: usqadd_d:
; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
  %result = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
  ret i64 %result
}
218
; Scalar i32 case: forwards both arguments to @llvm.aarch64.neon.usqadd.i32
; and expects a "usqadd" whose two printed operands are s registers.
define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: usqadd_s:
; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
  %result = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
  ret i32 %result
}
225
; Declarations of the usqadd intrinsics: scalar forms first, then the vector
; forms ordered by vector type.
; NOTE(review): the v1i64 variant is declared but no function in the visible
; part of this file calls it.

; scalar
declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone

; vector
declare <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
237
; Loads two <8 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v8i8, and expects "suqadd.8b" in the llc output.
define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: suqadd8b:
; CHECK: suqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
246
; Loads two <4 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v4i16, and expects "suqadd.4h" in the llc output.
define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: suqadd4h:
; CHECK: suqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
255
; Loads two <2 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v2i32, and expects "suqadd.2s" in the llc output.
define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: suqadd2s:
; CHECK: suqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
264
; Loads two <16 x i8> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v16i8, and expects "suqadd.16b" in the llc output.
define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: suqadd16b:
; CHECK: suqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
273
; Loads two <8 x i16> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v8i16, and expects "suqadd.8h" in the llc output.
define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: suqadd8h:
; CHECK: suqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
282
; Loads two <4 x i32> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v4i32, and expects "suqadd.4s" in the llc output.
define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: suqadd4s:
; CHECK: suqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
291
; Loads two <2 x i64> operands through %A and %B, passes them to
; @llvm.aarch64.neon.suqadd.v2i64, and expects "suqadd.2d" in the llc output.
define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: suqadd2d:
; CHECK: suqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
300
; <1 x i64> case: forwards both by-value vector arguments to
; @llvm.aarch64.neon.suqadd.v1i64 and expects a "suqadd" whose two printed
; operands are d registers.
define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
; CHECK-LABEL: suqadd_1d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
  %result = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
  ret <1 x i64> %result
}
307
; Scalar i64 case: forwards both arguments to @llvm.aarch64.neon.suqadd.i64
; and expects a "suqadd" whose two printed operands are d registers.
define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: suqadd_d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
  %result = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
  ret i64 %result
}
314
; Scalar i32 case: forwards both arguments to @llvm.aarch64.neon.suqadd.i32
; and expects a "suqadd" whose two printed operands are s registers.
define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: suqadd_s:
; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
  %result = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
  ret i32 %result
}
321
; Declarations of the suqadd intrinsics: scalar forms first, then the vector
; forms ordered by vector type.

; scalar
declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone

; vector
declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
333