; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

; Absolute-difference intrinsics (unsigned and signed) on <8 x i8>, used by
; the v8i8 tests that follow.
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)

; A direct call to the unsigned absolute-difference intrinsic on <8 x i8>
; is expected to appear as a uabd on the 8b arrangement.
define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_uabd_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uabd v0.8b, v0.8b, v1.8b
  ret <8 x i8> %abd
}

; Adding the unsigned absolute difference back onto %lhs is expected to be
; selected as the accumulating form, uaba, on the 8b arrangement.
define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_uaba_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <8 x i8> %lhs, %abd
; CHECK: uaba v0.8b, v0.8b, v1.8b
  ret <8 x i8> %aba
}

; A direct call to the signed absolute-difference intrinsic on <8 x i8>
; is expected to appear as a sabd on the 8b arrangement.
define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_sabd_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sabd v0.8b, v0.8b, v1.8b
  ret <8 x i8> %abd
}

; Adding the signed absolute difference back onto %lhs is expected to be
; selected as the accumulating form, saba, on the 8b arrangement.
define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_saba_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <8 x i8> %lhs, %abd
; CHECK: saba v0.8b, v0.8b, v1.8b
  ret <8 x i8> %aba
}

; Absolute-difference intrinsics (unsigned and signed) on <16 x i8>, used by
; the v16i8 tests that follow.
declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)

; A direct call to the unsigned absolute-difference intrinsic on <16 x i8>
; is expected to appear as a uabd on the 16b arrangement.
define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_uabd_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uabd v0.16b, v0.16b, v1.16b
  ret <16 x i8> %abd
}

; Adding the unsigned absolute difference back onto %lhs is expected to be
; selected as the accumulating form, uaba, on the 16b arrangement.
define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_uaba_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <16 x i8> %lhs, %abd
; CHECK: uaba v0.16b, v0.16b, v1.16b
  ret <16 x i8> %aba
}

; A direct call to the signed absolute-difference intrinsic on <16 x i8>
; is expected to appear as a sabd on the 16b arrangement.
define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_sabd_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sabd v0.16b, v0.16b, v1.16b
  ret <16 x i8> %abd
}

; Adding the signed absolute difference back onto %lhs is expected to be
; selected as the accumulating form, saba, on the 16b arrangement.
define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_saba_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <16 x i8> %lhs, %abd
; CHECK: saba v0.16b, v0.16b, v1.16b
  ret <16 x i8> %aba
}

; Absolute-difference intrinsics (unsigned and signed) on <4 x i16>, used by
; the v4i16 tests that follow.
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)

; A direct call to the unsigned absolute-difference intrinsic on <4 x i16>
; is expected to appear as a uabd on the 4h arrangement.
define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_uabd_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uabd v0.4h, v0.4h, v1.4h
  ret <4 x i16> %abd
}

; Adding the unsigned absolute difference back onto %lhs is expected to be
; selected as the accumulating form, uaba, on the 4h arrangement.
define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_uaba_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <4 x i16> %lhs, %abd
; CHECK: uaba v0.4h, v0.4h, v1.4h
  ret <4 x i16> %aba
}

; A direct call to the signed absolute-difference intrinsic on <4 x i16>
; is expected to appear as a sabd on the 4h arrangement.
define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_sabd_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sabd v0.4h, v0.4h, v1.4h
  ret <4 x i16> %abd
}

; Adding the signed absolute difference back onto %lhs is expected to be
; selected as the accumulating form, saba, on the 4h arrangement.
define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_saba_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <4 x i16> %lhs, %abd
; CHECK: saba v0.4h, v0.4h, v1.4h
  ret <4 x i16> %aba
}

; Absolute-difference intrinsics (unsigned and signed) on <8 x i16>, used by
; the v8i16 tests that follow.
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)

; A direct call to the unsigned absolute-difference intrinsic on <8 x i16>
; is expected to appear as a uabd on the 8h arrangement.
define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_uabd_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uabd v0.8h, v0.8h, v1.8h
  ret <8 x i16> %abd
}

; Adding the unsigned absolute difference back onto %lhs is expected to be
; selected as the accumulating form, uaba, on the 8h arrangement.
define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_uaba_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <8 x i16> %lhs, %abd
; CHECK: uaba v0.8h, v0.8h, v1.8h
  ret <8 x i16> %aba
}

; A direct call to the signed absolute-difference intrinsic on <8 x i16>
; is expected to appear as a sabd on the 8h arrangement.
define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_sabd_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sabd v0.8h, v0.8h, v1.8h
  ret <8 x i16> %abd
}

; Adding the signed absolute difference back onto %lhs is expected to be
; selected as the accumulating form, saba, on the 8h arrangement.
define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_saba_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <8 x i16> %lhs, %abd
; CHECK: saba v0.8h, v0.8h, v1.8h
  ret <8 x i16> %aba
}

; Absolute-difference intrinsics (unsigned and signed) on <2 x i32>, used by
; the v2i32 tests that follow.
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)

; A direct call to the unsigned absolute-difference intrinsic on <2 x i32>
; is expected to appear as a uabd on the 2s arrangement.
define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_uabd_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uabd v0.2s, v0.2s, v1.2s
  ret <2 x i32> %abd
}

; Adding the unsigned absolute difference back onto %lhs is expected to be
; selected as the accumulating form, uaba, on the 2s arrangement.
define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_uaba_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <2 x i32> %lhs, %abd
; CHECK: uaba v0.2s, v0.2s, v1.2s
  ret <2 x i32> %aba
}

; A direct call to the signed absolute-difference intrinsic on <2 x i32>
; is expected to appear as a sabd on the 2s arrangement.
define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_sabd_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sabd v0.2s, v0.2s, v1.2s
  ret <2 x i32> %abd
}

; Signed absolute difference with both operands given as constant vectors.
; The second operand <-65536, 65535> is 0xffff0000 in lane 0 and 0x0000ffff
; in lane 1; with lane 0 in the low half that is the 64-bit pattern
; 0x0000ffffffff0000 which the movi pattern expects in d1. The sabd itself
; is expected on the line immediately after that movi.
define <2 x i32> @test_sabd_v2i32_const() {
; CHECK-LABEL: test_sabd_v2i32_const:
; CHECK: movi     d1, #0x00ffffffff0000
; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
  %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
    <2 x i32> <i32 -2147483648, i32 2147450880>,
    <2 x i32> <i32 -65536, i32 65535>)
  ret <2 x i32> %1
}

; Adding the signed absolute difference back onto %lhs is expected to be
; selected as the accumulating form, saba, on the 2s arrangement.
define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_saba_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <2 x i32> %lhs, %abd
; CHECK: saba v0.2s, v0.2s, v1.2s
  ret <2 x i32> %aba
}

; Absolute-difference intrinsics (unsigned and signed) on <4 x i32>, used by
; the v4i32 tests that follow.
declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)

; A direct call to the unsigned absolute-difference intrinsic on <4 x i32>
; is expected to appear as a uabd on the 4s arrangement.
define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_uabd_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uabd v0.4s, v0.4s, v1.4s
  ret <4 x i32> %abd
}

; Adding the unsigned absolute difference back onto %lhs is expected to be
; selected as the accumulating form, uaba, on the 4s arrangement.
define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_uaba_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <4 x i32> %lhs, %abd
; CHECK: uaba v0.4s, v0.4s, v1.4s
  ret <4 x i32> %aba
}

; A direct call to the signed absolute-difference intrinsic on <4 x i32>
; is expected to appear as a sabd on the 4s arrangement.
define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_sabd_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sabd v0.4s, v0.4s, v1.4s
  ret <4 x i32> %abd
}

; Adding the signed absolute difference back onto %lhs is expected to be
; selected as the accumulating form, saba, on the 4s arrangement.
define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_saba_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ; %lhs is both the accumulator and the first difference operand, hence v0
  ; appearing as destination and first source in the expected instruction.
  %aba = add <4 x i32> %lhs, %abd
; CHECK: saba v0.4s, v0.4s, v1.4s
  ret <4 x i32> %aba
}

; Floating-point absolute-difference intrinsic on <2 x float>.
declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>)

; A direct call to the floating-point absolute-difference intrinsic on
; <2 x float> is expected to appear as a fabd on the 2s arrangement.
define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fabd_v2f32:
  %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fabd v0.2s, v0.2s, v1.2s
  ret <2 x float> %abd
}

; Floating-point absolute-difference intrinsic on <4 x float>.
declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>)

; A direct call to the floating-point absolute-difference intrinsic on
; <4 x float> is expected to appear as a fabd on the 4s arrangement.
define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fabd_v4f32:
  %abd = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fabd v0.4s, v0.4s, v1.4s
  ret <4 x float> %abd
}

; Floating-point absolute-difference intrinsic on <2 x double>.
declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>)

; A direct call to the floating-point absolute-difference intrinsic on
; <2 x double> is expected to appear as a fabd on the 2d arrangement.
define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fabd_v2f64:
  %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fabd v0.2d, v0.2d, v1.2d
  ret <2 x double> %abd
}
