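; Signed absolute difference, widened: sabd on 64-bit vectors followed by zext,
; which is expected to select the widening SABDL instruction.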
define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
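
; Same pattern applied to the high halves of 128-bit vectors (extracted with
; shufflevector); expected to use the SABDL2 form.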
define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
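
; Unsigned absolute difference, widened: uabd followed by zext, expected to
; select UABDL.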
define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
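
; High-half variants of the unsigned widening pattern; expected to use UABDL2.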
define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
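
; Absolute difference written as the generic zext/sub/compare/select idiom,
; then reduced to a scalar with a log2 sequence of pairwise shuffle + add steps.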
define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) {
  %aload = load <16 x i8>, <16 x i8>* %a, align 1
  %bload = load <16 x i8>, <16 x i8>* %b, align 1
  %aext = zext <16 x i8> %aload to <16 x i16>
  %bext = zext <16 x i8> %bload to <16 x i16>
  %abdiff = sub nsw <16 x i16> %aext, %bext
  %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
  %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
  %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
  %rdx.shuf = shufflevector <16 x i16> %absel, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin1.rdx = add <16 x i16> %absel, %rdx.shuf
  %rdx.shufx = shufflevector <16 x i16> %bin1.rdx, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <16 x i16> %bin1.rdx, %rdx.shufx
  %rdx.shuf136 = shufflevector <16 x i16> %bin.rdx, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx137 = add <16 x i16> %bin.rdx, %rdx.shuf136
  %rdx.shuf138 = shufflevector <16 x i16> %bin.rdx137, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx139 = add <16 x i16> %bin.rdx137, %rdx.shuf138
  %reduced_v = extractelement <16 x i16> %bin.rdx139, i16 0
  ret i16 %reduced_v
}

define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) {
  %aload = load <8 x i16>, <8 x i16>* %a, align 1
  %bload = load <8 x i16>, <8 x i16>* %b, align 1
  %aext = zext <8 x i16> %aload to <8 x i32>
  %bext = zext <8 x i16> %bload to <8 x i32>
  %abdiff = sub nsw <8 x i32> %aext, %bext
  %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
  %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
  %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
  %rdx.shuf = shufflevector <8 x i32> %absel, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %absel, %rdx.shuf
  %rdx.shuf136 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx137 = add <8 x i32> %bin.rdx, %rdx.shuf136
  %rdx.shuf138 = shufflevector <8 x i32> %bin.rdx137, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx139 = add <8 x i32> %bin.rdx137, %rdx.shuf138
  %reduced_v = extractelement <8 x i32> %bin.rdx139, i32 0
  ret i32 %reduced_v
}

define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
  %aload = load <4 x i32>, <4 x i32>* %a, align 1
  %bload = load <4 x i32>, <4 x i32>* %b, align 1
  %aext = zext <4 x i32> %aload to <4 x i64>
  %bext = zext <4 x i32> %bload to <4 x i64>
  %abdiff = sub nsw <4 x i64> %aext, %bext
  %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
  %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
  %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
  %rdx.shuf136 = shufflevector <4 x i64> %absel, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx137 = add <4 x i64> %absel, %rdx.shuf136
  %rdx.shuf138 = shufflevector <4 x i64> %bin.rdx137, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx139 = add <4 x i64> %bin.rdx137, %rdx.shuf138
  %reduced_v = extractelement <4 x i64> %bin.rdx139, i16 0
  ret i64 %reduced_v
}
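
; Floating-point absolute difference via the fabd intrinsic.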
define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone
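
; Plain (non-widening) signed absolute difference via the sabd intrinsic.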
define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
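
; Plain (non-widening) unsigned absolute difference via the uabd intrinsic.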
define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
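
; Saturating absolute value via the sqabs intrinsic.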
define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone
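
; Saturating negation via the sqneg intrinsic.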
define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone
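
; Integer absolute value via the abs intrinsic.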
define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
  %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A)
  ret <1 x i64> %abs
}

declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone
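
; Signed absolute difference, widened and accumulated: sabd, zext, then add
; into the accumulator; expected to match SABAL (and SABAL2 for the high-half
; variants below).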
define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}
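
; Unsigned absolute difference, widened and accumulated; expected to match
; UABAL/UABAL2.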
define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}
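
; Signed absolute difference and accumulate (sabd plus add, same width);
; expected to match SABA.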
define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = load <8 x i8>, <8 x i8>* %C
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  %tmp4 = load <16 x i8>, <16 x i8>* %C
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = load <4 x i16>, <4 x i16>* %C
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  %tmp4 = load <8 x i16>, <8 x i16>* %C
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = load <2 x i32>, <2 x i32>* %C
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  %tmp4 = load <4 x i32>, <4 x i32>* %C
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
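
; Unsigned absolute difference and accumulate; expected to match UABA.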
define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = load <8 x i8>, <8 x i8>* %C
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  %tmp4 = load <16 x i8>, <16 x i8>* %C
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = load <4 x i16>, <4 x i16>* %C
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  %tmp4 = load <8 x i16>, <8 x i16>* %C
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = load <2 x i32>, <2 x i32>* %C
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  %tmp4 = load <4 x i32>, <4 x i32>* %C
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
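
; Widening absolute difference where one operand is a scalar duplicated into a
; vector and the other is the extracted high half of a 128-bit vector.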
define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  %res1 = zext <2 x i32> %res to <2 x i64>
  ret <2 x i64> %res1
}

define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  %res1 = zext <2 x i32> %res to <2 x i64>
  ret <2 x i64> %res1
}
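
; Absolute value written as the generic negate/compare/select idiom rather
; than the intrinsic; expected to still lower to the vector ABS instruction.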
define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
  %tmp1neg = sub <2 x i32> zeroinitializer, %a
  %b = icmp sge <2 x i32> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
  ret <2 x i32> %abs
}

define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
  %tmp1neg = sub <4 x i16> zeroinitializer, %a
  %b = icmp sgt <4 x i16> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
  ret <4 x i16> %abs
}

define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
  %tmp1neg = sub <8 x i8> zeroinitializer, %a
  %b = icmp slt <8 x i8> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a
  ret <8 x i8> %abs
}

define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sge <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
  %tmp1neg = sub <8 x i16> zeroinitializer, %a
  %b = icmp sgt <8 x i16> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
  ret <8 x i16> %abs
}

define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
  %tmp1neg = sub <16 x i8> zeroinitializer, %a
  %b = icmp slt <16 x i8> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
  ret <16 x i8> %abs
}

define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
  %tmp1neg = sub <2 x i64> zeroinitializer, %a
  %b = icmp sle <2 x i64> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i64> %tmp1neg, <2 x i64> %a
  ret <2 x i64> %abs
}