; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Predicated add: lanes where %z == 0 take %x + %y, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vaddt.i32.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vaddt.i32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = add <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated add: lanes where %z == 0 take %x + %y, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vaddt.i16.
define arm_aapcs_vfpcc <8 x i16> @add_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vaddt.i16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = add <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated add: lanes where %z == 0 take %x + %y, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vaddt.i8.
define arm_aapcs_vfpcc <16 x i8> @add_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vaddt.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = add <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated subtract: lanes where %z == 0 take %x - %y, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vsubt.i32.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sub_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vsubt.i32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated subtract: lanes where %z == 0 take %x - %y, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vsubt.i16.
define arm_aapcs_vfpcc <8 x i16> @sub_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: sub_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vsubt.i16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated subtract: lanes where %z == 0 take %x - %y, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vsubt.i8.
define arm_aapcs_vfpcc <16 x i8> @sub_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: sub_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vsubt.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated multiply: lanes where %z == 0 take %x * %y, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vmult.i32.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmult.i32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = mul <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated multiply: lanes where %z == 0 take %x * %y, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vmult.i16.
define arm_aapcs_vfpcc <8 x i16> @mul_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmult.i16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = mul <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated multiply: lanes where %z == 0 take %x * %y, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vmult.i8.
define arm_aapcs_vfpcc <16 x i8> @mul_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmult.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = mul <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated bitwise AND: lanes where %z == 0 take %x & %y, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vandt.
define arm_aapcs_vfpcc <4 x i32> @and_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: and_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = and <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated bitwise AND: lanes where %z == 0 take %x & %y, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vandt.
define arm_aapcs_vfpcc <8 x i16> @and_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: and_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = and <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated bitwise AND: lanes where %z == 0 take %x & %y, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vandt.
define arm_aapcs_vfpcc <16 x i8> @and_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: and_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = and <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated bitwise OR: lanes where %z == 0 take %x | %y, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vorrt.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: or_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = or <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated bitwise OR: lanes where %z == 0 take %x | %y, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vorrt.
define arm_aapcs_vfpcc <8 x i16> @or_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: or_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = or <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated bitwise OR: lanes where %z == 0 take %x | %y, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vorrt.
define arm_aapcs_vfpcc <16 x i8> @or_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: or_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = or <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated bitwise XOR: lanes where %z == 0 take %x ^ %y, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated veort.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    veort q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = xor <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated bitwise XOR: lanes where %z == 0 take %x ^ %y, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated veort.
define arm_aapcs_vfpcc <8 x i16> @xor_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    veort q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = xor <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated bitwise XOR: lanes where %z == 0 take %x ^ %y, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated veort.
define arm_aapcs_vfpcc <16 x i8> @xor_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    veort q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = xor <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated and-not: %y1 is %y xor all-ones (bitwise NOT); lanes where %z == 0
; take %x & %y1, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vbict.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: andnot_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated and-not: %y1 is %y xor all-ones (bitwise NOT); lanes where %z == 0
; take %x & %y1, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vbict.
define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: andnot_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated and-not: %y1 is %y xor all-ones (bitwise NOT); lanes where %z == 0
; take %x & %y1, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vbict.
define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: andnot_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated or-not: %y1 is %y xor all-ones (bitwise NOT); lanes where %z == 0
; take %x | %y1, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vornt.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ornot_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vornt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated or-not: %y1 is %y xor all-ones (bitwise NOT); lanes where %z == 0
; take %x | %y1, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vornt.
define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: ornot_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vornt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated or-not: %y1 is %y xor all-ones (bitwise NOT); lanes where %z == 0
; take %x | %y1, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vornt.
define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: ornot_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vornt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated float add: lanes where %z compares oeq to zero take %x + %y, the
; other lanes keep %z.
; Expected output: vpt.f32 on q0 vs zr, then a predicated vaddt.f32.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vaddt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

; Predicated half add: lanes where %z compares oeq to zero take %x + %y, the
; other lanes keep %z.
; Expected output: vpt.f16 on q0 vs zr, then a predicated vaddt.f16.
define arm_aapcs_vfpcc <8 x half> @fadd_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fadd_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vaddt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; Predicated float subtract: lanes where %z compares oeq to zero take %x - %y,
; the other lanes keep %z.
; Expected output: vpt.f32 on q0 vs zr, then a predicated vsubt.f32.
define arm_aapcs_vfpcc <4 x float> @fsub_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fsub_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vsubt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

; Predicated half subtract: lanes where %z compares oeq to zero take %x - %y,
; the other lanes keep %z.
; Expected output: vpt.f16 on q0 vs zr, then a predicated vsubt.f16.
define arm_aapcs_vfpcc <8 x half> @fsub_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fsub_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vsubt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; Predicated float multiply: lanes where %z compares oeq to zero take %x * %y,
; the other lanes keep %z.
; Expected output: vpt.f32 on q0 vs zr, then a predicated vmult.f32.
define arm_aapcs_vfpcc <4 x float> @fmul_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fmul_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmult.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

; Predicated half multiply: lanes where %z compares oeq to zero take %x * %y,
; the other lanes keep %z.
; Expected output: vpt.f16 on q0 vs zr, then a predicated vmult.f16.
define arm_aapcs_vfpcc <8 x half> @fmul_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fmul_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmult.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; Predicated signed min: %a selects %x where %x <s %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vmint.s32.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_slt_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmint.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp slt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated signed min: %a selects %x where %x <s %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vmint.s16.
define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_slt_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmint.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp slt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated signed min: %a selects %x where %x <s %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vmint.s8.
define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_slt_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmint.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp slt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated signed max: %a selects %x where %x >s %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vmaxt.s32.
define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_sgt_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmaxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp sgt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated signed max: %a selects %x where %x >s %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vmaxt.s16.
define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_sgt_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmaxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp sgt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated signed max: %a selects %x where %x >s %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vmaxt.s8.
define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_sgt_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmaxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp sgt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated unsigned min: %a selects %x where %x <u %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vmint.u32.
define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_ult_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmint.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp ult <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated unsigned min: %a selects %x where %x <u %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vmint.u16.
define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_ult_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmint.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp ult <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated unsigned min: %a selects %x where %x <u %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vmint.u8.
define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_ult_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmint.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp ult <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated unsigned max: %a selects %x where %x >u %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vmaxt.u32.
define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_ugt_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmaxt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp ugt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated unsigned max: %a selects %x where %x >u %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vmaxt.u16.
define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_ugt_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmaxt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp ugt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated unsigned max: %a selects %x where %x >u %y, else %y; lanes where
; %z == 0 take %a, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vmaxt.u8.
define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_ugt_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmaxt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp ugt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated float min written as `fcmp fast olt` + select of %x/%y; lanes
; where %z compares oeq to zero take that result, the other lanes keep %z.
; Expected output: vpt.f32 on q0 vs zr, then a predicated vminnmt.f32.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vminnmt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

; Predicated half min written as `fcmp fast olt` + select of %x/%y; lanes
; where %z compares oeq to zero take that result, the other lanes keep %z.
; Expected output: vpt.f16 on q0 vs zr, then a predicated vminnmt.f16.
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vminnmt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; Predicated float max written as `fcmp fast ogt` + select of %x/%y; lanes
; where %z compares oeq to zero take that result, the other lanes keep %z.
; Expected output: vpt.f32 on q0 vs zr, then a predicated vmaxnmt.f32.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmaxnmt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

; Predicated half max written as `fcmp fast ogt` + select of %x/%y; lanes
; where %z compares oeq to zero take that result, the other lanes keep %z.
; Expected output: vpt.f16 on q0 vs zr, then a predicated vmaxnmt.f16.
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmaxnmt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; Predicated signed saturating add via @llvm.sadd.sat.v4i32; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vqaddt.s32.
define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sadd_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated signed saturating add via @llvm.sadd.sat.v8i16; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vqaddt.s16.
define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: sadd_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated signed saturating add via @llvm.sadd.sat.v16i8; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vqaddt.s8.
define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: sadd_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated unsigned saturating add via @llvm.uadd.sat.v4i32; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vqaddt.u32.
define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: uadd_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated unsigned saturating add via @llvm.uadd.sat.v8i16; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vqaddt.u16.
define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: uadd_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated unsigned saturating add via @llvm.uadd.sat.v16i8; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vqaddt.u8.
define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: uadd_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated signed saturating subtract via @llvm.ssub.sat.v4i32; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vqsubt.s32.
define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ssub_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated signed saturating subtract via @llvm.ssub.sat.v8i16; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vqsubt.s16.
define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: ssub_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated signed saturating subtract via @llvm.ssub.sat.v16i8; lanes where
; %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vqsubt.s8.
define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: ssub_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated unsigned saturating subtract via @llvm.usub.sat.v4i32; lanes
; where %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then a predicated vqsubt.u32.
define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: usub_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated unsigned saturating subtract via @llvm.usub.sat.v8i16; lanes
; where %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then a predicated vqsubt.u16.
define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: usub_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated unsigned saturating subtract via @llvm.usub.sat.v16i8; lanes
; where %z == 0 take the intrinsic result, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then a predicated vqsubt.u8.
define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: usub_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated add with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and added to %x; lanes where %z == 0 take the
; sum, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then vaddt.i32 with r0 as the scalar.
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: addqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vaddt.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = add <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated add with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and added to %x; lanes where %z == 0 take the
; sum, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then vaddt.i16 with r0 as the scalar.
define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: addqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vaddt.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = add <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated add with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and added to %x; lanes where %z == 0 take the
; sum, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then vaddt.i8 with r0 as the scalar.
define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: addqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vaddt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = add <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated subtract with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and subtracted from %x; lanes where %z == 0
; take the difference, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then vsubt.i32 with r0 as the scalar.
define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: subqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vsubt.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated subtract with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and subtracted from %x; lanes where %z == 0
; take the difference, the other lanes keep %z.
; Expected output: vpt.i16 on q0 vs zr, then vsubt.i16 with r0 as the scalar.
define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: subqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vsubt.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

; Predicated subtract with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and subtracted from %x; lanes where %z == 0
; take the difference, the other lanes keep %z.
; Expected output: vpt.i8 on q0 vs zr, then vsubt.i8 with r0 as the scalar.
define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: subqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vsubt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = sub <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; Predicated multiply with a scalar operand: %y is splatted (insertelement +
; shufflevector with a zero mask) and multiplied with %x; lanes where %z == 0
; take the product, the other lanes keep %z.
; Expected output: vpt.i32 on q0 vs zr, then vmult.i32 with r0 as the scalar.
define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: mulqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmult.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = mul <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

; Predicated vector-by-scalar multiply on <8 x i16>: the i16 %y is splatted to
; every lane and multiplied with %x. Lanes where %z == 0 take the product;
; all other lanes keep %z.
define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: mulqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmult.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <8 x i16> %z, zeroinitializer
  %y.lane0 = insertelement <8 x i16> undef, i16 %y, i32 0
  %y.splat = shufflevector <8 x i16> %y.lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %x, %y.splat
  %res = select <8 x i1> %z.is.zero, <8 x i16> %prod, <8 x i16> %z
  ret <8 x i16> %res
}
899
; Predicated vector-by-scalar multiply on <16 x i8>: the i8 %y is splatted to
; every lane and multiplied with %x. Lanes where %z == 0 take the product;
; all other lanes keep %z.
define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: mulqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmult.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <16 x i8> %z, zeroinitializer
  %y.lane0 = insertelement <16 x i8> undef, i8 %y, i32 0
  %y.splat = shufflevector <16 x i8> %y.lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %prod = mul <16 x i8> %x, %y.splat
  %res = select <16 x i1> %z.is.zero, <16 x i8> %prod, <16 x i8> %z
  ret <16 x i8> %res
}
914
; Predicated vector-plus-scalar floating-point add on <4 x float>: the float
; %y is splatted to every lane and added to %x. Lanes where %z compares
; ordered-equal to zero take the sum; all other lanes keep %z.
define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: faddqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vaddt.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = fcmp oeq <4 x float> %z, zeroinitializer
  %y.lane0 = insertelement <4 x float> undef, float %y, i32 0
  %y.splat = shufflevector <4 x float> %y.lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %sum = fadd <4 x float> %x, %y.splat
  %res = select <4 x i1> %z.is.zero, <4 x float> %sum, <4 x float> %z
  ret <4 x float> %res
}
930
; Predicated vector-plus-scalar floating-point add on <8 x half>: the half %y
; is splatted to every lane and added to %x. Lanes where %z compares
; ordered-equal to zero take the sum; all other lanes keep %z.
define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: faddqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vaddt.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = fcmp oeq <8 x half> %z, zeroinitializer
  %y.lane0 = insertelement <8 x half> undef, half %y, i32 0
  %y.splat = shufflevector <8 x half> %y.lane0, <8 x half> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x half> %x, %y.splat
  %res = select <8 x i1> %z.is.zero, <8 x half> %sum, <8 x half> %z
  ret <8 x half> %res
}
946
; Predicated vector-minus-scalar floating-point subtract on <4 x float>: the
; float %y is splatted to every lane and subtracted from %x. Lanes where %z
; compares ordered-equal to zero take the difference; all others keep %z.
define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: fsubqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vsubt.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = fcmp oeq <4 x float> %z, zeroinitializer
  %y.lane0 = insertelement <4 x float> undef, float %y, i32 0
  %y.splat = shufflevector <4 x float> %y.lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %diff = fsub <4 x float> %x, %y.splat
  %res = select <4 x i1> %z.is.zero, <4 x float> %diff, <4 x float> %z
  ret <4 x float> %res
}
962
; Predicated vector-minus-scalar floating-point subtract on <8 x half>: the
; half %y is splatted to every lane and subtracted from %x. Lanes where %z
; compares ordered-equal to zero take the difference; all others keep %z.
define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: fsubqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vsubt.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = fcmp oeq <8 x half> %z, zeroinitializer
  %y.lane0 = insertelement <8 x half> undef, half %y, i32 0
  %y.splat = shufflevector <8 x half> %y.lane0, <8 x half> undef, <8 x i32> zeroinitializer
  %diff = fsub <8 x half> %x, %y.splat
  %res = select <8 x i1> %z.is.zero, <8 x half> %diff, <8 x half> %z
  ret <8 x half> %res
}
978
; Predicated vector-by-scalar floating-point multiply on <4 x float>: the
; float %y is splatted to every lane and multiplied with %x. Lanes where %z
; compares ordered-equal to zero take the product; all others keep %z.
define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: fmulqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmult.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = fcmp oeq <4 x float> %z, zeroinitializer
  %y.lane0 = insertelement <4 x float> undef, float %y, i32 0
  %y.splat = shufflevector <4 x float> %y.lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %prod = fmul <4 x float> %x, %y.splat
  %res = select <4 x i1> %z.is.zero, <4 x float> %prod, <4 x float> %z
  ret <4 x float> %res
}
994
; Predicated vector-by-scalar floating-point multiply on <8 x half>: the half
; %y is splatted to every lane and multiplied with %x. Lanes where %z
; compares ordered-equal to zero take the product; all others keep %z.
define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: fmulqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmult.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = fcmp oeq <8 x half> %z, zeroinitializer
  %y.lane0 = insertelement <8 x half> undef, half %y, i32 0
  %y.splat = shufflevector <8 x half> %y.lane0, <8 x half> undef, <8 x i32> zeroinitializer
  %prod = fmul <8 x half> %x, %y.splat
  %res = select <8 x i1> %z.is.zero, <8 x half> %prod, <8 x half> %z
  ret <8 x half> %res
}
1010
; Predicated signed saturating vector-plus-scalar add on <4 x i32>: the i32 %y
; is splatted to every lane and passed with %x to llvm.sadd.sat. Lanes where
; %z == 0 take the saturated sum; all other lanes keep %z.
define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: sadd_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <4 x i32> %z, zeroinitializer
  %y.lane0 = insertelement <4 x i32> undef, i32 %y, i32 0
  %y.splat = shufflevector <4 x i32> %y.lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %sat = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y.splat)
  %res = select <4 x i1> %z.is.zero, <4 x i32> %sat, <4 x i32> %z
  ret <4 x i32> %res
}
1025
; Predicated signed saturating vector-plus-scalar add on <8 x i16>: the i16 %y
; is splatted to every lane and passed with %x to llvm.sadd.sat. Lanes where
; %z == 0 take the saturated sum; all other lanes keep %z.
define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: sadd_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <8 x i16> %z, zeroinitializer
  %y.lane0 = insertelement <8 x i16> undef, i16 %y, i32 0
  %y.splat = shufflevector <8 x i16> %y.lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %sat = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y.splat)
  %res = select <8 x i1> %z.is.zero, <8 x i16> %sat, <8 x i16> %z
  ret <8 x i16> %res
}
1040
; Predicated signed saturating vector-plus-scalar add on <16 x i8>: the i8 %y
; is splatted to every lane and passed with %x to llvm.sadd.sat. Lanes where
; %z == 0 take the saturated sum; all other lanes keep %z.
define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: sadd_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.s8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <16 x i8> %z, zeroinitializer
  %y.lane0 = insertelement <16 x i8> undef, i8 %y, i32 0
  %y.splat = shufflevector <16 x i8> %y.lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %sat = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y.splat)
  %res = select <16 x i1> %z.is.zero, <16 x i8> %sat, <16 x i8> %z
  ret <16 x i8> %res
}
1055
; Predicated unsigned saturating vector-plus-scalar add on <4 x i32>: the i32
; %y is splatted to every lane and passed with %x to llvm.uadd.sat. Lanes
; where %z == 0 take the saturated sum; all other lanes keep %z.
define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: uadd_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.u32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <4 x i32> %z, zeroinitializer
  %y.lane0 = insertelement <4 x i32> undef, i32 %y, i32 0
  %y.splat = shufflevector <4 x i32> %y.lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %sat = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y.splat)
  %res = select <4 x i1> %z.is.zero, <4 x i32> %sat, <4 x i32> %z
  ret <4 x i32> %res
}
1070
; Predicated unsigned saturating vector-plus-scalar add on <8 x i16>: the i16
; %y is splatted to every lane and passed with %x to llvm.uadd.sat. Lanes
; where %z == 0 take the saturated sum; all other lanes keep %z.
define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: uadd_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.u16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <8 x i16> %z, zeroinitializer
  %y.lane0 = insertelement <8 x i16> undef, i16 %y, i32 0
  %y.splat = shufflevector <8 x i16> %y.lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %sat = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y.splat)
  %res = select <8 x i1> %z.is.zero, <8 x i16> %sat, <8 x i16> %z
  ret <8 x i16> %res
}
1085
; Predicated unsigned saturating vector-plus-scalar add on <16 x i8>: the i8
; %y is splatted to every lane and passed with %x to llvm.uadd.sat. Lanes
; where %z == 0 take the saturated sum; all other lanes keep %z.
define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: uadd_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.u8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <16 x i8> %z, zeroinitializer
  %y.lane0 = insertelement <16 x i8> undef, i8 %y, i32 0
  %y.splat = shufflevector <16 x i8> %y.lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %sat = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y.splat)
  %res = select <16 x i1> %z.is.zero, <16 x i8> %sat, <16 x i8> %z
  ret <16 x i8> %res
}
1100
; Predicated signed saturating vector-minus-scalar subtract on <4 x i32>: the
; i32 %y is splatted to every lane and passed with %x to llvm.ssub.sat. Lanes
; where %z == 0 take the saturated difference; all other lanes keep %z.
define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: ssub_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <4 x i32> %z, zeroinitializer
  %y.lane0 = insertelement <4 x i32> undef, i32 %y, i32 0
  %y.splat = shufflevector <4 x i32> %y.lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %sat = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y.splat)
  %res = select <4 x i1> %z.is.zero, <4 x i32> %sat, <4 x i32> %z
  ret <4 x i32> %res
}
1115
; Predicated signed saturating vector-minus-scalar subtract on <8 x i16>: the
; i16 %y is splatted to every lane and passed with %x to llvm.ssub.sat. Lanes
; where %z == 0 take the saturated difference; all other lanes keep %z.
define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: ssub_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <8 x i16> %z, zeroinitializer
  %y.lane0 = insertelement <8 x i16> undef, i16 %y, i32 0
  %y.splat = shufflevector <8 x i16> %y.lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %sat = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y.splat)
  %res = select <8 x i1> %z.is.zero, <8 x i16> %sat, <8 x i16> %z
  ret <8 x i16> %res
}
1130
; Predicated signed saturating vector-minus-scalar subtract on <16 x i8>: the
; i8 %y is splatted to every lane and passed with %x to llvm.ssub.sat. Lanes
; where %z == 0 take the saturated difference; all other lanes keep %z.
define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: ssub_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.s8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <16 x i8> %z, zeroinitializer
  %y.lane0 = insertelement <16 x i8> undef, i8 %y, i32 0
  %y.splat = shufflevector <16 x i8> %y.lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %sat = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y.splat)
  %res = select <16 x i1> %z.is.zero, <16 x i8> %sat, <16 x i8> %z
  ret <16 x i8> %res
}
1145
; Predicated unsigned saturating vector-minus-scalar subtract on <4 x i32>:
; the i32 %y is splatted to every lane and passed with %x to llvm.usub.sat.
; Lanes where %z == 0 take the saturated difference; all others keep %z.
define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: usub_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.u32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <4 x i32> %z, zeroinitializer
  %y.lane0 = insertelement <4 x i32> undef, i32 %y, i32 0
  %y.splat = shufflevector <4 x i32> %y.lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %sat = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y.splat)
  %res = select <4 x i1> %z.is.zero, <4 x i32> %sat, <4 x i32> %z
  ret <4 x i32> %res
}
1160
; Predicated unsigned saturating vector-minus-scalar subtract on <8 x i16>:
; the i16 %y is splatted to every lane and passed with %x to llvm.usub.sat.
; Lanes where %z == 0 take the saturated difference; all others keep %z.
define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: usub_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.u16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <8 x i16> %z, zeroinitializer
  %y.lane0 = insertelement <8 x i16> undef, i16 %y, i32 0
  %y.splat = shufflevector <8 x i16> %y.lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %sat = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y.splat)
  %res = select <8 x i1> %z.is.zero, <8 x i16> %sat, <8 x i16> %z
  ret <8 x i16> %res
}
1175
; Predicated unsigned saturating vector-minus-scalar subtract on <16 x i8>:
; the i8 %y is splatted to every lane and passed with %x to llvm.usub.sat.
; Lanes where %z == 0 take the saturated difference; all others keep %z.
define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: usub_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.u8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %z.is.zero = icmp eq <16 x i8> %z, zeroinitializer
  %y.lane0 = insertelement <16 x i8> undef, i8 %y, i32 0
  %y.splat = shufflevector <16 x i8> %y.lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %sat = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y.splat)
  %res = select <16 x i1> %z.is.zero, <16 x i8> %sat, <16 x i8> %z
  ret <16 x i8> %res
}
1190
; Saturating add/subtract intrinsics called by the *_sat* tests in this file,
; grouped per intrinsic and listed from widest to narrowest element type.

; Signed saturating add.
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)

; Unsigned saturating add.
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

; Signed saturating subtract.
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)

; Unsigned saturating subtract.
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
1203