1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2 // RUN:   -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
3 
4 // Test new aarch64 intrinsics and types
5 
6 #include <arm_neon.h>
7 
8 // CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 {
9 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #2
10 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
11 // CHECK:   ret i16 [[TMP0]]
// Returns vaddlv_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vaddlv_s8(int8x8_t a) {
  return vaddlv_s8(a);
}
15 
16 // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 {
17 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
18 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
19 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> [[TMP1]]) #2
20 // CHECK:   ret i32 [[VADDLV_I]]
// Returns vaddlv_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int32_t test_vaddlv_s16(int16x4_t a) {
  return vaddlv_s16(a);
}
24 
25 // CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 {
26 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #2
27 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
28 // CHECK:   ret i16 [[TMP0]]
// Returns vaddlv_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vaddlv_u8(uint8x8_t a) {
  return vaddlv_u8(a);
}
32 
33 // CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
34 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
35 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
36 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> [[TMP1]]) #2
37 // CHECK:   ret i32 [[VADDLV_I]]
// Returns vaddlv_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint32_t test_vaddlv_u16(uint16x4_t a) {
  return vaddlv_u16(a);
}
41 
42 // CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #0 {
43 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #2
44 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
45 // CHECK:   ret i16 [[TMP0]]
// Returns vaddlvq_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vaddlvq_s8(int8x16_t a) {
  return vaddlvq_s8(a);
}
49 
50 // CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
51 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
52 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
53 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> [[TMP1]]) #2
54 // CHECK:   ret i32 [[VADDLV_I]]
// Returns vaddlvq_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int32_t test_vaddlvq_s16(int16x8_t a) {
  return vaddlvq_s16(a);
}
58 
59 // CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
60 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
61 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
62 // CHECK:   [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> [[TMP1]]) #2
63 // CHECK:   ret i64 [[VADDLVQ_S32_I]]
// Returns vaddlvq_s32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int64_t test_vaddlvq_s32(int32x4_t a) {
  return vaddlvq_s32(a);
}
67 
68 // CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #0 {
69 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #2
70 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
71 // CHECK:   ret i16 [[TMP0]]
// Returns vaddlvq_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vaddlvq_u8(uint8x16_t a) {
  return vaddlvq_u8(a);
}
75 
76 // CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
77 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
78 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
79 // CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[TMP1]]) #2
80 // CHECK:   ret i32 [[VADDLV_I]]
// Returns vaddlvq_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint32_t test_vaddlvq_u16(uint16x8_t a) {
  return vaddlvq_u16(a);
}
84 
85 // CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
86 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
87 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
88 // CHECK:   [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[TMP1]]) #2
89 // CHECK:   ret i64 [[VADDLVQ_U32_I]]
// Returns vaddlvq_u32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint64_t test_vaddlvq_u32(uint32x4_t a) {
  return vaddlvq_u32(a);
}
93 
94 // CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 {
95 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #2
96 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
97 // CHECK:   ret i8 [[TMP0]]
// Returns vmaxv_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int8_t test_vmaxv_s8(int8x8_t a) {
  return vmaxv_s8(a);
}
101 
102 // CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
103 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
104 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
105 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[TMP1]]) #2
106 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
107 // CHECK:   ret i16 [[TMP2]]
// Returns vmaxv_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vmaxv_s16(int16x4_t a) {
  return vmaxv_s16(a);
}
111 
112 // CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 {
113 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #2
114 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
115 // CHECK:   ret i8 [[TMP0]]
// Returns vmaxv_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint8_t test_vmaxv_u8(uint8x8_t a) {
  return vmaxv_u8(a);
}
119 
120 // CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
121 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
122 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
123 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[TMP1]]) #2
124 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
125 // CHECK:   ret i16 [[TMP2]]
// Returns vmaxv_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vmaxv_u16(uint16x4_t a) {
  return vmaxv_u16(a);
}
129 
130 // CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #0 {
131 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #2
132 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
133 // CHECK:   ret i8 [[TMP0]]
// Returns vmaxvq_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int8_t test_vmaxvq_s8(int8x16_t a) {
  return vmaxvq_s8(a);
}
137 
138 // CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
139 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
140 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
141 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[TMP1]]) #2
142 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
143 // CHECK:   ret i16 [[TMP2]]
// Returns vmaxvq_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vmaxvq_s16(int16x8_t a) {
  return vmaxvq_s16(a);
}
147 
148 // CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
149 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
150 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
151 // CHECK:   [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[TMP1]]) #2
152 // CHECK:   ret i32 [[VMAXVQ_S32_I]]
// Returns vmaxvq_s32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int32_t test_vmaxvq_s32(int32x4_t a) {
  return vmaxvq_s32(a);
}
156 
157 // CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #0 {
158 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #2
159 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
160 // CHECK:   ret i8 [[TMP0]]
// Returns vmaxvq_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint8_t test_vmaxvq_u8(uint8x16_t a) {
  return vmaxvq_u8(a);
}
164 
165 // CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
166 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
167 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
168 // CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[TMP1]]) #2
169 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
170 // CHECK:   ret i16 [[TMP2]]
// Returns vmaxvq_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vmaxvq_u16(uint16x8_t a) {
  return vmaxvq_u16(a);
}
174 
175 // CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
176 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
177 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
178 // CHECK:   [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[TMP1]]) #2
179 // CHECK:   ret i32 [[VMAXVQ_U32_I]]
// Returns vmaxvq_u32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint32_t test_vmaxvq_u32(uint32x4_t a) {
  return vmaxvq_u32(a);
}
183 
184 // CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 {
185 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #2
186 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
187 // CHECK:   ret i8 [[TMP0]]
// Returns vminv_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int8_t test_vminv_s8(int8x8_t a) {
  return vminv_s8(a);
}
191 
192 // CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
193 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
194 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
195 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[TMP1]]) #2
196 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
197 // CHECK:   ret i16 [[TMP2]]
// Returns vminv_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vminv_s16(int16x4_t a) {
  return vminv_s16(a);
}
201 
202 // CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 {
203 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #2
204 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
205 // CHECK:   ret i8 [[TMP0]]
// Returns vminv_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint8_t test_vminv_u8(uint8x8_t a) {
  return vminv_u8(a);
}
209 
210 // CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
211 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
212 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
213 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[TMP1]]) #2
214 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
215 // CHECK:   ret i16 [[TMP2]]
// Returns vminv_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vminv_u16(uint16x4_t a) {
  return vminv_u16(a);
}
219 
220 // CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #0 {
221 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #2
222 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
223 // CHECK:   ret i8 [[TMP0]]
// Returns vminvq_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int8_t test_vminvq_s8(int8x16_t a) {
  return vminvq_s8(a);
}
227 
228 // CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
229 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
230 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
231 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[TMP1]]) #2
232 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
233 // CHECK:   ret i16 [[TMP2]]
// Returns vminvq_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vminvq_s16(int16x8_t a) {
  return vminvq_s16(a);
}
237 
238 // CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
239 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
240 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
241 // CHECK:   [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[TMP1]]) #2
242 // CHECK:   ret i32 [[VMINVQ_S32_I]]
// Returns vminvq_s32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int32_t test_vminvq_s32(int32x4_t a) {
  return vminvq_s32(a);
}
246 
247 // CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #0 {
248 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #2
249 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
250 // CHECK:   ret i8 [[TMP0]]
// Returns vminvq_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint8_t test_vminvq_u8(uint8x16_t a) {
  return vminvq_u8(a);
}
254 
255 // CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
256 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
257 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
258 // CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[TMP1]]) #2
259 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
260 // CHECK:   ret i16 [[TMP2]]
// Returns vminvq_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vminvq_u16(uint16x8_t a) {
  return vminvq_u16(a);
}
264 
265 // CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
266 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
267 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
268 // CHECK:   [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[TMP1]]) #2
269 // CHECK:   ret i32 [[VMINVQ_U32_I]]
// Returns vminvq_u32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint32_t test_vminvq_u32(uint32x4_t a) {
  return vminvq_u32(a);
}
273 
274 // CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 {
275 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #2
276 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
277 // CHECK:   ret i8 [[TMP0]]
// Returns vaddv_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int8_t test_vaddv_s8(int8x8_t a) {
  return vaddv_s8(a);
}
281 
282 // CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
283 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
284 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
285 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[TMP1]]) #2
286 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
287 // CHECK:   ret i16 [[TMP2]]
// Returns vaddv_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vaddv_s16(int16x4_t a) {
  return vaddv_s16(a);
}
291 
292 // CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 {
293 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #2
294 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
295 // CHECK:   ret i8 [[TMP0]]
// Returns vaddv_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint8_t test_vaddv_u8(uint8x8_t a) {
  return vaddv_u8(a);
}
299 
300 // CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
301 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
302 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
303 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[TMP1]]) #2
304 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
305 // CHECK:   ret i16 [[TMP2]]
// Returns vaddv_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vaddv_u16(uint16x4_t a) {
  return vaddv_u16(a);
}
309 
310 // CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #0 {
311 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #2
312 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
313 // CHECK:   ret i8 [[TMP0]]
// Returns vaddvq_s8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int8_t test_vaddvq_s8(int8x16_t a) {
  return vaddvq_s8(a);
}
317 
318 // CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
319 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
320 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
321 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[TMP1]]) #2
322 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
323 // CHECK:   ret i16 [[TMP2]]
// Returns vaddvq_s16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int16_t test_vaddvq_s16(int16x8_t a) {
  return vaddvq_s16(a);
}
327 
328 // CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
329 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
330 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
331 // CHECK:   [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[TMP1]]) #2
332 // CHECK:   ret i32 [[VADDVQ_S32_I]]
// Returns vaddvq_s32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
int32_t test_vaddvq_s32(int32x4_t a) {
  return vaddvq_s32(a);
}
336 
337 // CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #0 {
338 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #2
339 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
340 // CHECK:   ret i8 [[TMP0]]
// Returns vaddvq_u8(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint8_t test_vaddvq_u8(uint8x16_t a) {
  return vaddvq_u8(a);
}
344 
345 // CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
346 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
347 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
348 // CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[TMP1]]) #2
349 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
350 // CHECK:   ret i16 [[TMP2]]
// Returns vaddvq_u16(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint16_t test_vaddvq_u16(uint16x8_t a) {
  return vaddvq_u16(a);
}
354 
355 // CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
356 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
357 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
358 // CHECK:   [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[TMP1]]) #2
359 // CHECK:   ret i32 [[VADDVQ_U32_I]]
// Returns vaddvq_u32(a) unchanged so the IR emitted for the intrinsic can be
// matched against the expectations listed above.
uint32_t test_vaddvq_u32(uint32x4_t a) {
  return vaddvq_u32(a);
}
363 
364 // CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
365 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
366 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
367 // CHECK:   [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> [[TMP1]]) #2
368 // CHECK:   ret float [[VMAXVQ_F32_I]]
test_vmaxvq_f32(float32x4_t a)369 float32_t test_vmaxvq_f32(float32x4_t a) {
370   return vmaxvq_f32(a);
371 }
372 
373 // CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
374 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
375 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
376 // CHECK:   [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[TMP1]]) #2
377 // CHECK:   ret float [[VMINVQ_F32_I]]
test_vminvq_f32(float32x4_t a)378 float32_t test_vminvq_f32(float32x4_t a) {
379   return vminvq_f32(a);
380 }
381 
382 // CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
383 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
384 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
385 // CHECK:   [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> [[TMP1]]) #2
386 // CHECK:   ret float [[VMAXNMVQ_F32_I]]
test_vmaxnmvq_f32(float32x4_t a)387 float32_t test_vmaxnmvq_f32(float32x4_t a) {
388   return vmaxnmvq_f32(a);
389 }
390 
391 // CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
392 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
393 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
394 // CHECK:   [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[TMP1]]) #2
395 // CHECK:   ret float [[VMINNMVQ_F32_I]]
test_vminnmvq_f32(float32x4_t a)396 float32_t test_vminnmvq_f32(float32x4_t a) {
397   return vminnmvq_f32(a);
398 }
399