// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
// RUN:   -fallow-half-arguments-and-returns -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>
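
// This test exercises IR generation for the NEON lane accessors:
// vget_lane/vgetq_lane extracts and vset_lane/vsetq_lane inserts, across
// the integer, polynomial, and floating-point element types. The paired
// no-op bitcasts through <8 x i8>/<16 x i8> seen below come from clang's
// generic NEON builtin lowering rather than anything these tests are
// specifically asserting.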

// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

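// vget_lane_f16 is implemented in arm_neon.h by reinterpreting half as i16
// through temporary variables, so the allocas below survive mem2reg (they
// are only accessed through bitcast pointers), and the extracted half is
// widened to float for the return.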
// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK:   ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK:   ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK:   ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
// CHECK:   store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK:   ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK:   ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}

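// vset_lane/vsetq_lane: insert a scalar into the given lane of a vector.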
// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
// CHECK:   ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

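// As with the f16 get, the f16 set round-trips both the scalar and the
// vector through i16-typed temporaries in arm_neon.h; note the half operand
// is loaded from a pointer rather than passed by value.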
// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK:   ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
// CHECK:   ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK:   ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 7);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}