1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2
; Store lane 1 of a <16 x i8> vector to %D; a lane store (st1.b) is expected.
define void @st1lane_16b(<16 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_16b:
; CHECK: st1.b
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %D
  ret void
}
10
; Store lane 1 of a <16 x i8> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.b through that register.
define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}
20
; Store lane 0 of a <16 x i8> vector to %D indexed by %offset; for bytes the
; expected code is still add + st1.b on lane 0.
define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}
30
; Store lane 1 of a <8 x i16> vector to %D; a lane store (st1.h) is expected.
define void @st1lane_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_8h:
; CHECK: st1.h
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %D
  ret void
}
38
; Store lane 1 of a <8 x i16> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.h through that register.
define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}
48
; Store lane 0 of a <8 x i16> vector to %D indexed by %offset; expects a scalar
; str of h0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}
57
; Store lane 1 of a <4 x i32> vector to %D; a lane store (st1.s) is expected.
define void @st1lane_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_4s:
; CHECK: st1.s
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %D
  ret void
}
65
; Store lane 1 of a <4 x i32> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.s through that register.
define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}
75
; Store lane 0 of a <4 x i32> vector to %D indexed by %offset; expects a scalar
; str of s0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}
84
; Store lane 1 of a <4 x float> vector to %D; a lane store (st1.s) is expected.
define void @st1lane_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_4s_float:
; CHECK: st1.s
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %D
  ret void
}
92
; Store lane 1 of a <4 x float> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.s through that register.
define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}
102
; Store lane 0 of a <4 x float> vector to %D indexed by %offset; expects a
; scalar str of s0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}
111
; Store lane 1 of a <2 x i64> vector to %D; a lane store (st1.d) is expected.
define void @st1lane_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane_2d:
; CHECK: st1.d
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %D
  ret void
}
119
; Store lane 1 of a <2 x i64> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.d through that register.
define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %ptr
  ret void
}
129
; Store lane 0 of a <2 x i64> vector to %D indexed by %offset; expects a scalar
; str of d0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}
138
; Store lane 1 of a <2 x double> vector to %D; a lane store (st1.d) is expected.
define void @st1lane_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane_2d_double:
; CHECK: st1.d
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %D
  ret void
}
146
; Store lane 1 of a <2 x double> vector to %D indexed by %offset; expects an
; add forming the address followed by st1.d through that register.
define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %ptr
  ret void
}
156
; Store lane 0 of a <2 x double> vector to %D indexed by %offset; expects a
; scalar str of d0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}
165
; Store lane 1 of a <8 x i8> vector to %D; a lane store (st1.b) is expected.
define void @st1lane_8b(<8 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_8b:
; CHECK: st1.b
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %D
  ret void
}
173
; Store lane 1 of a <8 x i8> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.b through that register.
define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}
183
; Store lane 0 of a <8 x i8> vector to %D indexed by %offset; for bytes the
; expected code is still add + st1.b on lane 0.
define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}
193
; Store lane 1 of a <4 x i16> vector to %D; a lane store (st1.h) is expected.
define void @st1lane_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_4h:
; CHECK: st1.h
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %D
  ret void
}
201
; Store lane 1 of a <4 x i16> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.h through that register.
define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}
211
; Store lane 0 of a <4 x i16> vector to %D indexed by %offset; expects a scalar
; str of h0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}
220
; Store lane 1 of a <2 x i32> vector to %D; a lane store (st1.s) is expected.
define void @st1lane_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_2s:
; CHECK: st1.s
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %D
  ret void
}
228
; Store lane 1 of a <2 x i32> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.s through that register.
define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}
238
; Store lane 0 of a <2 x i32> vector to %D indexed by %offset; expects a scalar
; str of s0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}
247
; Store lane 1 of a <2 x float> vector to %D; a lane store (st1.s) is expected.
define void @st1lane_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_2s_float:
; CHECK: st1.s
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %D
  ret void
}
255
; Store lane 1 of a <2 x float> vector to %D indexed by %offset; expects an add
; forming the address followed by st1.s through that register.
define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}
265
; Store lane 0 of a <2 x float> vector to %D indexed by %offset; expects a
; scalar str of s0 with a scaled register offset instead of a lane store.
define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}
274
; st2lane intrinsic on lane 1 of two <16 x i8> vectors; expects st2.b.
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
; CHECK-LABEL: st2lane_16b:
; CHECK: st2.b
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
  ret void
}
281
; st2lane intrinsic on lane 1 of two <8 x i16> vectors; expects st2.h.
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
; CHECK-LABEL: st2lane_8h:
; CHECK: st2.h
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
  ret void
}
288
; st2lane intrinsic on lane 1 of two <4 x i32> vectors; expects st2.s.
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
; CHECK-LABEL: st2lane_4s:
; CHECK: st2.s
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
  ret void
}
295
; st2lane intrinsic on lane 1 of two <2 x i64> vectors; expects st2.d.
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
; CHECK-LABEL: st2lane_2d:
; CHECK: st2.d
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
  ret void
}
302
; The st2lane intrinsics store through their pointer operand, so they are
; declared nounwind only; readnone would claim they do not touch memory.
declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind
307
; st3lane intrinsic on lane 1 of three <16 x i8> vectors; expects st3.b.
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
; CHECK-LABEL: st3lane_16b:
; CHECK: st3.b
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
  ret void
}
314
; st3lane intrinsic on lane 1 of three <8 x i16> vectors; expects st3.h.
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
; CHECK-LABEL: st3lane_8h:
; CHECK: st3.h
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
  ret void
}
321
; st3lane intrinsic on lane 1 of three <4 x i32> vectors; expects st3.s.
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
; CHECK-LABEL: st3lane_4s:
; CHECK: st3.s
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
  ret void
}
328
; st3lane intrinsic on lane 1 of three <2 x i64> vectors; expects st3.d.
define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
; CHECK-LABEL: st3lane_2d:
; CHECK: st3.d
  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
  ret void
}
335
; The st3lane intrinsics store through their pointer operand, so they are
; declared nounwind only; readnone would claim they do not touch memory.
declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind
340
; st4lane intrinsic on lane 1 of four <16 x i8> vectors; expects st4.b.
define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
; CHECK-LABEL: st4lane_16b:
; CHECK: st4.b
  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
  ret void
}
347
; st4lane intrinsic on lane 1 of four <8 x i16> vectors; expects st4.h.
define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
; CHECK-LABEL: st4lane_8h:
; CHECK: st4.h
  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
  ret void
}
354
; st4lane intrinsic on lane 1 of four <4 x i32> vectors; expects st4.s.
define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
; CHECK-LABEL: st4lane_4s:
; CHECK: st4.s
  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
  ret void
}
361
; st4lane intrinsic on lane 1 of four <2 x i64> vectors; expects st4.d.
define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
; CHECK-LABEL: st4lane_2d:
; CHECK: st4.d
  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
  ret void
}
368
; The st4lane intrinsics store through their pointer operand, so they are
; declared nounwind only; readnone would claim they do not touch memory.
declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind
373
374
; st2 intrinsic on two <8 x i8> vectors; expects st2.8b.
define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_8b:
; CHECK: st2.8b
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
  ret void
}
381
; st3 intrinsic on three <8 x i8> vectors; expects st3.8b.
define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_8b:
; CHECK: st3.8b
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
  ret void
}
388
; st4 intrinsic on four <8 x i8> vectors; expects st4.8b.
define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_8b:
; CHECK: st4.8b
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
  ret void
}
395
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
399
; st2 intrinsic on two <16 x i8> vectors; expects st2.16b.
define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_16b:
; CHECK: st2.16b
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
  ret void
}
406
; st3 intrinsic on three <16 x i8> vectors; expects st3.16b.
define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_16b:
; CHECK: st3.16b
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
  ret void
}
413
; st4 intrinsic on four <16 x i8> vectors; expects st4.16b.
define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_16b:
; CHECK: st4.16b
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
  ret void
}
420
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
424
; st2 intrinsic on two <4 x i16> vectors; expects st2.4h.
define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_4h:
; CHECK: st2.4h
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
  ret void
}
431
; st3 intrinsic on three <4 x i16> vectors; expects st3.4h.
define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_4h:
; CHECK: st3.4h
  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
  ret void
}
438
; st4 intrinsic on four <4 x i16> vectors; expects st4.4h.
define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_4h:
; CHECK: st4.4h
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
  ret void
}
445
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
449
; st2 intrinsic on two <8 x i16> vectors; expects st2.8h.
define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_8h:
; CHECK: st2.8h
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
  ret void
}
456
; st3 intrinsic on three <8 x i16> vectors; expects st3.8h.
define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_8h:
; CHECK: st3.8h
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
  ret void
}
463
; st4 intrinsic on four <8 x i16> vectors; expects st4.8h.
define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_8h:
; CHECK: st4.8h
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
  ret void
}
470
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
474
; st2 intrinsic on two <2 x i32> vectors; expects st2.2s.
define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_2s:
; CHECK: st2.2s
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
  ret void
}
481
; st3 intrinsic on three <2 x i32> vectors; expects st3.2s.
define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_2s:
; CHECK: st3.2s
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
  ret void
}
488
; st4 intrinsic on four <2 x i32> vectors; expects st4.2s.
define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_2s:
; CHECK: st4.2s
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
  ret void
}
495
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
499
; st2 intrinsic on two <4 x i32> vectors; expects st2.4s.
define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_4s:
; CHECK: st2.4s
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
  ret void
}
506
; st3 intrinsic on three <4 x i32> vectors; expects st3.4s.
define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_4s:
; CHECK: st3.4s
  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
  ret void
}
513
; st4 intrinsic on four <4 x i32> vectors; expects st4.4s.
define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_4s:
; CHECK: st4.4s
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
  ret void
}
520
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
524
; With single-element <1 x i64> vectors st2/st3/st4 do not make much sense, so
; the st2 intrinsic is expected to be emitted as st1.1d.
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
  ret void
}
532
; st3 intrinsic on three single-element <1 x i64> vectors; expects st1.1d
; rather than an st3 form.
define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
  ret void
}
539
; st4 intrinsic on four single-element <1 x i64> vectors; expects st1.1d
; rather than an st4 form.
define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
  ret void
}
546
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
550
; st2 intrinsic on two <2 x i64> vectors; expects st2.2d.
define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_2d:
; CHECK: st2.2d
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
  ret void
}
557
; st3 intrinsic on three <2 x i64> vectors; expects st3.2d.
define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_2d:
; CHECK: st3.2d
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
  ret void
}
564
; st4 intrinsic on four <2 x i64> vectors; expects st4.2d.
define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_2d:
; CHECK: st4.2d
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
  ret void
}
571
; These intrinsics store through their pointer operand, so they are declared
; nounwind only; readonly would claim they never write memory.
declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
575
; st1x2 on 64-bit vectors. The intrinsics store through their pointer operand,
; so they are declared nounwind only; readonly would claim they never write.
declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind
582
; st1x2 of two <8 x i8> registers: one st1.8b with a two-register list at [x0].
define void @st1_x2_v8i8(<8 x i8> %r0, <8 x i8> %r1, i8* %dst) {
; CHECK-LABEL: st1_x2_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %r0, <8 x i8> %r1, i8* %dst)
  ret void
}
589
; st1x2 of two <4 x i16> registers: one st1.4h with a two-register list at [x0].
define void @st1_x2_v4i16(<4 x i16> %r0, <4 x i16> %r1, i16* %dst) {
; CHECK-LABEL: st1_x2_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %r0, <4 x i16> %r1, i16* %dst)
  ret void
}
596
; st1x2 of two <2 x i32> registers: one st1.2s with a two-register list at [x0].
define void @st1_x2_v2i32(<2 x i32> %r0, <2 x i32> %r1, i32* %dst) {
; CHECK-LABEL: st1_x2_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %r0, <2 x i32> %r1, i32* %dst)
  ret void
}
603
; st1x2 of two <2 x float> registers: one st1.2s with a two-register list at [x0].
define void @st1_x2_v2f32(<2 x float> %r0, <2 x float> %r1, float* %dst) {
; CHECK-LABEL: st1_x2_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %r0, <2 x float> %r1, float* %dst)
  ret void
}
610
; st1x2 of two <1 x i64> registers: one st1.1d with a two-register list at [x0].
define void @st1_x2_v1i64(<1 x i64> %r0, <1 x i64> %r1, i64* %dst) {
; CHECK-LABEL: st1_x2_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %r0, <1 x i64> %r1, i64* %dst)
  ret void
}
617
; st1x2 of two <1 x double> registers: one st1.1d with a two-register list at [x0].
define void @st1_x2_v1f64(<1 x double> %r0, <1 x double> %r1, double* %dst) {
; CHECK-LABEL: st1_x2_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %r0, <1 x double> %r1, double* %dst)
  ret void
}
624
; st1x2 on 128-bit vectors. The intrinsics store through their pointer operand,
; so they are declared nounwind only; readonly would claim they never write.
declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind
631
; st1x2 of two <16 x i8> registers: one st1.16b with a two-register list at [x0].
define void @st1_x2_v16i8(<16 x i8> %r0, <16 x i8> %r1, i8* %dst) {
; CHECK-LABEL: st1_x2_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %r0, <16 x i8> %r1, i8* %dst)
  ret void
}
638
; st1x2 of two <8 x i16> registers: one st1.8h with a two-register list at [x0].
define void @st1_x2_v8i16(<8 x i16> %r0, <8 x i16> %r1, i16* %dst) {
; CHECK-LABEL: st1_x2_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %r0, <8 x i16> %r1, i16* %dst)
  ret void
}
645
; st1x2 of two <4 x i32> registers: one st1.4s with a two-register list at [x0].
define void @st1_x2_v4i32(<4 x i32> %r0, <4 x i32> %r1, i32* %dst) {
; CHECK-LABEL: st1_x2_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %r0, <4 x i32> %r1, i32* %dst)
  ret void
}
652
; st1x2 of two <4 x float> registers: one st1.4s with a two-register list at [x0].
define void @st1_x2_v4f32(<4 x float> %r0, <4 x float> %r1, float* %dst) {
; CHECK-LABEL: st1_x2_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %r0, <4 x float> %r1, float* %dst)
  ret void
}
659
; st1x2 of two <2 x i64> registers: one st1.2d with a two-register list at [x0].
define void @st1_x2_v2i64(<2 x i64> %r0, <2 x i64> %r1, i64* %dst) {
; CHECK-LABEL: st1_x2_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %r0, <2 x i64> %r1, i64* %dst)
  ret void
}
666
; st1x2 of two <2 x double> registers: one st1.2d with a two-register list at [x0].
define void @st1_x2_v2f64(<2 x double> %r0, <2 x double> %r1, double* %dst) {
; CHECK-LABEL: st1_x2_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %r0, <2 x double> %r1, double* %dst)
  ret void
}
673
; st1x3 on 64-bit vectors. The intrinsics store through their pointer operand,
; so they are declared nounwind only; readonly would claim they never write.
declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind
680
; st1x3 of three <8 x i8> registers: one st1.8b with a three-register list at [x0].
define void @st1_x3_v8i8(<8 x i8> %r0, <8 x i8> %r1, <8 x i8> %r2, i8* %dst) {
; CHECK-LABEL: st1_x3_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %r0, <8 x i8> %r1, <8 x i8> %r2, i8* %dst)
  ret void
}
687
; st1x3 of three <4 x i16> registers: one st1.4h with a three-register list at [x0].
define void @st1_x3_v4i16(<4 x i16> %r0, <4 x i16> %r1, <4 x i16> %r2, i16* %dst) {
; CHECK-LABEL: st1_x3_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %r0, <4 x i16> %r1, <4 x i16> %r2, i16* %dst)
  ret void
}
694
; st1x3 of three <2 x i32> registers: one st1.2s with a three-register list at [x0].
define void @st1_x3_v2i32(<2 x i32> %r0, <2 x i32> %r1, <2 x i32> %r2, i32* %dst) {
; CHECK-LABEL: st1_x3_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %r0, <2 x i32> %r1, <2 x i32> %r2, i32* %dst)
  ret void
}
701
; st1x3 of three <2 x float> registers: one st1.2s with a three-register list at [x0].
define void @st1_x3_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2, float* %dst) {
; CHECK-LABEL: st1_x3_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2, float* %dst)
  ret void
}
708
; st1x3 of three <1 x i64> registers: one st1.1d with a three-register list at [x0].
define void @st1_x3_v1i64(<1 x i64> %r0, <1 x i64> %r1, <1 x i64> %r2, i64* %dst) {
; CHECK-LABEL: st1_x3_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %r0, <1 x i64> %r1, <1 x i64> %r2, i64* %dst)
  ret void
}
715
; st1x3 of three <1 x double> registers: one st1.1d with a three-register list at [x0].
define void @st1_x3_v1f64(<1 x double> %r0, <1 x double> %r1, <1 x double> %r2, double* %dst) {
; CHECK-LABEL: st1_x3_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %r0, <1 x double> %r1, <1 x double> %r2, double* %dst)
  ret void
}
722
; st1x3 on 128-bit vectors. The intrinsics store through their pointer operand,
; so they are declared nounwind only; readonly would claim they never write.
declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind
729
; st1x3 of three <16 x i8> registers: one st1.16b with a three-register list at [x0].
define void @st1_x3_v16i8(<16 x i8> %r0, <16 x i8> %r1, <16 x i8> %r2, i8* %dst) {
; CHECK-LABEL: st1_x3_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %r0, <16 x i8> %r1, <16 x i8> %r2, i8* %dst)
  ret void
}
736
; st1x3 of three <8 x i16> registers: one st1.8h with a three-register list at [x0].
define void @st1_x3_v8i16(<8 x i16> %r0, <8 x i16> %r1, <8 x i16> %r2, i16* %dst) {
; CHECK-LABEL: st1_x3_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %r0, <8 x i16> %r1, <8 x i16> %r2, i16* %dst)
  ret void
}
743
; st1x3 of three <4 x i32> registers: one st1.4s with a three-register list at [x0].
define void @st1_x3_v4i32(<4 x i32> %r0, <4 x i32> %r1, <4 x i32> %r2, i32* %dst) {
; CHECK-LABEL: st1_x3_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %r0, <4 x i32> %r1, <4 x i32> %r2, i32* %dst)
  ret void
}
750
; st1x3 of three <4 x float> registers: one st1.4s with a three-register list at [x0].
define void @st1_x3_v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2, float* %dst) {
; CHECK-LABEL: st1_x3_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2, float* %dst)
  ret void
}
757
; st1x3 of three <2 x i64> registers: one st1.2d with a three-register list at [x0].
define void @st1_x3_v2i64(<2 x i64> %r0, <2 x i64> %r1, <2 x i64> %r2, i64* %dst) {
; CHECK-LABEL: st1_x3_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %r0, <2 x i64> %r1, <2 x i64> %r2, i64* %dst)
  ret void
}
764
; st1x3 of three <2 x double> registers: one st1.2d with a three-register list at [x0].
define void @st1_x3_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2, double* %dst) {
; CHECK-LABEL: st1_x3_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2, double* %dst)
  ret void
}
771
772
; st1x4 on 64-bit vectors. The intrinsics store through their pointer operand,
; so they are declared nounwind only; readonly would claim they never write.
declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind
779
; Lowering test for the four-register st1 intrinsic on <8 x i8> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v8i8.p0i8 together with the
; destination pointer %addr. The pattern below expects one `st1.8b` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
; CHECK-LABEL: st1_x4_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
  ret void
}
786
; Lowering test for the four-register st1 intrinsic on <4 x i16> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v4i16.p0i16 together with the
; destination pointer %addr. The pattern below expects one `st1.4h` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
; CHECK-LABEL: st1_x4_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
  ret void
}
793
; Lowering test for the four-register st1 intrinsic on <2 x i32> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v2i32.p0i32 together with the
; destination pointer %addr. The pattern below expects one `st1.2s` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
; CHECK-LABEL: st1_x4_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
  ret void
}
800
; Lowering test for the four-register st1 intrinsic on <2 x float> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v2f32.p0f32 together with the
; destination pointer %addr. The pattern below expects one `st1.2s` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
; CHECK-LABEL: st1_x4_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
  ret void
}
807
; Lowering test for the four-register st1 intrinsic on <1 x i64> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v1i64.p0i64 together with the
; destination pointer %addr. The pattern below expects one `st1.1d` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
; CHECK-LABEL: st1_x4_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
  ret void
}
814
; Lowering test for the four-register st1 intrinsic on <1 x double> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v1f64.p0f64 together with the
; destination pointer %addr. The pattern below expects one `st1.1d` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
; CHECK-LABEL: st1_x4_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
  ret void
}
821
; Declarations of the four-register st1 intrinsic for the 128-bit vector types
; used by the tests that follow: <16 x i8>, <8 x i16>, <4 x i32>, <4 x float>,
; <2 x i64> and <2 x double>. Each takes four vectors of one type followed by
; a pointer to the element type and returns void.
; These intrinsics write through the pointer operand, so they are declared
; `nounwind` only; a read-only memory attribute would misdescribe a store.
declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind
828
; Lowering test for the four-register st1 intrinsic on <16 x i8> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v16i8.p0i8 together with the
; destination pointer %addr. The pattern below expects one `st1.16b` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
; CHECK-LABEL: st1_x4_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
  ret void
}
835
; Lowering test for the four-register st1 intrinsic on <8 x i16> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v8i16.p0i16 together with the
; destination pointer %addr. The pattern below expects one `st1.8h` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) {
; CHECK-LABEL: st1_x4_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
  ret void
}
842
; Lowering test for the four-register st1 intrinsic on <4 x i32> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v4i32.p0i32 together with the
; destination pointer %addr. The pattern below expects one `st1.4s` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) {
; CHECK-LABEL: st1_x4_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
  ret void
}
849
; Lowering test for the four-register st1 intrinsic on <4 x float> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v4f32.p0f32 together with the
; destination pointer %addr. The pattern below expects one `st1.4s` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) {
; CHECK-LABEL: st1_x4_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
  ret void
}
856
; Lowering test for the four-register st1 intrinsic on <2 x i64> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v2i64.p0i64 together with the
; destination pointer %addr. The pattern below expects one `st1.2d` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) {
; CHECK-LABEL: st1_x4_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
  ret void
}
863
; Lowering test for the four-register st1 intrinsic on <2 x double> operands.
; %A..%D are passed to @llvm.aarch64.neon.st1x4.v2f64.p0f64 together with the
; destination pointer %addr. The pattern below expects one `st1.2d` with a
; four-register list (any v registers), addressed through x0.
define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) {
; CHECK-LABEL: st1_x4_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr)
  ret void
}
870