; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
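
; Editorial note: the ';'-comments between tests below are hand-written
; annotations, not autogenerated output. These tests correspond to the ACLE
; MVE intrinsics of the same names (e.g. vldrbq_gather_offset_s16). The
; gather-load intrinsics share one argument convention, readable off the
; tests themselves:
;   llvm.arm.mve.vldr.gather.offset(base, offsets, memsize, shift, unsigned)
; memsize is the memory element width in bits (8/16/32/64 selects
; vldrb/vldrh/vldrw/vldrd), shift is the left-shift applied to each offset
; (0 here, nonzero in the *_shifted_offset tests), and unsigned=0/1 selects
; sign- or zero-extension when the memory element is narrower than the
; destination lane.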

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32)

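; A byte gather into <16 x i8> needs no extension, so llc canonicalizes the
; "signed" s8 form to vldrb.u8 as well.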
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
  ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
  ret <16 x i8> %0
}

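; The _z_ (zero-predicated) forms convert the i16 mask to a vector predicate
; via llvm.arm.mve.pred.i2v; llc materializes it with vmsr p0 and issues the
; gather as a vldrbt inside a single-instruction VPT block (vpst).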
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

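; The vldr.gather.base intrinsics take a vector of absolute addresses in a Q
; register plus an immediate byte offset, printed as [q0, #imm]; the 64-bit
; tests use immediates that are multiples of the 8-byte element size.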
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #616]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #-336]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
  ret <2 x i64> %0
}

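; The _wb (writeback) variants return a {data, updated addresses} pair. The
; IR stores the updated address vector back through %addr, which llc folds
; into the pre-increment [q1, #imm]! addressing form.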
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #576]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, <2 x i64>* %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #-328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, <2 x i64>* %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #664]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, <2 x i64>* %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #656]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, <2 x i64>* %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

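; MVE predicates are defined on 32-bit lanes, so the predicated 64-bit
; gathers take a <4 x i1> mask; each 64-bit lane consumes two mask elements.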
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #888]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #-1000]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 -1000, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1)
  ret <2 x i64> %2
}

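; The *_shifted_offset tests pass shift=3, scaling each offset by the 8-byte
; element size; this shows up as the uxtw #3 operand.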
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1)
  ret <2 x i64> %2
}

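; Halfword gathers use memsize=16. Loads into <8 x i16> or <8 x half> need no
; extension and print as vldrh.u16; the widening loads into <4 x i32> honour
; the signedness flag (vldrh.s32 vs vldrh.u32).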
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x half> %0
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

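; The shifted halfword gathers scale the offsets by 2 (shift=1, uxtw #1).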
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

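; Word gathers against a vector base: memsize=32, and the immediates below
; (#12, #400, #284, ...) are all multiples of the 4-byte element size.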
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #12]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
  ret <4 x float> %0
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #400]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #284]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #-64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
  %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
  ret <4 x float> %3
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #80]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #480]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #-352]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
  %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
  ret <4 x float> %5
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #276]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #88]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #-300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #440]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
  ret <4 x i32> %2
}

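; Word gathers from a scalar base plus a vector of byte offsets; the f32 and
; i32 variants select the same vldrw.u32 instruction, only the IR result
; type differs.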
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
  ret <4 x float> %0
}

declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_offset_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

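; Scatter stores mirror the gathers but drop the signedness flag:
;   llvm.arm.mve.vstr.scatter.offset(base, offsets, value, memsize, shift)
; Truncating byte stores from wider lanes print as vstrb.16 / vstrb.32, and
; the predicated forms become vstrbt under a vpst.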
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
  ret void
}

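; 64-bit scatter stores to a vector base; as with the gathers, the
; predicated forms take a <4 x i1> mask.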
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [q0, #888]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [q0, #264]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [q0, #408]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [q0, #-472]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
  ret void
}

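; The _wb scatter intrinsics return the updated address vector, which is
; stored back through %addr and folded into the [q1, #imm]! form.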
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q0, [q1, #248]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
  store <2 x i64> %3, <2 x i64>* %addr, align 8
  ret void
}

declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q0, [q1, #136]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 136, <2 x i64> %value, <4 x i1> %2)
  store <2 x i64> %3, <2 x i64>* %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrd.64 q0, [q1, #208]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
  store <2 x i64> %1, <2 x i64>* %addr, align 8
  ret void
}

declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrd.64 q0, [q1, #-168]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value)
  store <2 x i64> %1, <2 x i64>* %addr, align 8
  ret void
}

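; Offset scatters store through a scalar base in r0 plus a per-lane vector
; offset. In llvm.arm.mve.vstr.scatter.offset[.predicated] the two trailing
; i32 arguments are the memory element width in bits (64 here) and the
; left-shift applied to each offset lane (0 for the unshifted form).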
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
  ret void
}

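; Same intrinsic with a shift argument of 3: the offsets are scaled by the
; 8-byte element size, selecting the 'uxtw #3' addressing form.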
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
  ret void
}

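; vstrhq scatters store 16-bit elements (memory width 16). The .16 forms
; store all eight lanes of a q register; the .32 forms truncate each lane
; of a <4 x i32> value to 16 bits, hence the v4i32/v4i1 types paired with
; the 16-bit width.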
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}

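; Shifted vstrhq variants: shift argument 1 scales the offsets by the
; 2-byte element size ('uxtw #1').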
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
  ret void
}

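; vstrwq base scatters: the q register holds the pointers and the i32
; argument is an immediate byte offset folded into [q0, #imm]. The
; immediates used here are all multiples of 4, which is what the
; instruction's scaled 7-bit immediate field should require.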
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [q0, #380]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [q0, #-400]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [q0, #48]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [q0, #-376]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [q0, #156]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [q0, #212]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
  ret void
}

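; Write-back forms of the 32-bit base scatter, mirroring the vstrdq _wb
; tests above: the returned <4 x i32> base is stored back to *%addr and
; selection produces the '!' write-back form.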
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrw.32 q0, [q1, #-412]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value)
  store <4 x i32> %1, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [q1, #236]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2)
  store <4 x i32> %3, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [q1, #328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2)
  store <4 x i32> %3, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [q1, #412]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2)
  store <4 x i32> %3, <4 x i32>* %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrw.32 q0, [q1, #-152]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value)
  store <4 x i32> %1, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrw.32 q0, [q1, #64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value)
  store <4 x i32> %1, <4 x i32>* %addr, align 8
  ret void
}

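; vstrwq offset scatters: 32-bit stores at r0 plus per-lane offsets, with
; a 32-bit memory width and no offset shift.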
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
  ret void
}

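; Shifted form: shift argument 2 scales the <4 x i32> offsets by the
; 4-byte element size ('uxtw #2').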
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
  ret void
}
