1; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
2; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
3
4; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
5; WARN-NOT: warning
6
7;
8; Masked Loads
9;
10
; Masked load of <vscale x 2 x i64> from %a under %mask, with alignment 8 and
; an undef passthru. Expected codegen: a single ld1d of .d elements using the
; p0/z predicate, followed by ret.
11define <vscale x 2 x i64> @masked_load_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i1> %mask) nounwind {
12; CHECK-LABEL: masked_load_nxv2i64:
13; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
14; CHECK-NEXT: ret
15  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
16  ret <vscale x 2 x i64> %load
17}
18
; Masked load of <vscale x 4 x i32> from %a under %mask, with alignment 4 and
; an undef passthru. Expected codegen: a single ld1w of .s elements using the
; p0/z predicate, followed by ret.
19define <vscale x 4 x i32> @masked_load_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i1> %mask) nounwind {
20; CHECK-LABEL: masked_load_nxv4i32:
21; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
22; CHECK-NEXT: ret
23  %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
24  ret <vscale x 4 x i32> %load
25}
26
; Masked load of <vscale x 8 x i16> from %a under %mask, with alignment 2 and
; an undef passthru. Expected codegen: a single ld1h of .h elements using the
; p0/z predicate, followed by ret.
27define <vscale x 8 x i16> @masked_load_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i1> %mask) nounwind {
28; CHECK-LABEL: masked_load_nxv8i16:
29; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
30; CHECK-NEXT: ret
31  %load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
32  ret <vscale x 8 x i16> %load
33}
34
; Masked load of <vscale x 16 x i8> from %a under %mask, with alignment 1 and
; an undef passthru. Expected codegen: a single ld1b of .b elements using the
; p0/z predicate, followed by ret.
35define <vscale x 16 x i8> @masked_load_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i1> %mask) nounwind {
36; CHECK-LABEL: masked_load_nxv16i8:
37; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
38; CHECK-NEXT: ret
39  %load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
40  ret <vscale x 16 x i8> %load
41}
42
; Masked load of <vscale x 2 x double> from %a under %mask, with alignment 8
; and an undef passthru. Expected codegen: a single ld1d of .d elements using
; the p0/z predicate, followed by ret.
43define <vscale x 2 x double> @masked_load_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x i1> %mask) nounwind {
44; CHECK-LABEL: masked_load_nxv2f64:
45; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
46; CHECK-NEXT: ret
47  %load = call <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
48  ret <vscale x 2 x double> %load
49}
50
; Masked load of <vscale x 2 x float> from %a under %mask, with alignment 4
; and an undef passthru. The vector has only two float elements per vscale
; unit, and the expected codegen is a word load (ld1w) into .d elements using
; the p0/z predicate, followed by ret.
51define <vscale x 2 x float> @masked_load_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x i1> %mask) nounwind {
52; CHECK-LABEL: masked_load_nxv2f32:
53; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
54; CHECK-NEXT: ret
55  %load = call <vscale x 2 x float> @llvm.masked.load.nxv2f32(<vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
56  ret <vscale x 2 x float> %load
57}
58
; Masked load of <vscale x 2 x half> from %a under %mask, with alignment 2 and
; an undef passthru. The vector has only two half elements per vscale unit,
; and the expected codegen is a halfword load (ld1h) into .d elements using
; the p0/z predicate, followed by ret.
59define <vscale x 2 x half> @masked_load_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x i1> %mask) nounwind {
60; CHECK-LABEL: masked_load_nxv2f16:
61; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
62; CHECK-NEXT: ret
63  %load = call <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half> *%a, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
64  ret <vscale x 2 x half> %load
65}
66
; Masked load of <vscale x 4 x float> from %a under %mask, with alignment 4
; and an undef passthru. Expected codegen: a single ld1w of .s elements using
; the p0/z predicate, followed by ret.
67define <vscale x 4 x float> @masked_load_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x i1> %mask) nounwind {
68; CHECK-LABEL: masked_load_nxv4f32:
69; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
70; CHECK-NEXT: ret
71  %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
72  ret <vscale x 4 x float> %load
73}
74
; Masked load of <vscale x 4 x half> from %a under %mask, with alignment 2 and
; an undef passthru. The vector has only four half elements per vscale unit,
; and the expected codegen is a halfword load (ld1h) into .s elements using
; the p0/z predicate, followed by ret.
75define <vscale x 4 x half> @masked_load_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x i1> %mask) nounwind {
76; CHECK-LABEL: masked_load_nxv4f16:
77; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
78; CHECK-NEXT: ret
79  %load = call <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
80  ret <vscale x 4 x half> %load
81}
82
; Masked load of <vscale x 8 x half> from %a under %mask, with alignment 2 and
; an undef passthru. Expected codegen: a single ld1h of .h elements using the
; p0/z predicate, followed by ret.
83define <vscale x 8 x half> @masked_load_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x i1> %mask) nounwind {
84; CHECK-LABEL: masked_load_nxv8f16:
85; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
86; CHECK-NEXT: ret
87  %load = call <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x half> undef)
88  ret <vscale x 8 x half> %load
89}
90
; Masked load of <vscale x 8 x bfloat> from %a under %mask, with alignment 2
; and an undef passthru. The function carries attribute group #0, which this
; file defines as "+sve,+bf16". Expected codegen: a single ld1h of .h elements
; using the p0/z predicate, followed by ret.
91define <vscale x 8 x bfloat> @masked_load_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x i1> %mask) nounwind #0 {
92; CHECK-LABEL: masked_load_nxv8bf16:
93; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
94; CHECK-NEXT: ret
95  %load = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
96  ret <vscale x 8 x bfloat> %load
97}
98
99;
100; Masked Stores
101;
102
; Masked store of <vscale x 2 x i64> %val to %a under %mask, with alignment 8.
; Expected codegen: a single st1d of .d elements predicated on p0, then ret.
103define void @masked_store_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i1> %mask) nounwind {
104; CHECK-LABEL: masked_store_nxv2i64:
105; CHECK-NEXT: st1d { z0.d }, p0, [x0]
106; CHECK-NEXT: ret
107  call void @llvm.masked.store.nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask)
108  ret void
109}
110
; Masked store of <vscale x 4 x i32> %val to %a under %mask, with alignment 4.
; Expected codegen: a single st1w of .s elements predicated on p0, then ret.
111define void @masked_store_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i32> %val, <vscale x 4 x i1> %mask) nounwind {
112; CHECK-LABEL: masked_store_nxv4i32:
113; CHECK-NEXT: st1w { z0.s }, p0, [x0]
114; CHECK-NEXT: ret
115  call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask)
116  ret void
117}
118
; Masked store of <vscale x 8 x i16> %val to %a under %mask, with alignment 2.
; Expected codegen: a single st1h of .h elements predicated on p0, then ret.
119define void @masked_store_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i16> %val, <vscale x 8 x i1> %mask) nounwind {
120; CHECK-LABEL: masked_store_nxv8i16:
121; CHECK-NEXT: st1h { z0.h }, p0, [x0]
122; CHECK-NEXT: ret
123  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask)
124  ret void
125}
126
; Masked store of <vscale x 16 x i8> %val to %a under %mask, with alignment 1.
; Expected codegen: a single st1b of .b elements predicated on p0, then ret.
127define void @masked_store_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i8> %val, <vscale x 16 x i1> %mask) nounwind {
128; CHECK-LABEL: masked_store_nxv16i8:
129; CHECK-NEXT: st1b { z0.b }, p0, [x0]
130; CHECK-NEXT: ret
131  call void @llvm.masked.store.nxv16i8(<vscale x 16 x i8> %val, <vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask)
132  ret void
133}
134
; Masked store of <vscale x 2 x double> %val to %a under %mask, with
; alignment 8. Expected codegen: a single st1d of .d elements predicated on
; p0, then ret.
135define void @masked_store_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x double> %val, <vscale x 2 x i1> %mask) nounwind {
136; CHECK-LABEL: masked_store_nxv2f64:
137; CHECK-NEXT: st1d { z0.d }, p0, [x0]
138; CHECK-NEXT: ret
139  call void @llvm.masked.store.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask)
140  ret void
141}
142
; Masked store of <vscale x 2 x float> %val to %a under %mask, with
; alignment 4. The vector has only two float elements per vscale unit, and
; the expected codegen is a word store (st1w) from .d elements predicated on
; p0, then ret.
143define void @masked_store_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x float> %val, <vscale x 2 x i1> %mask) nounwind {
144; CHECK-LABEL: masked_store_nxv2f32:
145; CHECK-NEXT: st1w { z0.d }, p0, [x0]
146; CHECK-NEXT: ret
147  call void @llvm.masked.store.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask)
148  ret void
149}
150
; Masked store of <vscale x 2 x half> %val to %a under %mask. The alignment
; operand is the natural 2-byte alignment of half, matching the corresponding
; load test masked_load_nxv2f16 and the element-sized alignment used by the
; other tests in this file. The vector has only two half elements per vscale
; unit, and the expected codegen is a halfword store (st1h) from .d elements
; predicated on p0, then ret.
151define void @masked_store_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x half> %val, <vscale x 2 x i1> %mask) nounwind {
152; CHECK-LABEL: masked_store_nxv2f16:
153; CHECK-NEXT: st1h { z0.d }, p0, [x0]
154; CHECK-NEXT: ret
155  call void @llvm.masked.store.nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half> *%a, i32 2, <vscale x 2 x i1> %mask)
156  ret void
157}
158
; Masked store of <vscale x 4 x float> %val to %a under %mask, with
; alignment 4. Expected codegen: a single st1w of .s elements predicated on
; p0, then ret.
159define void @masked_store_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x float> %val, <vscale x 4 x i1> %mask) nounwind {
160; CHECK-LABEL: masked_store_nxv4f32:
161; CHECK-NEXT: st1w { z0.s }, p0, [x0]
162; CHECK-NEXT: ret
163  call void @llvm.masked.store.nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask)
164  ret void
165}
166
; Masked store of <vscale x 4 x half> %val to %a under %mask, with
; alignment 2. The vector has only four half elements per vscale unit, and
; the expected codegen is a halfword store (st1h) from .s elements predicated
; on p0, then ret.
167define void @masked_store_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x half> %val, <vscale x 4 x i1> %mask) nounwind {
168; CHECK-LABEL: masked_store_nxv4f16:
169; CHECK-NEXT: st1h { z0.s }, p0, [x0]
170; CHECK-NEXT: ret
171  call void @llvm.masked.store.nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask)
172  ret void
173}
174
; Masked store of <vscale x 8 x half> %val to %a under %mask, with
; alignment 2. Expected codegen: a single st1h of .h elements predicated on
; p0, then ret.
175define void @masked_store_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x half> %val, <vscale x 8 x i1> %mask) nounwind {
176; CHECK-LABEL: masked_store_nxv8f16:
177; CHECK-NEXT: st1h { z0.h }, p0, [x0]
178; CHECK-NEXT: ret
179  call void @llvm.masked.store.nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask)
180  ret void
181}
182
; Masked store of <vscale x 8 x bfloat> %val to %a under %mask, with
; alignment 2. The function carries attribute group #0, which this file
; defines as "+sve,+bf16". Expected codegen: a single st1h of .h elements
; predicated on p0, then ret.
183define void @masked_store_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x bfloat> %val, <vscale x 8 x i1> %mask) nounwind #0 {
184; CHECK-LABEL: masked_store_nxv8bf16:
185; CHECK-NEXT: st1h { z0.h }, p0, [x0]
186; CHECK-NEXT: ret
187  call void @llvm.masked.store.nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x bfloat> *%a, i32 2, <vscale x 8 x i1> %mask)
188  ret void
189}
190
191;
192; Masked loads and stores of pointer data types
193;
194
195; Pointers to integer types
196
; Masked load of a <vscale x 2 x i8*> pointer vector from %vector_ptr under
; %mask, with alignment 8 and an undef passthru. Expected codegen: a single
; ld1d of .d elements using the p0/z predicate, followed by ret.
197define <vscale x 2 x i8*> @masked.load.nxv2p0i8(<vscale x 2 x i8*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
198; CHECK-LABEL: masked.load.nxv2p0i8:
199; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
200; CHECK-NEXT:    ret
201  %v = call <vscale x 2 x i8*> @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(<vscale x 2 x i8*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i8*> undef)
202  ret <vscale x 2 x i8*> %v
203}
; Masked load of a <vscale x 2 x i16*> pointer vector from %vector_ptr under
; %mask, with alignment 8 and an undef passthru. Expected codegen: a single
; ld1d of .d elements using the p0/z predicate, followed by ret.
204define <vscale x 2 x i16*> @masked.load.nxv2p0i16(<vscale x 2 x i16*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
205; CHECK-LABEL: masked.load.nxv2p0i16:
206; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
207; CHECK-NEXT:    ret
208  %v = call <vscale x 2 x i16*> @llvm.masked.load.nxv2p0i16.p0nxv2p0i16(<vscale x 2 x i16*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i16*> undef)
209  ret <vscale x 2 x i16*> %v
210}
; Masked load of a <vscale x 2 x i32*> pointer vector from %vector_ptr under
; %mask, with alignment 8 and an undef passthru. Expected codegen: a single
; ld1d of .d elements using the p0/z predicate, followed by ret.
211define <vscale x 2 x i32*> @masked.load.nxv2p0i32(<vscale x 2 x i32*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
212; CHECK-LABEL: masked.load.nxv2p0i32:
213; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
214; CHECK-NEXT:    ret
215  %v = call <vscale x 2 x i32*> @llvm.masked.load.nxv2p0i32.p0nxv2p0i32(<vscale x 2 x i32*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i32*> undef)
216  ret <vscale x 2 x i32*> %v
217}
; Masked load of a <vscale x 2 x i64*> pointer vector from %vector_ptr under
; %mask, with alignment 8 and an undef passthru. Expected codegen: a single
; ld1d of .d elements using the p0/z predicate, followed by ret.
218define <vscale x 2 x i64*> @masked.load.nxv2p0i64(<vscale x 2 x i64*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
219; CHECK-LABEL: masked.load.nxv2p0i64:
220; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
221; CHECK-NEXT:    ret
222  %v = call <vscale x 2 x i64*> @llvm.masked.load.nxv2p0i64.p0nxv2p0i64(<vscale x 2 x i64*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64*> undef)
223  ret <vscale x 2 x i64*> %v
224}
225
226; Pointers to floating-point types
227
; Masked load of a <vscale x 2 x bfloat*> pointer vector from %vector_ptr
; under %mask, with alignment 8 and an undef passthru. The function carries
; attribute group #0, which this file defines as "+sve,+bf16". Expected
; codegen: a single ld1d of .d elements using the p0/z predicate, then ret.
228define <vscale x 2 x bfloat*> @masked.load.nxv2p0bf16(<vscale x 2 x bfloat*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind #0 {
229; CHECK-LABEL: masked.load.nxv2p0bf16:
230; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
231; CHECK-NEXT:    ret
232  %v = call <vscale x 2 x bfloat*> @llvm.masked.load.nxv2p0bf16.p0nxv2p0bf16(<vscale x 2 x bfloat*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat*> undef)
233  ret <vscale x 2 x bfloat*> %v
234}
; Masked load of a <vscale x 2 x half*> pointer vector from %vector_ptr under
; %mask, with alignment 8 and an undef passthru. Expected codegen: a single
; ld1d of .d elements using the p0/z predicate, followed by ret.
235define <vscale x 2 x half*> @masked.load.nxv2p0f16(<vscale x 2 x half*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
236; CHECK-LABEL: masked.load.nxv2p0f16:
237; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
238; CHECK-NEXT:    ret
239  %v = call <vscale x 2 x half*> @llvm.masked.load.nxv2p0f16.p0nxv2p0f16(<vscale x 2 x half*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x half*> undef)
240  ret <vscale x 2 x half*> %v
241}
; Masked load of a <vscale x 2 x float*> pointer vector from %vector_ptr under
; %mask, with alignment 8 and an undef passthru. Expected codegen: a single
; ld1d of .d elements using the p0/z predicate, followed by ret.
242define <vscale x 2 x float*> @masked.load.nxv2p0f32(<vscale x 2 x float*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
243; CHECK-LABEL: masked.load.nxv2p0f32:
244; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
245; CHECK-NEXT:    ret
246  %v = call <vscale x 2 x float*> @llvm.masked.load.nxv2p0f32.p0nxv2p0f32(<vscale x 2 x float*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x float*> undef)
247  ret <vscale x 2 x float*> %v
248}
; Masked load of a <vscale x 2 x double*> pointer vector from %vector_ptr
; under %mask, with alignment 8 and an undef passthru. Expected codegen: a
; single ld1d of .d elements using the p0/z predicate, followed by ret.
249define <vscale x 2 x double*> @masked.load.nxv2p0f64(<vscale x 2 x double*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
250; CHECK-LABEL: masked.load.nxv2p0f64:
251; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
252; CHECK-NEXT:    ret
253  %v = call <vscale x 2 x double*> @llvm.masked.load.nxv2p0f64.p0nxv2p0f64(<vscale x 2 x double*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double*> undef)
254  ret <vscale x 2 x double*> %v
255}
256
257; Pointers to an array type
258
; Masked store of a <vscale x 2 x [64 x i16]*> pointer vector %data to
; %vector_ptr under %mask, with alignment 8. Expected codegen: a single st1d
; of .d elements predicated on p0, then ret.
259define void @masked.store.nxv2p0a64i16(<vscale x 2 x [64 x i16]*> %data, <vscale x 2 x [64 x i16]*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
260; CHECK-LABEL: masked.store.nxv2p0a64i16:
261; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
262; CHECK-NEXT:    ret
263  call void @llvm.masked.store.nxv2p0a64i16.p0nxv2p0a64i16(<vscale x 2 x [64 x i16]*> %data, <vscale x 2 x [64 x i16]*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask)
264  ret void
265}
266
267; Pointers to a struct type
268
; %struct is a two-field aggregate { i8*, i32 }; the test below uses it as the
; pointee type of the stored pointers.
269%struct = type { i8*, i32 }
; Masked store of a <vscale x 2 x %struct*> pointer vector %data to
; %vector_ptr under %mask, with alignment 8. Expected codegen: a single st1d
; of .d elements predicated on p0, then ret.
270define void @masked.store.nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale x 2 x %struct*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
271; CHECK-LABEL: masked.store.nxv2p0s_struct:
272; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
273; CHECK-NEXT:    ret
274  call void @llvm.masked.store.nxv2p0s_struct.p0nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale x 2 x %struct*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask)
275  ret void
276}
277
278
; Declarations of the masked load/store intrinsics called by the tests in this
; file. Load operands are (pointer, i32 alignment, mask, passthru); store
; operands are (value, pointer, i32 alignment, mask).

; Masked loads of integer vectors.
279declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
280declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
281declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
282declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
283
; Masked loads of floating-point vectors (double, float, half, bfloat).
284declare <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
285declare <vscale x 2 x float> @llvm.masked.load.nxv2f32(<vscale x 2 x float>*, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
286declare <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
287declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
288declare <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
289declare <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
290declare <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
291
; Masked stores of integer vectors.
292declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
293declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
294declare void @llvm.masked.store.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)
295declare void @llvm.masked.store.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32, <vscale x 16 x i1>)
296
; Masked stores of floating-point vectors (double, float, half, bfloat).
297declare void @llvm.masked.store.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>*, i32, <vscale x 2 x i1>)
298declare void @llvm.masked.store.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>*, i32, <vscale x 2 x i1>)
299declare void @llvm.masked.store.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>*, i32, <vscale x 2 x i1>)
300declare void @llvm.masked.store.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32, <vscale x 4 x i1>)
301declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>*, i32, <vscale x 4 x i1>)
302declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>)
303declare void @llvm.masked.store.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>)
304
; Masked loads of vectors of pointers to integer types. Unlike the
; declarations above, these mark the alignment operand as immarg.
305declare <vscale x 2 x i8*> @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(<vscale x 2 x i8*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i8*>)
306declare <vscale x 2 x i16*> @llvm.masked.load.nxv2p0i16.p0nxv2p0i16(<vscale x 2 x i16*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i16*>)
307declare <vscale x 2 x i32*> @llvm.masked.load.nxv2p0i32.p0nxv2p0i32(<vscale x 2 x i32*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i32*>)
308declare <vscale x 2 x i64*> @llvm.masked.load.nxv2p0i64.p0nxv2p0i64(<vscale x 2 x i64*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i64*>)
309
; Masked loads of vectors of pointers to floating-point types.
310declare <vscale x 2 x bfloat*> @llvm.masked.load.nxv2p0bf16.p0nxv2p0bf16(<vscale x 2 x bfloat*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x bfloat*>)
311declare <vscale x 2 x half*> @llvm.masked.load.nxv2p0f16.p0nxv2p0f16(<vscale x 2 x half*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x half*>)
312declare <vscale x 2 x float*> @llvm.masked.load.nxv2p0f32.p0nxv2p0f32(<vscale x 2 x float*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x float*>)
313declare <vscale x 2 x double*> @llvm.masked.load.nxv2p0f64.p0nxv2p0f64(<vscale x 2 x double*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x double*>)
314
; Masked store of a vector of pointers to [64 x i16] arrays.
315declare void @llvm.masked.store.nxv2p0a64i16.p0nxv2p0a64i16(<vscale x 2 x [64 x i16]*>, <vscale x 2 x [64 x i16]*>*, i32 immarg, <vscale x 2 x i1>)
316
; Masked store of a vector of pointers to %struct.
317declare void @llvm.masked.store.nxv2p0s_struct.p0nxv2p0s_struct(<vscale x 2 x %struct*>, <vscale x 2 x %struct*>*, i32 immarg, <vscale x 2 x i1>)
318
; Attribute group #0 is attached to the bfloat tests in this file
; (masked_load_nxv8bf16, masked_store_nxv8bf16 and masked.load.nxv2p0bf16)
; and sets their per-function target features to "+sve,+bf16".
319; +bf16 is required for the bfloat version.
320attributes #0 = { "target-features"="+sve,+bf16" }
321