Lines Matching +full:- +full:4

2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
; Gathers four i8 values from %base plus <4 x i32> byte offsets loaded from %offptr, then zero-extends them to i32.
; Expected output: vldrw.u32 loads the offsets and a single vldrb.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
4 define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr) {
5 ; CHECK-LABEL: zext_unscaled_i8_i32:
7 ; CHECK-NEXT: vldrw.u32 q1, [r1]
8 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
9 ; CHECK-NEXT: bx lr
11 %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
12 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
13 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
14 %gather.zext = zext <4 x i8> %gather to <4 x i32>
15 ret <4 x i32> %gather.zext
; Gathers four i8 values from %base plus <4 x i32> byte offsets loaded from %offptr, then sign-extends them to i32.
; Expected output: vldrw.u32 loads the offsets and a single vldrb.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
18 define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr) {
19 ; CHECK-LABEL: sext_unscaled_i8_i32:
21 ; CHECK-NEXT: vldrw.u32 q1, [r1]
22 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
23 ; CHECK-NEXT: bx lr
25 %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
26 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
27 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
28 %gather.sext = sext <4 x i8> %gather to <4 x i32>
29 ret <4 x i32> %gather.sext
; Gathers four i16 values, zero-extended to i32. Addresses are formed with an i8-typed getelementptr
; (so the <4 x i32> offsets loaded from %offptr are byte offsets) and then bitcast to i16 pointers.
; Expected output: vldrw.u32 loads the offsets and a single vldrh.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
32 define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i16_i32(i8* %base, <4 x i32>* %offptr) {
33 ; CHECK-LABEL: zext_unscaled_i16_i32:
35 ; CHECK-NEXT: vldrw.u32 q1, [r1]
36 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
37 ; CHECK-NEXT: bx lr
39 %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
40 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
41 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
42 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
43 %gather.zext = zext <4 x i16> %gather to <4 x i32>
44 ret <4 x i32> %gather.zext
; Gathers four i16 values, sign-extended to i32. Addresses are formed with an i8-typed getelementptr
; (so the <4 x i32> offsets loaded from %offptr are byte offsets) and then bitcast to i16 pointers.
; Expected output: vldrw.u32 loads the offsets and a single vldrh.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
47 define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i16_i32(i8* %base, <4 x i32>* %offptr) {
48 ; CHECK-LABEL: sext_unscaled_i16_i32:
50 ; CHECK-NEXT: vldrw.u32 q1, [r1]
51 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
52 ; CHECK-NEXT: bx lr
54 %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
55 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
56 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
57 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
58 %gather.sext = sext <4 x i16> %gather to <4 x i32>
59 ret <4 x i32> %gather.sext
; Gathers four i32 values with no extension. Addresses are formed with an i8-typed getelementptr
; (so the <4 x i32> offsets loaded from %offptr are byte offsets) and then bitcast to i32 pointers.
; Expected output: vldrw.u32 loads the offsets and a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
62 define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32(i8* %base, <4 x i32>* %offptr) {
63 ; CHECK-LABEL: unscaled_i32_i32:
65 ; CHECK-NEXT: vldrw.u32 q1, [r1]
66 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
67 ; CHECK-NEXT: bx lr
69 %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
70 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
71 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
72 …%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 t…
73 ret <4 x i32> %gather
; Float variant of the i32 gather: four float values are gathered through pointers built from an i8-typed
; getelementptr (byte offsets of type <4 x i32> loaded from %offptr) bitcast to float pointers.
; Expected output: vldrw.u32 loads the offsets and a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
76 define arm_aapcs_vfpcc <4 x float> @unscaled_f32_i32(i8* %base, <4 x i32>* %offptr) {
77 ; CHECK-LABEL: unscaled_f32_i32:
79 ; CHECK-NEXT: vldrw.u32 q1, [r1]
80 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
81 ; CHECK-NEXT: bx lr
83 %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
84 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
85 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
86 …%gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <…
87 ret <4 x float> %gather
; Gathers four i32 values using <4 x i16> offsets that are zero-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to i32 pointers).
; Expected output: vldrh.u32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
90 define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i16(i8* %base, <4 x i16>* %offptr) {
91 ; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
93 ; CHECK-NEXT: vldrh.u32 q1, [r1]
94 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
95 ; CHECK-NEXT: bx lr
97 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
98 %offs.zext = zext <4 x i16> %offs to <4 x i32>
99 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
100 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
101 …%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 t…
102 ret <4 x i32> %gather
; Gathers four i32 values using <4 x i16> offsets that are sign-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to i32 pointers).
; Expected output: vldrh.s32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
105 define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i16(i8* %base, <4 x i16>* %offptr) {
106 ; CHECK-LABEL: signed_unscaled_i32_i16:
108 ; CHECK-NEXT: vldrh.s32 q1, [r1]
109 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
110 ; CHECK-NEXT: bx lr
112 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
113 %offs.sext = sext <4 x i16> %offs to <4 x i32>
114 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
115 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
116 …%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 t…
117 ret <4 x i32> %gather
; Gathers four float values using <4 x i16> offsets that are zero-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to float pointers).
; Expected output: vldrh.u32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
120 define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i16(i8* %base, <4 x i16>* %offptr) {
121 ; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
123 ; CHECK-NEXT: vldrh.u32 q1, [r1]
124 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
125 ; CHECK-NEXT: bx lr
127 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
128 %offs.zext = zext <4 x i16> %offs to <4 x i32>
129 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
130 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
131 …%gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <…
132 ret <4 x float> %gather
; Gathers four float values using <4 x i16> offsets that are sign-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to float pointers).
; Expected output: vldrh.s32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
135 define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i16(i8* %base, <4 x i16>* %offptr) {
136 ; CHECK-LABEL: b_signed_unscaled_f32_i16:
138 ; CHECK-NEXT: vldrh.s32 q1, [r1]
139 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
140 ; CHECK-NEXT: bx lr
142 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
143 %offs.sext = sext <4 x i16> %offs to <4 x i32>
144 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
145 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
146 …%gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <…
147 ret <4 x float> %gather
; Gathers four i16 values (zero-extended to i32) using <4 x i16> offsets that are sign-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrh.s32 loads and widens the offsets, then a single vldrh.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
150 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
151 ; CHECK-LABEL: zext_signed_unscaled_i16_i16:
153 ; CHECK-NEXT: vldrh.s32 q1, [r1]
154 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
155 ; CHECK-NEXT: bx lr
157 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
158 %offs.sext = sext <4 x i16> %offs to <4 x i32>
159 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
160 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
161 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
162 %gather.zext = zext <4 x i16> %gather to <4 x i32>
163 ret <4 x i32> %gather.zext
; Gathers four i16 values (sign-extended to i32) using <4 x i16> offsets that are sign-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrh.s32 loads and widens the offsets, then a single vldrh.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
166 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
167 ; CHECK-LABEL: sext_signed_unscaled_i16_i16:
169 ; CHECK-NEXT: vldrh.s32 q1, [r1]
170 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
171 ; CHECK-NEXT: bx lr
173 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
174 %offs.sext = sext <4 x i16> %offs to <4 x i32>
175 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
176 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
177 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
178 %gather.sext = sext <4 x i16> %gather to <4 x i32>
179 ret <4 x i32> %gather.sext
; Gathers four i16 values (zero-extended to i32) using <4 x i16> offsets that are zero-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrh.u32 loads and widens the offsets, then a single vldrh.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
182 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
183 ; CHECK-LABEL: zext_unsigned_unscaled_i16_i16:
185 ; CHECK-NEXT: vldrh.u32 q1, [r1]
186 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
187 ; CHECK-NEXT: bx lr
189 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
190 %offs.zext = zext <4 x i16> %offs to <4 x i32>
191 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
192 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
193 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
194 %gather.zext = zext <4 x i16> %gather to <4 x i32>
195 ret <4 x i32> %gather.zext
; Gathers four i16 values (sign-extended to i32) using <4 x i16> offsets that are zero-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrh.u32 loads and widens the offsets, then a single vldrh.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
198 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
199 ; CHECK-LABEL: sext_unsigned_unscaled_i16_i16:
201 ; CHECK-NEXT: vldrh.u32 q1, [r1]
202 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
203 ; CHECK-NEXT: bx lr
205 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
206 %offs.zext = zext <4 x i16> %offs to <4 x i32>
207 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
208 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
209 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
210 %gather.sext = sext <4 x i16> %gather to <4 x i32>
211 ret <4 x i32> %gather.sext
; Gathers four i8 values (zero-extended to i32) from %base plus <4 x i16> offsets that are sign-extended to i32.
; Expected output: vldrh.s32 loads and widens the offsets, then a single vldrb.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
214 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
215 ; CHECK-LABEL: zext_signed_unscaled_i8_i16:
217 ; CHECK-NEXT: vldrh.s32 q1, [r1]
218 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
219 ; CHECK-NEXT: bx lr
221 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
222 %offs.sext = sext <4 x i16> %offs to <4 x i32>
223 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
224 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
225 %gather.zext = zext <4 x i8> %gather to <4 x i32>
226 ret <4 x i32> %gather.zext
; Gathers four i8 values (sign-extended to i32) from %base plus <4 x i16> offsets that are sign-extended to i32.
; Expected output: vldrh.s32 loads and widens the offsets, then a single vldrb.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
229 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
230 ; CHECK-LABEL: sext_signed_unscaled_i8_i16:
232 ; CHECK-NEXT: vldrh.s32 q1, [r1]
233 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
234 ; CHECK-NEXT: bx lr
236 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
237 %offs.sext = sext <4 x i16> %offs to <4 x i32>
238 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
239 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
240 %gather.sext = sext <4 x i8> %gather to <4 x i32>
241 ret <4 x i32> %gather.sext
; Gathers four i8 values (zero-extended to i32) from %base plus <4 x i16> offsets that are zero-extended to i32.
; Expected output: vldrh.u32 loads and widens the offsets, then a single vldrb.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
244 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
245 ; CHECK-LABEL: zext_unsigned_unscaled_i8_i16:
247 ; CHECK-NEXT: vldrh.u32 q1, [r1]
248 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
249 ; CHECK-NEXT: bx lr
251 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
252 %offs.zext = zext <4 x i16> %offs to <4 x i32>
253 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
254 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
255 %gather.zext = zext <4 x i8> %gather to <4 x i32>
256 ret <4 x i32> %gather.zext
; Gathers four i8 values (sign-extended to i32) from %base plus <4 x i16> offsets that are zero-extended to i32.
; Expected output: vldrh.u32 loads and widens the offsets, then a single vldrb.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
259 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
260 ; CHECK-LABEL: sext_unsigned_unscaled_i8_i16:
262 ; CHECK-NEXT: vldrh.u32 q1, [r1]
263 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
264 ; CHECK-NEXT: bx lr
266 %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
267 %offs.zext = zext <4 x i16> %offs to <4 x i32>
268 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
269 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
270 %gather.sext = sext <4 x i8> %gather to <4 x i32>
271 ret <4 x i32> %gather.sext
; Gathers four i32 values using <4 x i8> offsets that are zero-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to i32 pointers).
; Expected output: vldrb.u32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
274 define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i8(i8* %base, <4 x i8>* %offptr) {
275 ; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
277 ; CHECK-NEXT: vldrb.u32 q1, [r1]
278 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
279 ; CHECK-NEXT: bx lr
281 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
282 %offs.zext = zext <4 x i8> %offs to <4 x i32>
283 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
284 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
285 …%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 t…
286 ret <4 x i32> %gather
; Gathers four i32 values using <4 x i8> offsets that are sign-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to i32 pointers).
; Expected output: vldrb.s32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
289 define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i8(i8* %base, <4 x i8>* %offptr) {
290 ; CHECK-LABEL: signed_unscaled_i32_i8:
292 ; CHECK-NEXT: vldrb.s32 q1, [r1]
293 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
294 ; CHECK-NEXT: bx lr
296 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
297 %offs.sext = sext <4 x i8> %offs to <4 x i32>
298 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
299 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
300 …%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 t…
301 ret <4 x i32> %gather
; Gathers four float values using <4 x i8> offsets that are zero-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to float pointers).
; Expected output: vldrb.u32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
304 define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i8(i8* %base, <4 x i8>* %offptr) {
305 ; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
307 ; CHECK-NEXT: vldrb.u32 q1, [r1]
308 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
309 ; CHECK-NEXT: bx lr
311 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
312 %offs.zext = zext <4 x i8> %offs to <4 x i32>
313 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
314 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
315 …%gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <…
316 ret <4 x float> %gather
; Gathers four float values using <4 x i8> offsets that are sign-extended to i32 and applied as byte offsets
; (i8-typed getelementptr, then bitcast to float pointers).
; Expected output: vldrb.s32 loads and widens the offsets, then a single vldrw.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
319 define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i8(i8* %base, <4 x i8>* %offptr) {
320 ; CHECK-LABEL: b_signed_unscaled_f32_i8:
322 ; CHECK-NEXT: vldrb.s32 q1, [r1]
323 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
324 ; CHECK-NEXT: bx lr
326 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
327 %offs.sext = sext <4 x i8> %offs to <4 x i32>
328 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
329 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
330 …%gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <…
331 ret <4 x float> %gather
; Gathers four i16 values (zero-extended to i32) using <4 x i8> offsets that are sign-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrb.s32 loads and widens the offsets, then a single vldrh.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
334 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
335 ; CHECK-LABEL: zext_signed_unscaled_i16_i8:
337 ; CHECK-NEXT: vldrb.s32 q1, [r1]
338 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
339 ; CHECK-NEXT: bx lr
341 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
342 %offs.sext = sext <4 x i8> %offs to <4 x i32>
343 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
344 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
345 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
346 %gather.zext = zext <4 x i16> %gather to <4 x i32>
347 ret <4 x i32> %gather.zext
; Gathers four i16 values (sign-extended to i32) using <4 x i8> offsets that are sign-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrb.s32 loads and widens the offsets, then a single vldrh.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
350 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
351 ; CHECK-LABEL: sext_signed_unscaled_i16_i8:
353 ; CHECK-NEXT: vldrb.s32 q1, [r1]
354 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
355 ; CHECK-NEXT: bx lr
357 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
358 %offs.sext = sext <4 x i8> %offs to <4 x i32>
359 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
360 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
361 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
362 %gather.sext = sext <4 x i16> %gather to <4 x i32>
363 ret <4 x i32> %gather.sext
; Gathers four i16 values (zero-extended to i32) using <4 x i8> offsets that are zero-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrb.u32 loads and widens the offsets, then a single vldrh.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
366 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
367 ; CHECK-LABEL: zext_unsigned_unscaled_i16_i8:
369 ; CHECK-NEXT: vldrb.u32 q1, [r1]
370 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
371 ; CHECK-NEXT: bx lr
373 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
374 %offs.zext = zext <4 x i8> %offs to <4 x i32>
375 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
376 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
377 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
378 %gather.zext = zext <4 x i16> %gather to <4 x i32>
379 ret <4 x i32> %gather.zext
; Gathers four i16 values (sign-extended to i32) using <4 x i8> offsets that are zero-extended to i32 and
; applied as byte offsets (i8-typed getelementptr, then bitcast to i16 pointers).
; Expected output: vldrb.u32 loads and widens the offsets, then a single vldrh.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
382 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
383 ; CHECK-LABEL: sext_unsigned_unscaled_i16_i8:
385 ; CHECK-NEXT: vldrb.u32 q1, [r1]
386 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
387 ; CHECK-NEXT: bx lr
389 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
390 %offs.zext = zext <4 x i8> %offs to <4 x i32>
391 %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
392 %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
393 …%gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 t…
394 %gather.sext = sext <4 x i16> %gather to <4 x i32>
395 ret <4 x i32> %gather.sext
; Gathers four i8 values (zero-extended to i32) from %base plus <4 x i8> offsets that are sign-extended to i32.
; Expected output: vldrb.s32 loads and widens the offsets, then a single vldrb.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
398 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
399 ; CHECK-LABEL: zext_signed_unscaled_i8_i8:
401 ; CHECK-NEXT: vldrb.s32 q1, [r1]
402 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
403 ; CHECK-NEXT: bx lr
405 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
406 %offs.sext = sext <4 x i8> %offs to <4 x i32>
407 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
408 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
409 %gather.zext = zext <4 x i8> %gather to <4 x i32>
410 ret <4 x i32> %gather.zext
; Gathers four i8 values (sign-extended to i32) from %base plus <4 x i8> offsets that are sign-extended to i32.
; Expected output: vldrb.s32 loads and widens the offsets, then a single vldrb.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
413 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
414 ; CHECK-LABEL: sext_signed_unscaled_i8_i8:
416 ; CHECK-NEXT: vldrb.s32 q1, [r1]
417 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
418 ; CHECK-NEXT: bx lr
420 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
421 %offs.sext = sext <4 x i8> %offs to <4 x i32>
422 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
423 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
424 %gather.sext = sext <4 x i8> %gather to <4 x i32>
425 ret <4 x i32> %gather.sext
; Gathers four i8 values (zero-extended to i32) from %base plus <4 x i8> offsets that are zero-extended to i32.
; Expected output: vldrb.u32 loads and widens the offsets, then a single vldrb.u32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
428 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
429 ; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
431 ; CHECK-NEXT: vldrb.u32 q1, [r1]
432 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
433 ; CHECK-NEXT: bx lr
435 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
436 %offs.zext = zext <4 x i8> %offs to <4 x i32>
437 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
438 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
439 %gather.zext = zext <4 x i8> %gather to <4 x i32>
440 ret <4 x i32> %gather.zext
; Gathers four i8 values (sign-extended to i32) from %base plus <4 x i8> offsets that are zero-extended to i32.
; Expected output: vldrb.u32 loads and widens the offsets, then a single vldrb.s32 performs the gather.
; NOTE(review): the gather's mask/passthru operands are cut off in this listing - confirm the mask is all-true.
443 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
444 ; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
446 ; CHECK-NEXT: vldrb.u32 q1, [r1]
447 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
448 ; CHECK-NEXT: bx lr
450 %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
451 %offs.zext = zext <4 x i8> %offs to <4 x i32>
452 %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
453 …%gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true,…
454 %gather.sext = sext <4 x i8> %gather to <4 x i32>
455 ret <4 x i32> %gather.sext
; Gather through a vector of i32 pointers (%p) that is advanced by a constant index of 4 elements;
; there is no scalar base here, unlike the other functions in this listing.
; The comment below names a vector-base-plus-immediate form, but the expected output is scalarised:
; #0x10 (4 x 4 bytes) is added to the pointer vector, each lane is moved to a core register,
; loaded with a scalar ldr, and inserted back into q0.
; NOTE(review): the gather is called with alignment 1 (second operand), which is presumably why no
; vector gather is selected - confirm. The mask/passthru operands are cut off in this listing.
458 ; VLDRW.u32 Qd, [P, 4]
459 define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x i32*> %p) {
460 ; CHECK-LABEL: qi4:
462 ; CHECK-NEXT: vmov.i32 q1, #0x10
463 ; CHECK-NEXT: vadd.i32 q0, q0, q1
464 ; CHECK-NEXT: vmov r0, s0
465 ; CHECK-NEXT: vmov r3, s1
466 ; CHECK-NEXT: vmov r1, s2
467 ; CHECK-NEXT: vmov r2, s3
468 ; CHECK-NEXT: ldr r0, [r0]
469 ; CHECK-NEXT: ldr r3, [r3]
470 ; CHECK-NEXT: vmov.32 q0[0], r0
471 ; CHECK-NEXT: ldr r1, [r1]
472 ; CHECK-NEXT: vmov.32 q0[1], r3
473 ; CHECK-NEXT: ldr r2, [r2]
474 ; CHECK-NEXT: vmov.32 q0[2], r1
475 ; CHECK-NEXT: vmov.32 q0[3], r2
476 ; CHECK-NEXT: bx lr
478 %g = getelementptr inbounds i32, <4 x i32*> %p, i32 4
479 …%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %g, i32 1, <4 x i1> <i1 true…
480 ret <4 x i32> %gather
; Declarations of the llvm.masked.gather intrinsic overloads for four-element vectors.
; Operands, in order: vector of pointers, i32 alignment, <4 x i1> mask, pass-through vector.
; The v4f16 overload is declared but not called by any function visible in this listing.
483 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
484 declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
485 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
486 declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
487 declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)