1; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
2
; Variable left shift of a 64-bit vector (8 x i8). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u8.
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vshls8:
;CHECK: vshl.u8
  %val = load <8 x i8>* %A
  %amt = load <8 x i8>* %B
  %res = shl <8 x i8> %val, %amt
  ret <8 x i8> %res
}
11
; Variable left shift of a 64-bit vector (4 x i16). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u16.
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vshls16:
;CHECK: vshl.u16
  %val = load <4 x i16>* %A
  %amt = load <4 x i16>* %B
  %res = shl <4 x i16> %val, %amt
  ret <4 x i16> %res
}
20
; Variable left shift of a 64-bit vector (2 x i32). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u32.
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vshls32:
;CHECK: vshl.u32
  %val = load <2 x i32>* %A
  %amt = load <2 x i32>* %B
  %res = shl <2 x i32> %val, %amt
  ret <2 x i32> %res
}
29
; Variable left shift of a 64-bit vector (1 x i64). The value and the shift
; amount are both loaded from memory; the output must contain vshl.u64.
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vshls64:
;CHECK: vshl.u64
  %val = load <1 x i64>* %A
  %amt = load <1 x i64>* %B
  %res = shl <1 x i64> %val, %amt
  ret <1 x i64> %res
}
38
; Left shift of 8 x i8 by a constant splat of 7 (lane width minus one).
; The output must contain the immediate form vshl.i8.
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK: vshli8:
;CHECK: vshl.i8
  %val = load <8 x i8>* %A
  %res = shl <8 x i8> %val, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <8 x i8> %res
}
46
; Left shift of 4 x i16 by a constant splat of 15 (lane width minus one).
; The output must contain the immediate form vshl.i16.
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK: vshli16:
;CHECK: vshl.i16
  %val = load <4 x i16>* %A
  %res = shl <4 x i16> %val, <i16 15, i16 15, i16 15, i16 15>
  ret <4 x i16> %res
}
54
; Left shift of 2 x i32 by a constant splat of 31 (lane width minus one).
; The output must contain the immediate form vshl.i32.
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK: vshli32:
;CHECK: vshl.i32
  %val = load <2 x i32>* %A
  %res = shl <2 x i32> %val, <i32 31, i32 31>
  ret <2 x i32> %res
}
62
; Left shift of 1 x i64 by the constant 63 (lane width minus one).
; The output must contain the immediate form vshl.i64.
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK: vshli64:
;CHECK: vshl.i64
  %val = load <1 x i64>* %A
  %res = shl <1 x i64> %val, <i64 63>
  ret <1 x i64> %res
}
70
; Variable left shift of a 128-bit vector (16 x i8). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u8.
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vshlQs8:
;CHECK: vshl.u8
  %val = load <16 x i8>* %A
  %amt = load <16 x i8>* %B
  %res = shl <16 x i8> %val, %amt
  ret <16 x i8> %res
}
79
; Variable left shift of a 128-bit vector (8 x i16). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u16.
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vshlQs16:
;CHECK: vshl.u16
  %val = load <8 x i16>* %A
  %amt = load <8 x i16>* %B
  %res = shl <8 x i16> %val, %amt
  ret <8 x i16> %res
}
88
; Variable left shift of a 128-bit vector (4 x i32). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u32.
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vshlQs32:
;CHECK: vshl.u32
  %val = load <4 x i32>* %A
  %amt = load <4 x i32>* %B
  %res = shl <4 x i32> %val, %amt
  ret <4 x i32> %res
}
97
; Variable left shift of a 128-bit vector (2 x i64). The value and the per-lane
; shift amounts are both loaded from memory; the output must contain vshl.u64.
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vshlQs64:
;CHECK: vshl.u64
  %val = load <2 x i64>* %A
  %amt = load <2 x i64>* %B
  %res = shl <2 x i64> %val, %amt
  ret <2 x i64> %res
}
106
; Left shift of 16 x i8 by a constant splat of 7 (lane width minus one).
; The output must contain the immediate form vshl.i8.
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK: vshlQi8:
;CHECK: vshl.i8
  %val = load <16 x i8>* %A
  %res = shl <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %res
}
114
; Left shift of 8 x i16 by a constant splat of 15 (lane width minus one).
; The output must contain the immediate form vshl.i16.
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK: vshlQi16:
;CHECK: vshl.i16
  %val = load <8 x i16>* %A
  %res = shl <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %res
}
122
; Left shift of 4 x i32 by a constant splat of 31 (lane width minus one).
; The output must contain the immediate form vshl.i32.
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK: vshlQi32:
;CHECK: vshl.i32
  %val = load <4 x i32>* %A
  %res = shl <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %res
}
130
; Left shift of 2 x i64 by a constant splat of 63 (lane width minus one).
; The output must contain the immediate form vshl.i64.
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK: vshlQi64:
;CHECK: vshl.i64
  %val = load <2 x i64>* %A
  %res = shl <2 x i64> %val, <i64 63, i64 63>
  ret <2 x i64> %res
}
138
; Variable logical right shift of 8 x i8. The output must contain vneg.s8
; followed by vshl.u8, i.e. the right shift is presumably emitted as a left
; shift by the negated amount (the operands themselves are not verified).
define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vlshru8:
;CHECK: vneg.s8
;CHECK: vshl.u8
  %val = load <8 x i8>* %A
  %amt = load <8 x i8>* %B
  %res = lshr <8 x i8> %val, %amt
  ret <8 x i8> %res
}
148
; Variable logical right shift of 4 x i16. The output must contain vneg.s16
; followed by vshl.u16, i.e. the right shift is presumably emitted as a left
; shift by the negated amount (the operands themselves are not verified).
define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vlshru16:
;CHECK: vneg.s16
;CHECK: vshl.u16
  %val = load <4 x i16>* %A
  %amt = load <4 x i16>* %B
  %res = lshr <4 x i16> %val, %amt
  ret <4 x i16> %res
}
158
; Variable logical right shift of 2 x i32. The output must contain vneg.s32
; followed by vshl.u32, i.e. the right shift is presumably emitted as a left
; shift by the negated amount (the operands themselves are not verified).
define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vlshru32:
;CHECK: vneg.s32
;CHECK: vshl.u32
  %val = load <2 x i32>* %A
  %amt = load <2 x i32>* %B
  %res = lshr <2 x i32> %val, %amt
  ret <2 x i32> %res
}
168
; Variable logical right shift of 1 x i64. Unlike the narrower lane sizes, the
; output must contain vsub.i64 (rather than a vneg) followed by vshl.u64;
; presumably the amount is negated by subtraction -- operands are not verified.
define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vlshru64:
;CHECK: vsub.i64
;CHECK: vshl.u64
  %val = load <1 x i64>* %A
  %amt = load <1 x i64>* %B
  %res = lshr <1 x i64> %val, %amt
  ret <1 x i64> %res
}
178
; Logical right shift of 8 x i8 by a constant splat; the output must contain
; the immediate form vshr.u8. The amount is 7 because an lshr by the full lane
; width (8) has an undefined result in LLVM IR, which would let the shift be
; folded away before instruction selection.
define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
;CHECK: vlshri8:
;CHECK: vshr.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <8 x i8> %tmp2
}
186
; Logical right shift of 4 x i16 by a constant splat; the output must contain
; the immediate form vshr.u16. The amount is 15 because an lshr by the full
; lane width (16) has an undefined result in LLVM IR, which would let the shift
; be folded away before instruction selection.
define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
;CHECK: vlshri16:
;CHECK: vshr.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
	ret <4 x i16> %tmp2
}
194
; Logical right shift of 2 x i32 by a constant splat; the output must contain
; the immediate form vshr.u32. The amount is 31 because an lshr by the full
; lane width (32) has an undefined result in LLVM IR, which would let the shift
; be folded away before instruction selection.
define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
;CHECK: vlshri32:
;CHECK: vshr.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 >
	ret <2 x i32> %tmp2
}
202
; Logical right shift of 1 x i64 by a constant; the output must contain the
; immediate form vshr.u64. The amount is 63 because an lshr by the full lane
; width (64) has an undefined result in LLVM IR, which would let the shift be
; folded away before instruction selection.
define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
;CHECK: vlshri64:
;CHECK: vshr.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = lshr <1 x i64> %tmp1, < i64 63 >
	ret <1 x i64> %tmp2
}
210
; Variable logical right shift of 16 x i8. The output must contain vneg.s8
; followed by vshl.u8, i.e. the right shift is presumably emitted as a left
; shift by the negated amount (the operands themselves are not verified).
define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vlshrQu8:
;CHECK: vneg.s8
;CHECK: vshl.u8
  %val = load <16 x i8>* %A
  %amt = load <16 x i8>* %B
  %res = lshr <16 x i8> %val, %amt
  ret <16 x i8> %res
}
220
; Variable logical right shift of 8 x i16. The output must contain vneg.s16
; followed by vshl.u16, i.e. the right shift is presumably emitted as a left
; shift by the negated amount (the operands themselves are not verified).
define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vlshrQu16:
;CHECK: vneg.s16
;CHECK: vshl.u16
  %val = load <8 x i16>* %A
  %amt = load <8 x i16>* %B
  %res = lshr <8 x i16> %val, %amt
  ret <8 x i16> %res
}
230
; Variable logical right shift of 4 x i32. The output must contain vneg.s32
; followed by vshl.u32, i.e. the right shift is presumably emitted as a left
; shift by the negated amount (the operands themselves are not verified).
define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vlshrQu32:
;CHECK: vneg.s32
;CHECK: vshl.u32
  %val = load <4 x i32>* %A
  %amt = load <4 x i32>* %B
  %res = lshr <4 x i32> %val, %amt
  ret <4 x i32> %res
}
240
; Variable logical right shift of 2 x i64. Unlike the narrower lane sizes, the
; output must contain vsub.i64 (rather than a vneg) followed by vshl.u64;
; presumably the amount is negated by subtraction -- operands are not verified.
define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vlshrQu64:
;CHECK: vsub.i64
;CHECK: vshl.u64
  %val = load <2 x i64>* %A
  %amt = load <2 x i64>* %B
  %res = lshr <2 x i64> %val, %amt
  ret <2 x i64> %res
}
250
; Logical right shift of 16 x i8 by a constant splat; the output must contain
; the immediate form vshr.u8. The amount is 7 because an lshr by the full lane
; width (8) has an undefined result in LLVM IR, which would let the shift be
; folded away before instruction selection.
define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
;CHECK: vlshrQi8:
;CHECK: vshr.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <16 x i8> %tmp2
}
258
; Logical right shift of 8 x i16 by a constant splat; the output must contain
; the immediate form vshr.u16. The amount is 15 because an lshr by the full
; lane width (16) has an undefined result in LLVM IR, which would let the shift
; be folded away before instruction selection.
define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
;CHECK: vlshrQi16:
;CHECK: vshr.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
	ret <8 x i16> %tmp2
}
266
; Logical right shift of 4 x i32 by a constant splat; the output must contain
; the immediate form vshr.u32. The amount is 31 because an lshr by the full
; lane width (32) has an undefined result in LLVM IR, which would let the shift
; be folded away before instruction selection.
define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
;CHECK: vlshrQi32:
;CHECK: vshr.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
	ret <4 x i32> %tmp2
}
274
; Logical right shift of 2 x i64 by a constant splat; the output must contain
; the immediate form vshr.u64. The amount is 63 because an lshr by the full
; lane width (64) has an undefined result in LLVM IR, which would let the shift
; be folded away before instruction selection.
define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
;CHECK: vlshrQi64:
;CHECK: vshr.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 >
	ret <2 x i64> %tmp2
}
282
; Example that requires splitting and expanding a vector shift.
; This function has no FileCheck directives of its own: it logically shifts
; the 2 x i64 argument right by a constant 2 and, as far as this file shows,
; only needs to compile without llc failing.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
entry:
  %shifted = lshr <2 x i64> %val, <i64 2, i64 2>
  ret <2 x i64> %shifted
}
289
; Variable arithmetic right shift of 8 x i8. The output must contain vneg.s8
; followed by the signed vshl.s8, i.e. the right shift is presumably emitted as
; a left shift by the negated amount (the operands are not verified).
define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vashrs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
  %val = load <8 x i8>* %A
  %amt = load <8 x i8>* %B
  %res = ashr <8 x i8> %val, %amt
  ret <8 x i8> %res
}
299
; Variable arithmetic right shift of 4 x i16. The output must contain vneg.s16
; followed by the signed vshl.s16, i.e. the right shift is presumably emitted
; as a left shift by the negated amount (the operands are not verified).
define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vashrs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
  %val = load <4 x i16>* %A
  %amt = load <4 x i16>* %B
  %res = ashr <4 x i16> %val, %amt
  ret <4 x i16> %res
}
309
; Variable arithmetic right shift of 2 x i32. The output must contain vneg.s32
; followed by the signed vshl.s32, i.e. the right shift is presumably emitted
; as a left shift by the negated amount (the operands are not verified).
define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vashrs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
  %val = load <2 x i32>* %A
  %amt = load <2 x i32>* %B
  %res = ashr <2 x i32> %val, %amt
  ret <2 x i32> %res
}
319
; Variable arithmetic right shift of 1 x i64. Unlike the narrower lane sizes,
; the output must contain vsub.i64 (rather than a vneg) followed by vshl.s64;
; presumably the amount is negated by subtraction -- operands are not verified.
define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vashrs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
  %val = load <1 x i64>* %A
  %amt = load <1 x i64>* %B
  %res = ashr <1 x i64> %val, %amt
  ret <1 x i64> %res
}
329
; Arithmetic right shift of 8 x i8 by a constant splat; the output must contain
; the immediate form vshr.s8. The amount is 7 because an ashr by the full lane
; width (8) has an undefined result in LLVM IR, which would let the shift be
; folded away before instruction selection.
define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
;CHECK: vashri8:
;CHECK: vshr.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <8 x i8> %tmp2
}
337
; Arithmetic right shift of 4 x i16 by a constant splat; the output must
; contain the immediate form vshr.s16. The amount is 15 because an ashr by the
; full lane width (16) has an undefined result in LLVM IR, which would let the
; shift be folded away before instruction selection.
define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
;CHECK: vashri16:
;CHECK: vshr.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
	ret <4 x i16> %tmp2
}
345
; Arithmetic right shift of 2 x i32 by a constant splat; the output must
; contain the immediate form vshr.s32. The amount is 31 because an ashr by the
; full lane width (32) has an undefined result in LLVM IR, which would let the
; shift be folded away before instruction selection.
define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
;CHECK: vashri32:
;CHECK: vshr.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 >
	ret <2 x i32> %tmp2
}
353
; Arithmetic right shift of 1 x i64 by a constant; the output must contain the
; immediate form vshr.s64. The amount is 63 because an ashr by the full lane
; width (64) has an undefined result in LLVM IR, which would let the shift be
; folded away before instruction selection.
define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
;CHECK: vashri64:
;CHECK: vshr.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = ashr <1 x i64> %tmp1, < i64 63 >
	ret <1 x i64> %tmp2
}
361
; Variable arithmetic right shift of 16 x i8. The output must contain vneg.s8
; followed by the signed vshl.s8, i.e. the right shift is presumably emitted as
; a left shift by the negated amount (the operands are not verified).
define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vashrQs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
  %val = load <16 x i8>* %A
  %amt = load <16 x i8>* %B
  %res = ashr <16 x i8> %val, %amt
  ret <16 x i8> %res
}
371
; Variable arithmetic right shift of 8 x i16. The output must contain vneg.s16
; followed by the signed vshl.s16, i.e. the right shift is presumably emitted
; as a left shift by the negated amount (the operands are not verified).
define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vashrQs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
  %val = load <8 x i16>* %A
  %amt = load <8 x i16>* %B
  %res = ashr <8 x i16> %val, %amt
  ret <8 x i16> %res
}
381
; Variable arithmetic right shift of 4 x i32. The output must contain vneg.s32
; followed by the signed vshl.s32, i.e. the right shift is presumably emitted
; as a left shift by the negated amount (the operands are not verified).
define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vashrQs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
  %val = load <4 x i32>* %A
  %amt = load <4 x i32>* %B
  %res = ashr <4 x i32> %val, %amt
  ret <4 x i32> %res
}
391
; Variable arithmetic right shift of 2 x i64. Unlike the narrower lane sizes,
; the output must contain vsub.i64 (rather than a vneg) followed by vshl.s64;
; presumably the amount is negated by subtraction -- operands are not verified.
define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vashrQs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
  %val = load <2 x i64>* %A
  %amt = load <2 x i64>* %B
  %res = ashr <2 x i64> %val, %amt
  ret <2 x i64> %res
}
401
; Arithmetic right shift of 16 x i8 by a constant splat; the output must
; contain the immediate form vshr.s8. The amount is 7 because an ashr by the
; full lane width (8) has an undefined result in LLVM IR, which would let the
; shift be folded away before instruction selection.
define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
;CHECK: vashrQi8:
;CHECK: vshr.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <16 x i8> %tmp2
}
409
; Arithmetic right shift of 8 x i16 by a constant splat; the output must
; contain the immediate form vshr.s16. The amount is 15 because an ashr by the
; full lane width (16) has an undefined result in LLVM IR, which would let the
; shift be folded away before instruction selection.
define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
;CHECK: vashrQi16:
;CHECK: vshr.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
	ret <8 x i16> %tmp2
}
417
; Arithmetic right shift of 4 x i32 by a constant splat; the output must
; contain the immediate form vshr.s32. The amount is 31 because an ashr by the
; full lane width (32) has an undefined result in LLVM IR, which would let the
; shift be folded away before instruction selection.
define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
;CHECK: vashrQi32:
;CHECK: vshr.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
	ret <4 x i32> %tmp2
}
425
; Arithmetic right shift of 2 x i64 by a constant splat; the output must
; contain the immediate form vshr.s64. The amount is 63 because an ashr by the
; full lane width (64) has an undefined result in LLVM IR, which would let the
; shift be folded away before instruction selection.
define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
;CHECK: vashrQi64:
;CHECK: vshr.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 >
	ret <2 x i64> %tmp2
}
433