1; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -float-abi soft -o - | FileCheck %s
2; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -float-abi hard -o - | FileCheck %s -check-prefix CHECK-HARD
3
; Zero-initialized vector globals. The conversion tests load these and add
; them to the value being converted.

; Integer element types.
; NOTE(review): @v2i64 is not referenced by any function visible in this file.
@v2i64 = global <2 x i64> zeroinitializer
@v2i32 = global <2 x i32> zeroinitializer
@v4i32 = global <4 x i32> zeroinitializer
@v4i16 = global <4 x i16> zeroinitializer
@v8i16 = global <8 x i16> zeroinitializer
@v8i8  = global <8 x i8>  zeroinitializer
@v16i8 = global <16 x i8> zeroinitializer

; Floating-point element types.
@v2f32 = global <2 x float>  zeroinitializer
@v2f64 = global <2 x double> zeroinitializer
@v4f32 = global <4 x float>  zeroinitializer
15
16
; 64-bit conversions: bitcasts between i64/double and 64-bit vector types
; Scalar-to-vector: reinterprets the i64 argument as <8 x i8>, adds the @v8i8
; global to it and writes the sum through %store. A vrev64.8 is expected in
; the generated code.
define void @conv_i64_to_v8i8(i64 %val, <8 x i8>* %store) {
; CHECK-LABEL: conv_i64_to_v8i8:
; CHECK: vrev64.8
  %cast = bitcast i64 %val to <8 x i8>
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %cast, %addend
  store <8 x i8> %sum, <8 x i8>* %store
  ret void
}
27
; Vector-to-scalar: loads a <8 x i8> from %load, adds the @v8i8 global,
; reinterprets the sum as i64 and forwards it, together with %store, to
; @conv_i64_to_v8i8. A vrev64.8 is expected in the generated code.
define void @conv_v8i8_to_i64(<8 x i8>* %load, <8 x i8>* %store) {
; CHECK-LABEL: conv_v8i8_to_i64:
; CHECK: vrev64.8
  %in = load <8 x i8>, <8 x i8>* %load
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %in, %addend
  %bits = bitcast <8 x i8> %sum to i64
  call void @conv_i64_to_v8i8(i64 %bits, <8 x i8>* %store)
  ret void
}
38
; Scalar-to-vector: reinterprets the i64 argument as <4 x i16>, adds the
; @v4i16 global to it and writes the sum through %store. A vrev64.16 is
; expected in the generated code.
define void @conv_i64_to_v4i16(i64 %val, <4 x i16>* %store) {
; CHECK-LABEL: conv_i64_to_v4i16:
; CHECK: vrev64.16
  %cast = bitcast i64 %val to <4 x i16>
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %cast, %addend
  store <4 x i16> %sum, <4 x i16>* %store
  ret void
}
48
; Vector-to-scalar: loads a <4 x i16> from %load, adds the @v4i16 global,
; reinterprets the sum as i64 and forwards it, together with %store, to
; @conv_i64_to_v4i16. A vrev64.16 is expected in the generated code.
define void @conv_v4i16_to_i64(<4 x i16>* %load, <4 x i16>* %store) {
; CHECK-LABEL: conv_v4i16_to_i64:
; CHECK: vrev64.16
  %in = load <4 x i16>, <4 x i16>* %load
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %in, %addend
  %bits = bitcast <4 x i16> %sum to i64
  call void @conv_i64_to_v4i16(i64 %bits, <4 x i16>* %store)
  ret void
}
59
; Scalar-to-vector: reinterprets the i64 argument as <2 x i32>, adds the
; @v2i32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_i64_to_v2i32(i64 %val, <2 x i32>* %store) {
; CHECK-LABEL: conv_i64_to_v2i32:
; CHECK: vrev64.32
  %cast = bitcast i64 %val to <2 x i32>
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %cast, %addend
  store <2 x i32> %sum, <2 x i32>* %store
  ret void
}
69
; Vector-to-scalar: loads a <2 x i32> from %load, adds the @v2i32 global,
; reinterprets the sum as i64 and forwards it, together with %store, to
; @conv_i64_to_v2i32. A vrev64.32 is expected in the generated code.
define void @conv_v2i32_to_i64(<2 x i32>* %load, <2 x i32>* %store) {
; CHECK-LABEL: conv_v2i32_to_i64:
; CHECK: vrev64.32
  %in = load <2 x i32>, <2 x i32>* %load
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %in, %addend
  %bits = bitcast <2 x i32> %sum to i64
  call void @conv_i64_to_v2i32(i64 %bits, <2 x i32>* %store)
  ret void
}
80
; Scalar-to-vector: reinterprets the i64 argument as <2 x float>, fadds the
; @v2f32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_i64_to_v2f32(i64 %val, <2 x float>* %store) {
; CHECK-LABEL: conv_i64_to_v2f32:
; CHECK: vrev64.32
  %cast = bitcast i64 %val to <2 x float>
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %cast, %addend
  store <2 x float> %sum, <2 x float>* %store
  ret void
}
90
; Vector-to-scalar: loads a <2 x float> from %load, fadds the @v2f32 global,
; reinterprets the sum as i64 and forwards it, together with %store, to
; @conv_i64_to_v2f32. A vrev64.32 is expected in the generated code.
define void @conv_v2f32_to_i64(<2 x float>* %load, <2 x float>* %store) {
; CHECK-LABEL: conv_v2f32_to_i64:
; CHECK: vrev64.32
  %in = load <2 x float>, <2 x float>* %load
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %in, %addend
  %bits = bitcast <2 x float> %sum to i64
  call void @conv_i64_to_v2f32(i64 %bits, <2 x float>* %store)
  ret void
}
101
; Scalar-to-vector: reinterprets the double argument as <8 x i8>, adds the
; @v8i8 global to it and writes the sum through %store. A vrev64.8 is
; expected in the generated code.
define void @conv_f64_to_v8i8(double %val, <8 x i8>* %store) {
; CHECK-LABEL: conv_f64_to_v8i8:
; CHECK: vrev64.8
  %cast = bitcast double %val to <8 x i8>
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %cast, %addend
  store <8 x i8> %sum, <8 x i8>* %store
  ret void
}
111
; Vector-to-scalar: loads a <8 x i8> from %load, adds the @v8i8 global,
; reinterprets the sum as double and forwards it, together with %store, to
; @conv_f64_to_v8i8. A vrev64.8 is expected in the generated code.
define void @conv_v8i8_to_f64(<8 x i8>* %load, <8 x i8>* %store) {
; CHECK-LABEL: conv_v8i8_to_f64:
; CHECK: vrev64.8
  %in = load <8 x i8>, <8 x i8>* %load
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %in, %addend
  %bits = bitcast <8 x i8> %sum to double
  call void @conv_f64_to_v8i8(double %bits, <8 x i8>* %store)
  ret void
}
122
; Scalar-to-vector: reinterprets the double argument as <4 x i16>, adds the
; @v4i16 global to it and writes the sum through %store. A vrev64.16 is
; expected in the generated code.
define void @conv_f64_to_v4i16(double %val, <4 x i16>* %store) {
; CHECK-LABEL: conv_f64_to_v4i16:
; CHECK: vrev64.16
  %cast = bitcast double %val to <4 x i16>
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %cast, %addend
  store <4 x i16> %sum, <4 x i16>* %store
  ret void
}
132
; Vector-to-scalar: loads a <4 x i16> from %load, adds the @v4i16 global,
; reinterprets the sum as double and forwards it, together with %store, to
; @conv_f64_to_v4i16. A vrev64.16 is expected in the generated code.
define void @conv_v4i16_to_f64(<4 x i16>* %load, <4 x i16>* %store) {
; CHECK-LABEL: conv_v4i16_to_f64:
; CHECK: vrev64.16
  %in = load <4 x i16>, <4 x i16>* %load
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %in, %addend
  %bits = bitcast <4 x i16> %sum to double
  call void @conv_f64_to_v4i16(double %bits, <4 x i16>* %store)
  ret void
}
143
; Scalar-to-vector: reinterprets the double argument as <2 x i32>, adds the
; @v2i32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_f64_to_v2i32(double %val, <2 x i32>* %store) {
; CHECK-LABEL: conv_f64_to_v2i32:
; CHECK: vrev64.32
  %cast = bitcast double %val to <2 x i32>
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %cast, %addend
  store <2 x i32> %sum, <2 x i32>* %store
  ret void
}
153
; Vector-to-scalar: loads a <2 x i32> from %load, adds the @v2i32 global,
; reinterprets the sum as double and forwards it, together with %store, to
; @conv_f64_to_v2i32. A vrev64.32 is expected in the generated code.
define void @conv_v2i32_to_f64(<2 x i32>* %load, <2 x i32>* %store) {
; CHECK-LABEL: conv_v2i32_to_f64:
; CHECK: vrev64.32
  %in = load <2 x i32>, <2 x i32>* %load
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %in, %addend
  %bits = bitcast <2 x i32> %sum to double
  call void @conv_f64_to_v2i32(double %bits, <2 x i32>* %store)
  ret void
}
164
; Scalar-to-vector: reinterprets the double argument as <2 x float>, fadds
; the @v2f32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_f64_to_v2f32(double %val, <2 x float>* %store) {
; CHECK-LABEL: conv_f64_to_v2f32:
; CHECK: vrev64.32
  %cast = bitcast double %val to <2 x float>
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %cast, %addend
  store <2 x float> %sum, <2 x float>* %store
  ret void
}
174
; Vector-to-scalar: loads a <2 x float> from %load, fadds the @v2f32 global,
; reinterprets the sum as double and forwards it, together with %store, to
; @conv_f64_to_v2f32. A vrev64.32 is expected in the generated code.
define void @conv_v2f32_to_f64(<2 x float>* %load, <2 x float>* %store) {
; CHECK-LABEL: conv_v2f32_to_f64:
; CHECK: vrev64.32
  %in = load <2 x float>, <2 x float>* %load
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %in, %addend
  %bits = bitcast <2 x float> %sum to double
  call void @conv_f64_to_v2f32(double %bits, <2 x float>* %store)
  ret void
}
185
; 128-bit conversions: bitcasts between i128/fp128 and 128-bit vector types
187
188
; Scalar-to-vector: reinterprets the i128 argument as <16 x i8>, adds the
; @v16i8 global to it and writes the sum through %store. A vrev32.8 is
; expected in the generated code.
define void @conv_i128_to_v16i8(i128 %val, <16 x i8>* %store) {
; CHECK-LABEL: conv_i128_to_v16i8:
; CHECK: vrev32.8
  %cast = bitcast i128 %val to <16 x i8>
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %cast, %addend
  store <16 x i8> %sum, <16 x i8>* %store
  ret void
}
198
; Vector-to-scalar: loads a <16 x i8> from %load, adds the @v16i8 global,
; reinterprets the sum as i128 and forwards it, together with %store, to
; @conv_i128_to_v16i8. A vrev32.8 is expected in the generated code.
define void @conv_v16i8_to_i128(<16 x i8>* %load, <16 x i8>* %store) {
; CHECK-LABEL: conv_v16i8_to_i128:
; CHECK: vrev32.8
  %in = load <16 x i8>, <16 x i8>* %load
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %in, %addend
  %bits = bitcast <16 x i8> %sum to i128
  call void @conv_i128_to_v16i8(i128 %bits, <16 x i8>* %store)
  ret void
}
209
; Scalar-to-vector: reinterprets the i128 argument as <8 x i16>, adds the
; @v8i16 global to it and writes the sum through %store. A vrev32.16 is
; expected in the generated code.
define void @conv_i128_to_v8i16(i128 %val, <8 x i16>* %store) {
; CHECK-LABEL: conv_i128_to_v8i16:
; CHECK: vrev32.16
  %cast = bitcast i128 %val to <8 x i16>
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %cast, %addend
  store <8 x i16> %sum, <8 x i16>* %store
  ret void
}
219
; Vector-to-scalar: loads a <8 x i16> from %load, adds the @v8i16 global,
; reinterprets the sum as i128 and forwards it, together with %store, to
; @conv_i128_to_v8i16. A vrev32.16 is expected in the generated code.
define void @conv_v8i16_to_i128(<8 x i16>* %load, <8 x i16>* %store) {
; CHECK-LABEL: conv_v8i16_to_i128:
; CHECK: vrev32.16
  %in = load <8 x i16>, <8 x i16>* %load
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %in, %addend
  %bits = bitcast <8 x i16> %sum to i128
  call void @conv_i128_to_v8i16(i128 %bits, <8 x i16>* %store)
  ret void
}
230
; Scalar-to-vector: reinterprets the i128 argument as <4 x i32>, adds the
; @v4i32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_i128_to_v4i32(i128 %val, <4 x i32>* %store) {
; CHECK-LABEL: conv_i128_to_v4i32:
; CHECK: vrev64.32
  %cast = bitcast i128 %val to <4 x i32>
  %addend = load <4 x i32>, <4 x i32>* @v4i32
  %sum = add <4 x i32> %cast, %addend
  store <4 x i32> %sum, <4 x i32>* %store
  ret void
}
240
; Vector-to-scalar: loads a <4 x i32> from %load, adds the @v4i32 global,
; reinterprets the sum as i128 and forwards it, together with %store, to
; @conv_i128_to_v4i32. A vrev64.32 is expected in the generated code.
define void @conv_v4i32_to_i128(<4 x i32>* %load, <4 x i32>* %store) {
; CHECK-LABEL: conv_v4i32_to_i128:
; CHECK: vrev64.32
  %in = load <4 x i32>, <4 x i32>* %load
  %addend = load <4 x i32>, <4 x i32>* @v4i32
  %sum = add <4 x i32> %in, %addend
  %bits = bitcast <4 x i32> %sum to i128
  call void @conv_i128_to_v4i32(i128 %bits, <4 x i32>* %store)
  ret void
}
251
; Scalar-to-vector: reinterprets the i128 argument as <4 x float>, fadds the
; @v4f32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_i128_to_v4f32(i128 %val, <4 x float>* %store) {
; CHECK-LABEL: conv_i128_to_v4f32:
; CHECK: vrev64.32
  %cast = bitcast i128 %val to <4 x float>
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %cast, %addend
  store <4 x float> %sum, <4 x float>* %store
  ret void
}
261
; Vector-to-scalar: loads a <4 x float> from %load, fadds the @v4f32 global,
; reinterprets the sum as i128 and forwards it, together with %store, to
; @conv_i128_to_v4f32. A vrev64.32 is expected in the generated code.
define void @conv_v4f32_to_i128(<4 x float>* %load, <4 x float>* %store) {
; CHECK-LABEL: conv_v4f32_to_i128:
; CHECK: vrev64.32
  %in = load <4 x float>, <4 x float>* %load
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %in, %addend
  %bits = bitcast <4 x float> %sum to i128
  call void @conv_i128_to_v4f32(i128 %bits, <4 x float>* %store)
  ret void
}
272
; Scalar-to-vector: reinterprets the fp128 argument as <2 x double>, fadds
; the @v2f64 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_f128_to_v2f64(fp128 %val, <2 x double>* %store) {
; CHECK-LABEL: conv_f128_to_v2f64:
; CHECK: vrev64.32
  %cast = bitcast fp128 %val to <2 x double>
  %addend = load <2 x double>, <2 x double>* @v2f64
  %sum = fadd <2 x double> %cast, %addend
  store <2 x double> %sum, <2 x double>* %store
  ret void
}
282
; Vector-to-scalar: loads a <2 x double> from %load, fadds the @v2f64 global,
; reinterprets the sum as fp128 and forwards it, together with %store, to
; @conv_f128_to_v2f64. A vrev64.32 is expected in the generated code.
define void @conv_v2f64_to_f128(<2 x double>* %load, <2 x double>* %store) {
; CHECK-LABEL: conv_v2f64_to_f128:
; CHECK: vrev64.32
  %in = load <2 x double>, <2 x double>* %load
  %addend = load <2 x double>, <2 x double>* @v2f64
  %sum = fadd <2 x double> %in, %addend
  %bits = bitcast <2 x double> %sum to fp128
  call void @conv_f128_to_v2f64(fp128 %bits, <2 x double>* %store)
  ret void
}
293
; Scalar-to-vector: reinterprets the fp128 argument as <16 x i8>, adds the
; @v16i8 global to it and writes the sum through %store. A vrev32.8 is
; expected in the generated code.
define void @conv_f128_to_v16i8(fp128 %val, <16 x i8>* %store) {
; CHECK-LABEL: conv_f128_to_v16i8:
; CHECK: vrev32.8
  %cast = bitcast fp128 %val to <16 x i8>
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %cast, %addend
  store <16 x i8> %sum, <16 x i8>* %store
  ret void
}
303
; Vector-to-scalar: loads a <16 x i8> from %load, adds the @v16i8 global,
; reinterprets the sum as fp128 and forwards it, together with %store, to
; @conv_f128_to_v16i8. A vrev32.8 is expected in the generated code.
define void @conv_v16i8_to_f128(<16 x i8>* %load, <16 x i8>* %store) {
; CHECK-LABEL: conv_v16i8_to_f128:
; CHECK: vrev32.8
  %in = load <16 x i8>, <16 x i8>* %load
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %in, %addend
  %bits = bitcast <16 x i8> %sum to fp128
  call void @conv_f128_to_v16i8(fp128 %bits, <16 x i8>* %store)
  ret void
}
314
; Scalar-to-vector: reinterprets the fp128 argument as <8 x i16>, adds the
; @v8i16 global to it and writes the sum through %store. A vrev32.16 is
; expected in the generated code.
define void @conv_f128_to_v8i16(fp128 %val, <8 x i16>* %store) {
; CHECK-LABEL: conv_f128_to_v8i16:
; CHECK: vrev32.16
  %cast = bitcast fp128 %val to <8 x i16>
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %cast, %addend
  store <8 x i16> %sum, <8 x i16>* %store
  ret void
}
324
; Vector-to-scalar: loads a <8 x i16> from %load, adds the @v8i16 global,
; reinterprets the sum as fp128 and forwards it, together with %store, to
; @conv_f128_to_v8i16. A vrev32.16 is expected in the generated code.
define void @conv_v8i16_to_f128(<8 x i16>* %load, <8 x i16>* %store) {
; CHECK-LABEL: conv_v8i16_to_f128:
; CHECK: vrev32.16
  %in = load <8 x i16>, <8 x i16>* %load
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %in, %addend
  %bits = bitcast <8 x i16> %sum to fp128
  call void @conv_f128_to_v8i16(fp128 %bits, <8 x i16>* %store)
  ret void
}
335
; Scalar-to-vector: reinterprets the fp128 argument as <4 x float>, fadds the
; @v4f32 global to it and writes the sum through %store. A vrev64.32 is
; expected in the generated code.
define void @conv_f128_to_v4f32(fp128 %val, <4 x float>* %store) {
; CHECK-LABEL: conv_f128_to_v4f32:
; CHECK: vrev64.32
  %cast = bitcast fp128 %val to <4 x float>
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %cast, %addend
  store <4 x float> %sum, <4 x float>* %store
  ret void
}
345
; Vector-to-scalar: loads a <4 x float> from %load, fadds the @v4f32 global,
; reinterprets the sum as fp128 and forwards it, together with %store, to
; @conv_f128_to_v4f32. A vrev64.32 is expected in the generated code.
define void @conv_v4f32_to_f128(<4 x float>* %load, <4 x float>* %store) {
; CHECK-LABEL: conv_v4f32_to_f128:
; CHECK: vrev64.32
  %in = load <4 x float>, <4 x float>* %load
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %in, %addend
  %bits = bitcast <4 x float> %sum to fp128
  call void @conv_f128_to_v4f32(fp128 %bits, <4 x float>* %store)
  ret void
}
356
; Argument-passing test: takes a <4 x i32> by value and stores it unchanged
; through %store.
; Soft-float run: expects a vmov from r3, r2 and a vmov from r1, r0 into two
; d registers, then a vst1.64 of that register pair.
; Hard-float run: expects no vmov and a vst1.64 of {d0, d1}.
define void @arg_v4i32(<4 x i32> %var, <4 x i32>* %store) {
; CHECK-LABEL: arg_v4i32:
; CHECK: vmov   [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov   [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v4i32:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <4 x i32> %var, <4 x i32>* %store
  ret void
}
368
; Argument-passing test: takes a <8 x i16> by value and stores it unchanged
; through %store.
; Soft-float run: expects a vmov from r3, r2 and a vmov from r1, r0 into two
; d registers, then a vst1.64 of that register pair.
; Hard-float run: expects no vmov and a vst1.64 of {d0, d1}.
define void @arg_v8i16(<8 x i16> %var, <8 x i16>* %store) {
; CHECK-LABEL: arg_v8i16:
; CHECK: vmov   [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov   [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v8i16:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <8 x i16> %var, <8 x i16>* %store
  ret void
}
380
; Argument-passing test: takes a <16 x i8> by value and stores it unchanged
; through %store.
; Soft-float run: expects a vmov from r3, r2 and a vmov from r1, r0 into two
; d registers, then a vst1.64 of that register pair.
; Hard-float run: expects no vmov and a vst1.64 of {d0, d1}.
define void @arg_v16i8(<16 x i8> %var, <16 x i8>* %store) {
; CHECK-LABEL: arg_v16i8:
; CHECK: vmov   [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov   [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v16i8:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <16 x i8> %var, <16 x i8>* %store
  ret void
}
392
393