1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
4
; Signed integer to float: sitofp <4 x i32> to <4 x float>.
; The +mve.fp run expects a single vector vcvt.f32.s32 on q0. The
; +mve,+fullfp16 run expects four scalar vcvt.f32.s32 instructions writing
; s4-s7, followed by a vmov q0, q1.
5define arm_aapcs_vfpcc <4 x float> @foo_float_int32(<4 x i32> %src) {
6; CHECK-MVE-LABEL: foo_float_int32:
7; CHECK-MVE:       @ %bb.0: @ %entry
8; CHECK-MVE-NEXT:    vcvt.f32.s32 s7, s3
9; CHECK-MVE-NEXT:    vcvt.f32.s32 s6, s2
10; CHECK-MVE-NEXT:    vcvt.f32.s32 s5, s1
11; CHECK-MVE-NEXT:    vcvt.f32.s32 s4, s0
12; CHECK-MVE-NEXT:    vmov q0, q1
13; CHECK-MVE-NEXT:    bx lr
14;
15; CHECK-MVEFP-LABEL: foo_float_int32:
16; CHECK-MVEFP:       @ %bb.0: @ %entry
17; CHECK-MVEFP-NEXT:    vcvt.f32.s32 q0, q0
18; CHECK-MVEFP-NEXT:    bx lr
19entry:
20  %out = sitofp <4 x i32> %src to <4 x float>
21  ret <4 x float> %out
22}
23
; Unsigned integer to float: uitofp <4 x i32> to <4 x float>.
; The +mve.fp run expects a single vector vcvt.f32.u32 on q0. The
; +mve,+fullfp16 run expects four scalar vcvt.f32.u32 instructions writing
; s4-s7, followed by a vmov q0, q1.
24define arm_aapcs_vfpcc <4 x float> @foo_float_uint32(<4 x i32> %src) {
25; CHECK-MVE-LABEL: foo_float_uint32:
26; CHECK-MVE:       @ %bb.0: @ %entry
27; CHECK-MVE-NEXT:    vcvt.f32.u32 s7, s3
28; CHECK-MVE-NEXT:    vcvt.f32.u32 s6, s2
29; CHECK-MVE-NEXT:    vcvt.f32.u32 s5, s1
30; CHECK-MVE-NEXT:    vcvt.f32.u32 s4, s0
31; CHECK-MVE-NEXT:    vmov q0, q1
32; CHECK-MVE-NEXT:    bx lr
33;
34; CHECK-MVEFP-LABEL: foo_float_uint32:
35; CHECK-MVEFP:       @ %bb.0: @ %entry
36; CHECK-MVEFP-NEXT:    vcvt.f32.u32 q0, q0
37; CHECK-MVEFP-NEXT:    bx lr
38entry:
39  %out = uitofp <4 x i32> %src to <4 x float>
40  ret <4 x float> %out
41}
42
; Float to signed integer: fptosi <4 x float> to <4 x i32>.
; The +mve.fp run expects a single vector vcvt.s32.f32 on q0. The
; +mve,+fullfp16 run expects four scalar vcvt.s32.f32 instructions, with each
; result then moved through r0 into one lane of q0 using vmov.32.
43define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
44; CHECK-MVE-LABEL: foo_int32_float:
45; CHECK-MVE:       @ %bb.0: @ %entry
46; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s0
47; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s1
48; CHECK-MVE-NEXT:    vcvt.s32.f32 s10, s2
49; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s3
50; CHECK-MVE-NEXT:    vmov r0, s4
51; CHECK-MVE-NEXT:    vmov.32 q0[0], r0
52; CHECK-MVE-NEXT:    vmov r0, s6
53; CHECK-MVE-NEXT:    vmov.32 q0[1], r0
54; CHECK-MVE-NEXT:    vmov r0, s10
55; CHECK-MVE-NEXT:    vmov.32 q0[2], r0
56; CHECK-MVE-NEXT:    vmov r0, s8
57; CHECK-MVE-NEXT:    vmov.32 q0[3], r0
58; CHECK-MVE-NEXT:    bx lr
59;
60; CHECK-MVEFP-LABEL: foo_int32_float:
61; CHECK-MVEFP:       @ %bb.0: @ %entry
62; CHECK-MVEFP-NEXT:    vcvt.s32.f32 q0, q0
63; CHECK-MVEFP-NEXT:    bx lr
64entry:
65  %out = fptosi <4 x float> %src to <4 x i32>
66  ret <4 x i32> %out
67}
68
; Float to unsigned integer: fptoui <4 x float> to <4 x i32>.
; The +mve.fp run expects a single vector vcvt.u32.f32 on q0. The
; +mve,+fullfp16 run expects four scalar vcvt.u32.f32 instructions, with each
; result then moved through r0 into one lane of q0 using vmov.32.
69define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
70; CHECK-MVE-LABEL: foo_uint32_float:
71; CHECK-MVE:       @ %bb.0: @ %entry
72; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s0
73; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s1
74; CHECK-MVE-NEXT:    vcvt.u32.f32 s10, s2
75; CHECK-MVE-NEXT:    vcvt.u32.f32 s8, s3
76; CHECK-MVE-NEXT:    vmov r0, s4
77; CHECK-MVE-NEXT:    vmov.32 q0[0], r0
78; CHECK-MVE-NEXT:    vmov r0, s6
79; CHECK-MVE-NEXT:    vmov.32 q0[1], r0
80; CHECK-MVE-NEXT:    vmov r0, s10
81; CHECK-MVE-NEXT:    vmov.32 q0[2], r0
82; CHECK-MVE-NEXT:    vmov r0, s8
83; CHECK-MVE-NEXT:    vmov.32 q0[3], r0
84; CHECK-MVE-NEXT:    bx lr
85;
86; CHECK-MVEFP-LABEL: foo_uint32_float:
87; CHECK-MVEFP:       @ %bb.0: @ %entry
88; CHECK-MVEFP-NEXT:    vcvt.u32.f32 q0, q0
89; CHECK-MVEFP-NEXT:    bx lr
90entry:
91  %out = fptoui <4 x float> %src to <4 x i32>
92  ret <4 x i32> %out
93}
94
; Signed 16-bit integer to half: sitofp <8 x i16> to <8 x half>.
; The +mve.fp run expects a single vector vcvt.f16.s16 on q0. The
; +mve,+fullfp16 run expects a per-lane sequence: extract with vmov.u16,
; sign-extend with sxth, convert with scalar vcvt.f16.s32, insert into q1 with
; vmov.16; q1 is finally copied to q0.
95define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
96; CHECK-MVE-LABEL: foo_half_int16:
97; CHECK-MVE:       @ %bb.0: @ %entry
98; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
99; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
100; CHECK-MVE-NEXT:    sxth r0, r0
101; CHECK-MVE-NEXT:    sxth r1, r1
102; CHECK-MVE-NEXT:    vmov s4, r0
103; CHECK-MVE-NEXT:    vcvt.f16.s32 s4, s4
104; CHECK-MVE-NEXT:    vmov r0, s4
105; CHECK-MVE-NEXT:    vmov s4, r1
106; CHECK-MVE-NEXT:    vcvt.f16.s32 s4, s4
107; CHECK-MVE-NEXT:    vmov r1, s4
108; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
109; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
110; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
111; CHECK-MVE-NEXT:    sxth r0, r0
112; CHECK-MVE-NEXT:    vmov s8, r0
113; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
114; CHECK-MVE-NEXT:    vmov r0, s8
115; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
116; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
117; CHECK-MVE-NEXT:    sxth r0, r0
118; CHECK-MVE-NEXT:    vmov s8, r0
119; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
120; CHECK-MVE-NEXT:    vmov r0, s8
121; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
122; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
123; CHECK-MVE-NEXT:    sxth r0, r0
124; CHECK-MVE-NEXT:    vmov s8, r0
125; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
126; CHECK-MVE-NEXT:    vmov r0, s8
127; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
128; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
129; CHECK-MVE-NEXT:    sxth r0, r0
130; CHECK-MVE-NEXT:    vmov s8, r0
131; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
132; CHECK-MVE-NEXT:    vmov r0, s8
133; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
134; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
135; CHECK-MVE-NEXT:    sxth r0, r0
136; CHECK-MVE-NEXT:    vmov s8, r0
137; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
138; CHECK-MVE-NEXT:    vmov r0, s8
139; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
140; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
141; CHECK-MVE-NEXT:    sxth r0, r0
142; CHECK-MVE-NEXT:    vmov s0, r0
143; CHECK-MVE-NEXT:    vcvt.f16.s32 s0, s0
144; CHECK-MVE-NEXT:    vmov r0, s0
145; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
146; CHECK-MVE-NEXT:    vmov q0, q1
147; CHECK-MVE-NEXT:    bx lr
148;
149; CHECK-MVEFP-LABEL: foo_half_int16:
150; CHECK-MVEFP:       @ %bb.0: @ %entry
151; CHECK-MVEFP-NEXT:    vcvt.f16.s16 q0, q0
152; CHECK-MVEFP-NEXT:    bx lr
153entry:
154  %out = sitofp <8 x i16> %src to <8 x half>
155  ret <8 x half> %out
156}
157
; Unsigned 16-bit integer to half: uitofp <8 x i16> to <8 x half>.
; The +mve.fp run expects a single vector vcvt.f16.u16 on q0. The
; +mve,+fullfp16 run expects a per-lane sequence: extract with vmov.u16,
; convert with scalar vcvt.f16.u32 (no sxth, unlike the signed variant),
; insert into q1 with vmov.16; q1 is finally copied to q0.
158define arm_aapcs_vfpcc <8 x half> @foo_half_uint16(<8 x i16> %src) {
159; CHECK-MVE-LABEL: foo_half_uint16:
160; CHECK-MVE:       @ %bb.0: @ %entry
161; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
162; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
163; CHECK-MVE-NEXT:    vmov s4, r0
164; CHECK-MVE-NEXT:    vcvt.f16.u32 s4, s4
165; CHECK-MVE-NEXT:    vmov r0, s4
166; CHECK-MVE-NEXT:    vmov s4, r1
167; CHECK-MVE-NEXT:    vcvt.f16.u32 s4, s4
168; CHECK-MVE-NEXT:    vmov r1, s4
169; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
170; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
171; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
172; CHECK-MVE-NEXT:    vmov s8, r0
173; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
174; CHECK-MVE-NEXT:    vmov r0, s8
175; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
176; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
177; CHECK-MVE-NEXT:    vmov s8, r0
178; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
179; CHECK-MVE-NEXT:    vmov r0, s8
180; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
181; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
182; CHECK-MVE-NEXT:    vmov s8, r0
183; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
184; CHECK-MVE-NEXT:    vmov r0, s8
185; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
186; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
187; CHECK-MVE-NEXT:    vmov s8, r0
188; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
189; CHECK-MVE-NEXT:    vmov r0, s8
190; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
191; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
192; CHECK-MVE-NEXT:    vmov s8, r0
193; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
194; CHECK-MVE-NEXT:    vmov r0, s8
195; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
196; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
197; CHECK-MVE-NEXT:    vmov s0, r0
198; CHECK-MVE-NEXT:    vcvt.f16.u32 s0, s0
199; CHECK-MVE-NEXT:    vmov r0, s0
200; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
201; CHECK-MVE-NEXT:    vmov q0, q1
202; CHECK-MVE-NEXT:    bx lr
203;
204; CHECK-MVEFP-LABEL: foo_half_uint16:
205; CHECK-MVEFP:       @ %bb.0: @ %entry
206; CHECK-MVEFP-NEXT:    vcvt.f16.u16 q0, q0
207; CHECK-MVEFP-NEXT:    bx lr
208entry:
209  %out = uitofp <8 x i16> %src to <8 x half>
210  ret <8 x half> %out
211}
212
; Half to signed 16-bit integer: fptosi <8 x half> to <8 x i16>.
; The +mve.fp run expects a single vector vcvt.s16.f16 on q0. The
; +mve,+fullfp16 run expects a scalarised sequence built from vmovx.f16 and
; scalar vcvt.s32.f16, with each converted value moved through r0 into one
; lane of q0 using vmov.16.
213define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) {
214; CHECK-MVE-LABEL: foo_int16_half:
215; CHECK-MVE:       @ %bb.0: @ %entry
216; CHECK-MVE-NEXT:    vmovx.f16 s14, s0
217; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
218; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s14
219; CHECK-MVE-NEXT:    vmov r0, s0
220; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
221; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
222; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
223; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
224; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
225; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
226; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
227; CHECK-MVE-NEXT:    vmov r0, s14
228; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
229; CHECK-MVE-NEXT:    vmov r0, s5
230; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
231; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
232; CHECK-MVE-NEXT:    vmov r0, s10
233; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
234; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
235; CHECK-MVE-NEXT:    vmov r0, s12
236; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
237; CHECK-MVE-NEXT:    vmov r0, s6
238; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
239; CHECK-MVE-NEXT:    vmov r0, s8
240; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
241; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
242; CHECK-MVE-NEXT:    vmov r0, s4
243; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
244; CHECK-MVE-NEXT:    bx lr
245;
246; CHECK-MVEFP-LABEL: foo_int16_half:
247; CHECK-MVEFP:       @ %bb.0: @ %entry
248; CHECK-MVEFP-NEXT:    vcvt.s16.f16 q0, q0
249; CHECK-MVEFP-NEXT:    bx lr
250entry:
251  %out = fptosi <8 x half> %src to <8 x i16>
252  ret <8 x i16> %out
253}
254
; Half to unsigned 16-bit integer: fptoui <8 x half> to <8 x i16>.
; The +mve.fp run expects a single vector vcvt.u16.f16 on q0. The
; +mve,+fullfp16 run expects a scalarised sequence built from vmovx.f16 and
; scalar vcvt.s32.f16, with each converted value moved through r0 into one
; lane of q0 using vmov.16.
; NOTE(review): the scalarised sequence uses the signed vcvt.s32.f16 even
; though the IR operation is fptoui; presumably acceptable because only the
; low 16 bits of each 32-bit result are inserted - confirm before changing.
255define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) {
256; CHECK-MVE-LABEL: foo_uint16_half:
257; CHECK-MVE:       @ %bb.0: @ %entry
258; CHECK-MVE-NEXT:    vmovx.f16 s14, s0
259; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
260; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s14
261; CHECK-MVE-NEXT:    vmov r0, s0
262; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
263; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
264; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
265; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
266; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
267; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
268; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
269; CHECK-MVE-NEXT:    vmov r0, s14
270; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
271; CHECK-MVE-NEXT:    vmov r0, s5
272; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
273; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
274; CHECK-MVE-NEXT:    vmov r0, s10
275; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
276; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
277; CHECK-MVE-NEXT:    vmov r0, s12
278; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
279; CHECK-MVE-NEXT:    vmov r0, s6
280; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
281; CHECK-MVE-NEXT:    vmov r0, s8
282; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
283; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
284; CHECK-MVE-NEXT:    vmov r0, s4
285; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
286; CHECK-MVE-NEXT:    bx lr
287;
288; CHECK-MVEFP-LABEL: foo_uint16_half:
289; CHECK-MVEFP:       @ %bb.0: @ %entry
290; CHECK-MVEFP-NEXT:    vcvt.u16.f16 q0, q0
291; CHECK-MVEFP-NEXT:    bx lr
292entry:
293  %out = fptoui <8 x half> %src to <8 x i16>
294  ret <8 x i16> %out
295}
296
; Signed 64-bit integer to double: sitofp <2 x i64> to <2 x double>.
; Despite "float" in the function name, the element type is double.
; Both runs share one set of checks: each 64-bit lane is moved into r0/r1 and
; converted by a call to __aeabi_l2d, and the returned r0/r1 pair is written to
; d9 and then d8. d8/d9 are saved with vpush and restored with vpop around the
; calls.
297define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
298; CHECK-LABEL: foo_float_int64:
299; CHECK:       @ %bb.0: @ %entry
300; CHECK-NEXT:    .save {r7, lr}
301; CHECK-NEXT:    push {r7, lr}
302; CHECK-NEXT:    .vsave {d8, d9}
303; CHECK-NEXT:    vpush {d8, d9}
304; CHECK-NEXT:    vmov q4, q0
305; CHECK-NEXT:    vmov r0, s18
306; CHECK-NEXT:    vmov r1, s19
307; CHECK-NEXT:    bl __aeabi_l2d
308; CHECK-NEXT:    vmov r2, s16
309; CHECK-NEXT:    vmov r3, s17
310; CHECK-NEXT:    vmov d9, r0, r1
311; CHECK-NEXT:    mov r0, r2
312; CHECK-NEXT:    mov r1, r3
313; CHECK-NEXT:    bl __aeabi_l2d
314; CHECK-NEXT:    vmov d8, r0, r1
315; CHECK-NEXT:    vmov q0, q4
316; CHECK-NEXT:    vpop {d8, d9}
317; CHECK-NEXT:    pop {r7, pc}
318entry:
319  %out = sitofp <2 x i64> %src to <2 x double>
320  ret <2 x double> %out
321}
322
; Unsigned 64-bit integer to double: uitofp <2 x i64> to <2 x double>.
; Despite "float" in the function name, the element type is double.
; Both runs share one set of checks: each 64-bit lane is moved into r0/r1 and
; converted by a call to __aeabi_ul2d, and the returned r0/r1 pair is written
; to d9 and then d8. d8/d9 are saved with vpush and restored with vpop around
; the calls.
323define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
324; CHECK-LABEL: foo_float_uint64:
325; CHECK:       @ %bb.0: @ %entry
326; CHECK-NEXT:    .save {r7, lr}
327; CHECK-NEXT:    push {r7, lr}
328; CHECK-NEXT:    .vsave {d8, d9}
329; CHECK-NEXT:    vpush {d8, d9}
330; CHECK-NEXT:    vmov q4, q0
331; CHECK-NEXT:    vmov r0, s18
332; CHECK-NEXT:    vmov r1, s19
333; CHECK-NEXT:    bl __aeabi_ul2d
334; CHECK-NEXT:    vmov r2, s16
335; CHECK-NEXT:    vmov r3, s17
336; CHECK-NEXT:    vmov d9, r0, r1
337; CHECK-NEXT:    mov r0, r2
338; CHECK-NEXT:    mov r1, r3
339; CHECK-NEXT:    bl __aeabi_ul2d
340; CHECK-NEXT:    vmov d8, r0, r1
341; CHECK-NEXT:    vmov q0, q4
342; CHECK-NEXT:    vpop {d8, d9}
343; CHECK-NEXT:    pop {r7, pc}
344entry:
345  %out = uitofp <2 x i64> %src to <2 x double>
346  ret <2 x double> %out
347}
348
; Double to signed 64-bit integer: fptosi <2 x double> to <2 x i64>.
; Despite "float" in the function name, the element type is double.
; Both runs share one set of checks: each double lane (d8, then d9) is moved
; into a core register pair and converted by a call to __aeabi_d2lz, and the
; returned r0/r1 pair is inserted into q4 with vmov.32 (lanes 0-1, then 2-3).
; d8/d9 are saved with vpush and restored with vpop around the calls.
349define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
350; CHECK-LABEL: foo_int64_float:
351; CHECK:       @ %bb.0: @ %entry
352; CHECK-NEXT:    .save {r7, lr}
353; CHECK-NEXT:    push {r7, lr}
354; CHECK-NEXT:    .vsave {d8, d9}
355; CHECK-NEXT:    vpush {d8, d9}
356; CHECK-NEXT:    vmov q4, q0
357; CHECK-NEXT:    vmov r0, r1, d8
358; CHECK-NEXT:    bl __aeabi_d2lz
359; CHECK-NEXT:    vmov r2, r3, d9
360; CHECK-NEXT:    vmov.32 q4[0], r0
361; CHECK-NEXT:    vmov.32 q4[1], r1
362; CHECK-NEXT:    mov r0, r2
363; CHECK-NEXT:    mov r1, r3
364; CHECK-NEXT:    bl __aeabi_d2lz
365; CHECK-NEXT:    vmov.32 q4[2], r0
366; CHECK-NEXT:    vmov.32 q4[3], r1
367; CHECK-NEXT:    vmov q0, q4
368; CHECK-NEXT:    vpop {d8, d9}
369; CHECK-NEXT:    pop {r7, pc}
370entry:
371  %out = fptosi <2 x double> %src to <2 x i64>
372  ret <2 x i64> %out
373}
374
; Double to unsigned 64-bit integer: fptoui <2 x double> to <2 x i64>.
; Despite "float" in the function name, the element type is double.
; Both runs share one set of checks: each double lane (d8, then d9) is moved
; into a core register pair and converted by a call to __aeabi_d2ulz, and the
; returned r0/r1 pair is inserted into q4 with vmov.32 (lanes 0-1, then 2-3).
; d8/d9 are saved with vpush and restored with vpop around the calls.
375define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
376; CHECK-LABEL: foo_uint64_float:
377; CHECK:       @ %bb.0: @ %entry
378; CHECK-NEXT:    .save {r7, lr}
379; CHECK-NEXT:    push {r7, lr}
380; CHECK-NEXT:    .vsave {d8, d9}
381; CHECK-NEXT:    vpush {d8, d9}
382; CHECK-NEXT:    vmov q4, q0
383; CHECK-NEXT:    vmov r0, r1, d8
384; CHECK-NEXT:    bl __aeabi_d2ulz
385; CHECK-NEXT:    vmov r2, r3, d9
386; CHECK-NEXT:    vmov.32 q4[0], r0
387; CHECK-NEXT:    vmov.32 q4[1], r1
388; CHECK-NEXT:    mov r0, r2
389; CHECK-NEXT:    mov r1, r3
390; CHECK-NEXT:    bl __aeabi_d2ulz
391; CHECK-NEXT:    vmov.32 q4[2], r0
392; CHECK-NEXT:    vmov.32 q4[3], r1
393; CHECK-NEXT:    vmov q0, q4
394; CHECK-NEXT:    vpop {d8, d9}
395; CHECK-NEXT:    pop {r7, pc}
396entry:
397  %out = fptoui <2 x double> %src to <2 x i64>
398  ret <2 x i64> %out
399}
400