; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP

; Vector integer <-> floating-point conversion tests for thumbv8.1m.main.
;
; Every function is compiled twice, once per run line above:
;   * +mve,+fullfp16 : the expected output converts lane by lane with scalar
;                      vcvt instructions.
;   * +mve.fp        : the expected output is a single q-register vcvt.
; The 64-bit element tests at the end share one set of expectations between
; both configurations: each element is converted by a library call.
;
; The assertion lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; sitofp <4 x i32> -> <4 x float>.
define arm_aapcs_vfpcc <4 x float> @foo_float_int32(<4 x i32> %src) {
; CHECK-MVE-LABEL: foo_float_int32:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.f32.s32 s7, s3
; CHECK-MVE-NEXT:    vcvt.f32.s32 s6, s2
; CHECK-MVE-NEXT:    vcvt.f32.s32 s5, s1
; CHECK-MVE-NEXT:    vcvt.f32.s32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_float_int32:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f32.s32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = sitofp <4 x i32> %src to <4 x float>
  ret <4 x float> %out
}

; uitofp <4 x i32> -> <4 x float>.
define arm_aapcs_vfpcc <4 x float> @foo_float_uint32(<4 x i32> %src) {
; CHECK-MVE-LABEL: foo_float_uint32:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.f32.u32 s7, s3
; CHECK-MVE-NEXT:    vcvt.f32.u32 s6, s2
; CHECK-MVE-NEXT:    vcvt.f32.u32 s5, s1
; CHECK-MVE-NEXT:    vcvt.f32.u32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_float_uint32:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f32.u32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = uitofp <4 x i32> %src to <4 x float>
  ret <4 x float> %out
}

; fptosi <4 x float> -> <4 x i32>. In the scalar expansion each converted lane
; is moved through r0 and inserted back into q0.
define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_int32_float:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s0
; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s1
; CHECK-MVE-NEXT:    vcvt.s32.f32 s10, s2
; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s3
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov.32 q0[0], r0
; CHECK-MVE-NEXT:    vmov r0, s6
; CHECK-MVE-NEXT:    vmov.32 q0[1], r0
; CHECK-MVE-NEXT:    vmov r0, s10
; CHECK-MVE-NEXT:    vmov.32 q0[2], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.32 q0[3], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_int32_float:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.s32.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptosi <4 x float> %src to <4 x i32>
  ret <4 x i32> %out
}

; fptoui <4 x float> -> <4 x i32>.
define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_uint32_float:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s0
; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s1
; CHECK-MVE-NEXT:    vcvt.u32.f32 s10, s2
; CHECK-MVE-NEXT:    vcvt.u32.f32 s8, s3
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov.32 q0[0], r0
; CHECK-MVE-NEXT:    vmov r0, s6
; CHECK-MVE-NEXT:    vmov.32 q0[1], r0
; CHECK-MVE-NEXT:    vmov r0, s10
; CHECK-MVE-NEXT:    vmov.32 q0[2], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.32 q0[3], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_uint32_float:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.u32.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptoui <4 x float> %src to <4 x i32>
  ret <4 x i32> %out
}

; sitofp <8 x i16> -> <8 x half>. The scalar expansion extracts each lane with
; vmov.u16, sign-extends it with sxth, converts with vcvt.f16.s32 and inserts
; the result into q1.
define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
; CHECK-MVE-LABEL: foo_half_int16:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    sxth r1, r1
; CHECK-MVE-NEXT:    vmov s4, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov s4, r1
; CHECK-MVE-NEXT:    vcvt.f16.s32 s4, s4
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT:    sxth r0, r0
; CHECK-MVE-NEXT:    vmov s0, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s0, s0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_half_int16:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f16.s16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = sitofp <8 x i16> %src to <8 x half>
  ret <8 x half> %out
}

; uitofp <8 x i16> -> <8 x half>. Same shape as foo_half_int16, but with no
; sxth after the vmov.u16 extract and with vcvt.f16.u32 as the conversion.
define arm_aapcs_vfpcc <8 x half> @foo_half_uint16(<8 x i16> %src) {
; CHECK-MVE-LABEL: foo_half_uint16:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT:    vmov s4, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov s4, r1
; CHECK-MVE-NEXT:    vcvt.f16.u32 s4, s4
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT:    vmov s0, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s0, s0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_half_uint16:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f16.u16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = uitofp <8 x i16> %src to <8 x half>
  ret <8 x half> %out
}

; fptosi <8 x half> -> <8 x i16>. The scalar expansion uses vmovx.f16 to reach
; the upper half of each s-register and converts every lane with vcvt.s32.f16.
define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) {
; CHECK-MVE-LABEL: foo_int16_half:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s14, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s14
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov r0, s14
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov r0, s5
; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov r0, s10
; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov r0, s6
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_int16_half:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.s16.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptosi <8 x half> %src to <8 x i16>
  ret <8 x i16> %out
}

; fptoui <8 x half> -> <8 x i16>. The scalar expansion recorded below is the
; same instruction sequence as foo_int16_half, including the signed
; vcvt.s32.f16; only the +mve.fp output uses the unsigned vcvt.u16.f16.
; NOTE(review): presumably the signed 32-bit form is acceptable because only
; the low 16 bits of each result are kept - confirm before changing.
define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) {
; CHECK-MVE-LABEL: foo_uint16_half:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s14, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s14
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov r0, s14
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov r0, s5
; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov r0, s10
; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov r0, s6
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_uint16_half:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.u16.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptoui <8 x half> %src to <8 x i16>
  ret <8 x i16> %out
}

; sitofp <2 x i64> -> <2 x double> (the result is double despite "float" in
; the name). Both configurations expect one __aeabi_l2d call per element, with
; the vector kept in q4 (d8/d9 are saved and restored) across the calls.
define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
; CHECK-LABEL: foo_float_int64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, s18
; CHECK-NEXT:    vmov r1, s19
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov r2, s16
; CHECK-NEXT:    vmov r3, s17
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %out = sitofp <2 x i64> %src to <2 x double>
  ret <2 x double> %out
}

; uitofp <2 x i64> -> <2 x double>. Same shape as foo_float_int64, calling
; __aeabi_ul2d for each element.
define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
; CHECK-LABEL: foo_float_uint64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, s18
; CHECK-NEXT:    vmov r1, s19
; CHECK-NEXT:    bl __aeabi_ul2d
; CHECK-NEXT:    vmov r2, s16
; CHECK-NEXT:    vmov r3, s17
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_ul2d
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %out = uitofp <2 x i64> %src to <2 x double>
  ret <2 x double> %out
}

; fptosi <2 x double> -> <2 x i64> (the source is double despite "float" in
; the name). Both configurations expect one __aeabi_d2lz call per element; the
; r0/r1 result pair is inserted into q4 with vmov.32.
define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
; CHECK-LABEL: foo_int64_float:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    bl __aeabi_d2lz
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    vmov.32 q4[0], r0
; CHECK-NEXT:    vmov.32 q4[1], r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_d2lz
; CHECK-NEXT:    vmov.32 q4[2], r0
; CHECK-NEXT:    vmov.32 q4[3], r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %out = fptosi <2 x double> %src to <2 x i64>
  ret <2 x i64> %out
}

; fptoui <2 x double> -> <2 x i64>. Same shape as foo_int64_float, calling
; __aeabi_d2ulz for each element.
define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
; CHECK-LABEL: foo_uint64_float:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    vmov.32 q4[0], r0
; CHECK-NEXT:    vmov.32 q4[1], r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    vmov.32 q4[2], r0
; CHECK-NEXT:    vmov.32 q4[3], r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %out = fptoui <2 x double> %src to <2 x i64>
  ret <2 x i64> %out
}