; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fp16 %s -o - | FileCheck %s

; Codegen test for NEON/VFP conversions on ARM. Every function below pairs a
; small piece of IR with the exact instruction sequence llc is expected to
; emit for it. The expected sequences are autogenerated; regenerate them with
; the script named on the first line instead of editing them by hand.

;------------------------------------------------------------------------------
; Plain float <-> integer vector conversions, 64-bit (D register) vectors.
; Each function loads one vector through %A, converts it, and returns it.
;------------------------------------------------------------------------------

; fptosi on <2 x float>: expected to become a single vcvt.s32.f32.
define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.s32.f32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x float>, <2 x float>* %A
  %res = fptosi <2 x float> %src to <2 x i32>
  ret <2 x i32> %res
}

; fptoui on <2 x float>: expected to become a single vcvt.u32.f32.
define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.u32.f32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x float>, <2 x float>* %A
  %res = fptoui <2 x float> %src to <2 x i32>
  ret <2 x i32> %res
}

; sitofp on <2 x i32>: expected to become a single vcvt.f32.s32.
define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.s32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x i32>, <2 x i32>* %A
  %res = sitofp <2 x i32> %src to <2 x float>
  ret <2 x float> %res
}

; uitofp on <2 x i32>: expected to become a single vcvt.f32.u32.
define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.u32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x i32>, <2 x i32>* %A
  %res = uitofp <2 x i32> %src to <2 x float>
  ret <2 x float> %res
}

;------------------------------------------------------------------------------
; Same four conversions on 128-bit (Q register) vectors. The expected code
; loads both halves with vld1.64 and returns the result in r0-r3.
;------------------------------------------------------------------------------

; fptosi on <4 x float>.
define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.s32.f32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x float>, <4 x float>* %A
  %res = fptosi <4 x float> %src to <4 x i32>
  ret <4 x i32> %res
}

; fptoui on <4 x float>.
define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.u32.f32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x float>, <4 x float>* %A
  %res = fptoui <4 x float> %src to <4 x i32>
  ret <4 x i32> %res
}

; sitofp on <4 x i32>.
define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.s32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x i32>, <4 x i32>* %A
  %res = sitofp <4 x i32> %src to <4 x float>
  ret <4 x float> %res
}

; uitofp on <4 x i32>.
define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.u32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x i32>, <4 x i32>* %A
  %res = uitofp <4 x i32> %src to <4 x float>
  ret <4 x float> %res
}

;------------------------------------------------------------------------------
; Conversions through the llvm.arm.neon.vcvt* intrinsics that take an extra
; i32 operand, 64-bit vectors. Every call passes i32 1, and the expected vcvt
; carries a matching #1 immediate.
;------------------------------------------------------------------------------

; vcvtfp2fxs: <2 x float> -> <2 x i32>, expected vcvt.s32.f32 with #1.
define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_n_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.s32.f32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x float>, <2 x float>* %A
  %res = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %src, i32 1)
  ret <2 x i32> %res
}

; vcvtfp2fxu: <2 x float> -> <2 x i32>, expected vcvt.u32.f32 with #1.
define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_n_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.u32.f32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x float>, <2 x float>* %A
  %res = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %src, i32 1)
  ret <2 x i32> %res
}

; vcvtfxs2fp: <2 x i32> -> <2 x float>, expected vcvt.f32.s32 with #1.
define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_n_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.s32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x i32>, <2 x i32>* %A
  %res = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %src, i32 1)
  ret <2 x float> %res
}

; vcvtfxu2fp: <2 x i32> -> <2 x float>, expected vcvt.f32.u32 with #1.
define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_n_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.u32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %src = load <2 x i32>, <2 x i32>* %A
  %res = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %src, i32 1)
  ret <2 x float> %res
}

declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone

;------------------------------------------------------------------------------
; The same intrinsic conversions on 128-bit vectors (v4 variants).
;------------------------------------------------------------------------------

; vcvtfp2fxs: <4 x float> -> <4 x i32>.
define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.s32.f32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x float>, <4 x float>* %A
  %res = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %src, i32 1)
  ret <4 x i32> %res
}

; vcvtfp2fxu: <4 x float> -> <4 x i32>.
define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.u32.f32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x float>, <4 x float>* %A
  %res = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %src, i32 1)
  ret <4 x i32> %res
}

; vcvtfxs2fp: <4 x i32> -> <4 x float>.
define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.s32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x i32>, <4 x i32>* %A
  %res = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %src, i32 1)
  ret <4 x float> %res
}

; vcvtfxu2fp: <4 x i32> -> <4 x float>.
define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.u32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %src = load <4 x i32>, <4 x i32>* %A
  %res = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %src, i32 1)
  ret <4 x float> %res
}

declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone

;------------------------------------------------------------------------------
; Half-precision conversions through the vcvthf2fp / vcvtfp2hf intrinsics.
; The half-precision lanes are carried as <4 x i16> in the IR.
;------------------------------------------------------------------------------

; <4 x i16> -> <4 x float>: expected widening vcvt.f32.f16 from d16 into q8.
define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vcvt_f16tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.f16 q8, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %halves = load <4 x i16>, <4 x i16>* %A
  %floats = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %halves)
  ret <4 x float> %floats
}

; <4 x float> -> <4 x i16>: expected narrowing vcvt.f16.f32 from q8 into d16.
define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_f32tof16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f16.f32 d16, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %floats = load <4 x float>, <4 x float>* %A
  %halves = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %floats)
  ret <4 x i16> %halves
}

declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone

;------------------------------------------------------------------------------
; fmul by 2.0 followed by fptoui, for several element types. Only the
; float -> i16 case is expected to produce a vcvt with a #1 immediate; the
; other three keep an explicit doubling (vadd of the value with itself).
;------------------------------------------------------------------------------

; <4 x float> * 2.0 -> <4 x i16>: the expected code has no separate multiply,
; just vcvt.u32.f32 with #1 followed by a narrowing vmovn.i32.
define <4 x i16> @fix_float_to_i16(<4 x float> %in) {
; CHECK-LABEL: fix_float_to_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d17, r2, r3
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.u32.f32 q8, q8, #1
; CHECK-NEXT:    vmovn.i32 d16, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %doubled = fmul <4 x float> %in, <float 2.0, float 2.0, float 2.0, float 2.0>
  %narrow = fptoui <4 x float> %doubled to <4 x i16>
  ret <4 x i16> %narrow
}

; <2 x float> * 2.0 -> <2 x i64>: the expected code doubles with vadd.f32 and
; converts each lane with a call to __aeabi_f2ulz.
define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
; CHECK-LABEL: fix_float_to_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vadd.f32 d8, d16, d16
; CHECK-NEXT:    vmov r0, s17
; CHECK-NEXT:    bl __aeabi_f2ulz
; CHECK-NEXT:    mov r4, r1
; CHECK-NEXT:    vmov r1, s16
; CHECK-NEXT:    vmov.32 d9[0], r0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl __aeabi_f2ulz
; CHECK-NEXT:    vmov.32 d8[0], r0
; CHECK-NEXT:    vmov.32 d9[1], r4
; CHECK-NEXT:    vmov.32 d8[1], r1
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    mov pc, lr
  %doubled = fmul <2 x float> %in, <float 2.0, float 2.0>
  %wide = fptoui <2 x float> %doubled to <2 x i64>
  ret <2 x i64> %wide
}

; <4 x double> * 2.0 -> <4 x i16>: the expected code doubles each lane with
; vadd.f64, converts each lane with a scalar vcvt.s32.f64, and packs the
; results with vuzp.16.
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
; CHECK-LABEL: fix_double_to_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d18, r0, r1
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
; CHECK-NEXT:    vmov d19, r2, r3
; CHECK-NEXT:    vadd.f64 d18, d18, d18
; CHECK-NEXT:    vcvt.s32.f64 s0, d18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vadd.f64 d20, d16, d16
; CHECK-NEXT:    vadd.f64 d16, d17, d17
; CHECK-NEXT:    vcvt.s32.f64 s2, d20
; CHECK-NEXT:    vcvt.s32.f64 s6, d16
; CHECK-NEXT:    vmov.32 d16[0], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vadd.f64 d19, d19, d19
; CHECK-NEXT:    vcvt.s32.f64 s4, d19
; CHECK-NEXT:    vmov.32 d17[0], r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.32 d16[1], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.32 d17[1], r0
; CHECK-NEXT:    vuzp.16 d16, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %doubled = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
  %narrow = fptoui <4 x double> %doubled to <4 x i16>
  ret <4 x i16> %narrow
}

; <2 x double> * 2.0 -> <2 x i64>: the expected code doubles with vadd.f64 and
; converts each lane with a call to __aeabi_d2ulz.
define <2 x i64> @fix_double_to_i64(<2 x double> %in) {
; CHECK-LABEL: fix_double_to_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov d16, r2, r3
; CHECK-NEXT:    vadd.f64 d16, d16, d16
; CHECK-NEXT:    vmov r2, r3, d16
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vadd.f64 d8, d16, d16
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    mov r4, r1
; CHECK-NEXT:    vmov r2, r1, d8
; CHECK-NEXT:    vmov.32 d9[0], r0
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    vmov.32 d8[0], r0
; CHECK-NEXT:    vmov.32 d9[1], r4
; CHECK-NEXT:    vmov.32 d8[1], r1
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    mov pc, lr
  %doubled = fmul <2 x double> %in, <double 2.0, double 2.0>
  %wide = fptoui <2 x double> %doubled to <2 x i64>
  ret <2 x i64> %wide
}

;------------------------------------------------------------------------------
; Scalar conversions whose result is stored to memory. In the expected code
; the stores are vstr from s0, the register the vcvt wrote.
;------------------------------------------------------------------------------

; fptosi result is stored through %p and %q and also returned: one vcvt, two
; vstr from s0, and one vmov into r0 for the return value.
define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) {
; CHECK-LABEL: multi_sint:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.s32.f64 s0, d16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    vstr s0, [r3]
; CHECK-NEXT:    mov pc, lr
  %ival = fptosi double %c to i32
  store i32 %ival, i32* %p, align 4
  store i32 %ival, i32* %q, align 4
  ret i32 %ival
}

; Unsigned counterpart of multi_sint (fptoui, expected vcvt.u32.f64).
define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) {
; CHECK-LABEL: multi_uint:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.u32.f64 s0, d16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    vstr s0, [r3]
; CHECK-NEXT:    mov pc, lr
  %ival = fptoui double %c to i32
  store i32 %ival, i32* %p, align 4
  store i32 %ival, i32* %q, align 4
  ret i32 %ival
}

; double -> signed i32, stored once: vcvt followed directly by vstr.
define void @double_to_sint_store(double %c, i32* nocapture %p) {
; CHECK-LABEL: double_to_sint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.s32.f64 s0, d16
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    mov pc, lr
  %ival = fptosi double %c to i32
  store i32 %ival, i32* %p, align 4
  ret void
}

; double -> unsigned i32, stored once.
define void @double_to_uint_store(double %c, i32* nocapture %p) {
; CHECK-LABEL: double_to_uint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.u32.f64 s0, d16
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    mov pc, lr
  %ival = fptoui double %c to i32
  store i32 %ival, i32* %p, align 4
  ret void
}

; float -> signed i32, stored once.
define void @float_to_sint_store(float %c, i32* nocapture %p) {
; CHECK-LABEL: float_to_sint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.s32.f32 s0, s0
; CHECK-NEXT:    vstr s0, [r1]
; CHECK-NEXT:    mov pc, lr
  %ival = fptosi float %c to i32
  store i32 %ival, i32* %p, align 4
  ret void
}

; float -> unsigned i32, stored once.
define void @float_to_uint_store(float %c, i32* nocapture %p) {
; CHECK-LABEL: float_to_uint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.u32.f32 s0, s0
; CHECK-NEXT:    vstr s0, [r1]
; CHECK-NEXT:    mov pc, lr
  %ival = fptoui float %c to i32
  store i32 %ival, i32* %p, align 4
  ret void
}