; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Instruction-selection test for the llvm.arm.neon.vrecpe, vrecps, vrsqrte and
; vrsqrts intrinsics. Every function below loads its vector operand(s) through
; the pointer argument(s), calls exactly one intrinsic and returns the result.
; The two FileCheck directives inside each function expect, in order, the
; function's label and then the instruction mnemonic with its type suffix.
; Functions with a "Q" in the name use the 4-element (128-bit) vector types;
; the others use the 2-element (64-bit) vector types.

; --- vrecpe: one-operand form, expects .u32 for integer and .f32 for float ---

define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
;CHECK: vrecpei32:
;CHECK: vrecpe.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp2
}

define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
;CHECK: vrecpeQi32:
;CHECK: vrecpe.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp2
}

define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
;CHECK: vrecpef32:
;CHECK: vrecpe.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
  ret <2 x float> %tmp2
}

define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
;CHECK: vrecpeQf32:
;CHECK: vrecpe.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
  ret <4 x float> %tmp2
}

declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone

; --- vrecps: two-operand form, float vectors only, expects .f32 ---

define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vrecpsf32:
;CHECK: vrecps.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vrecpsQf32:
;CHECK: vrecps.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone

; --- vrsqrte: one-operand form, expects .u32 for integer and .f32 for float ---

define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
;CHECK: vrsqrtei32:
;CHECK: vrsqrte.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp2
}

define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
;CHECK: vrsqrteQi32:
;CHECK: vrsqrte.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp2
}

define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
;CHECK: vrsqrtef32:
;CHECK: vrsqrte.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
  ret <2 x float> %tmp2
}

define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
;CHECK: vrsqrteQf32:
;CHECK: vrsqrte.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
  ret <4 x float> %tmp2
}

declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone

; --- vrsqrts: two-operand form, float vectors only, expects .f32 ---

define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vrsqrtsf32:
;CHECK: vrsqrts.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vrsqrtsQf32:
;CHECK: vrsqrts.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone