; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Code generation test for arithmetic on 256-bit vectors and scalar square
; roots when compiling for x86-64 Darwin with the AVX feature enabled.
; Every function is anchored with its own label directive so that an
; instruction pattern can only be satisfied by the output of the function it
; belongs to, never by a later function that happens to emit the same mnemonic.

;-----------------------------------------------------------------------------
; Floating-point add: register/register form and constant-pool operand form.
;-----------------------------------------------------------------------------

; CHECK-LABEL: addpd256:
; CHECK: vaddpd
define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

; The constant vector is expected to be used directly as a RIP-relative
; memory operand of the add.
; CHECK-LABEL: addpd256fold:
; CHECK: vaddpd LCP{{.*}}(%rip)
define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %add.i
}

; CHECK-LABEL: addps256:
; CHECK: vaddps
define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <8 x float> %x, %y
  ret <8 x float> %add.i
}

; CHECK-LABEL: addps256fold:
; CHECK: vaddps LCP{{.*}}(%rip)
define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %add.i
}

;-----------------------------------------------------------------------------
; Floating-point subtract: register/register form and loaded-operand form.
;-----------------------------------------------------------------------------

; CHECK-LABEL: subpd256:
; CHECK: vsubpd
define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %sub.i = fsub <4 x double> %x, %y
  ret <4 x double> %sub.i
}

; The 32-byte aligned load is expected to become the memory operand of the
; subtract (the pattern requires an operand starting with "(%").
; CHECK-LABEL: subpd256fold:
; CHECK: vsubpd (%
define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
entry:
  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
  %sub.i = fsub <4 x double> %y, %tmp2
  ret <4 x double> %sub.i
}

; CHECK-LABEL: subps256:
; CHECK: vsubps
define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %sub.i = fsub <8 x float> %x, %y
  ret <8 x float> %sub.i
}

; CHECK-LABEL: subps256fold:
; CHECK: vsubps (%
define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
entry:
  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
  %sub.i = fsub <8 x float> %y, %tmp2
  ret <8 x float> %sub.i
}

;-----------------------------------------------------------------------------
; Floating-point multiply: register/register form and constant-pool form.
;-----------------------------------------------------------------------------

; CHECK-LABEL: mulpd256:
; CHECK: vmulpd
define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <4 x double> %x, %y
  ret <4 x double> %mul.i
}

; CHECK-LABEL: mulpd256fold:
; CHECK: vmulpd LCP{{.*}}(%rip)
define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %mul.i
}

; CHECK-LABEL: mulps256:
; CHECK: vmulps
define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <8 x float> %x, %y
  ret <8 x float> %mul.i
}

; CHECK-LABEL: mulps256fold:
; CHECK: vmulps LCP{{.*}}(%rip)
define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %mul.i
}

;-----------------------------------------------------------------------------
; Floating-point divide: register/register form and constant-pool form.
;-----------------------------------------------------------------------------

; CHECK-LABEL: divpd256:
; CHECK: vdivpd
define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <4 x double> %x, %y
  ret <4 x double> %div.i
}

; CHECK-LABEL: divpd256fold:
; CHECK: vdivpd LCP{{.*}}(%rip)
define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %div.i
}

; CHECK-LABEL: divps256:
; CHECK: vdivps
define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <8 x float> %x, %y
  ret <8 x float> %div.i
}

; CHECK-LABEL: divps256fold:
; CHECK: vdivps LCP{{.*}}(%rip)
define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %div.i
}

;-----------------------------------------------------------------------------
; Scalar square root through the sqrtf / sqrt library calls, which are
; expected to be emitted as the VEX-encoded scalar sqrt instructions.
;-----------------------------------------------------------------------------

; CHECK-LABEL: sqrtA:
; CHECK: vsqrtss
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone

; CHECK-LABEL: sqrtB:
; CHECK: vsqrtsd
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @sqrtf(float) readnone

;-----------------------------------------------------------------------------
; 256-bit integer add. The expected output extracts the upper 128-bit half of
; each operand, performs the operation once per half on xmm registers, and
; recombines the halves with vinsertf128.
;-----------------------------------------------------------------------------

; CHECK-LABEL: vpaddq:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddd:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpaddw:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK-LABEL: vpaddb:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

;-----------------------------------------------------------------------------
; 256-bit integer subtract, expected to be split into two 128-bit halves in
; the same extract / operate / insert shape as the adds.
;-----------------------------------------------------------------------------

; CHECK-LABEL: vpsubq:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: vpsubd:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpsubw:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK-LABEL: vpsubb:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

;-----------------------------------------------------------------------------
; 256-bit integer multiply. The 32-bit and 16-bit element cases are expected
; to use one multiply instruction per 128-bit half.
;-----------------------------------------------------------------------------

; CHECK-LABEL: vpmulld:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpmullw:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; The 64-bit element multiply is expected to be expanded, for each 128-bit
; half, into three vpmuludq partial products combined with 32-bit shifts and
; vpaddq. The exact instruction order is pinned by the chain below.
; The function name contains a hyphen, so the emitted symbol may be quoted;
; the label pattern therefore tolerates an optional closing quote.
; CHECK-LABEL: mul-v4i64{{"?}}:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

;-----------------------------------------------------------------------------
; Scalar sqrt intrinsic whose input element is loaded from address space 1.
; The test only requires that a vsqrtss is emitted for the function.
;-----------------------------------------------------------------------------

declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define <4 x float> @int_sqrt_ss() {
; CHECK-LABEL: int_sqrt_ss:
; CHECK: vsqrtss
  %x0 = load float, float addrspace(1)* undef, align 8
  %x1 = insertelement <4 x float> undef, float %x0, i32 0
  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
  ret <4 x float> %x2
}