; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

; Code generation test for bitwise logic with AVX enabled.
;
; The 256-bit tests bitcast <4 x double> / <8 x float> values to integer
; vectors, apply and / xor / or / and-not, and bitcast back. The expected
; assembly is the packed-double form for the <4 x double> tests and the
; packed-single form for the <8 x float> tests. The <4 x double> tests end
; with an fadd of zero; per the in-body comments, that add is what forces the
; execution domain. The "fold" variants expect the second operand to appear
; as a memory operand of the logic instruction. The two 128-bit tests at the
; end expect the integer instructions.

; and of two <4 x double> register operands.
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; and of a <4 x double> with a constant vector; the constant is expected as a
; RIP-relative memory operand.
define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

; and of two <8 x float> register operands.
define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andps256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

; and of an <8 x float> with a constant vector; the constant is expected as a
; RIP-relative memory operand.
define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: andps256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %1
}

; xor of two <4 x double> register operands.
define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorpd256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vxorpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; xor of a <4 x double> with a constant vector; the constant is expected as a
; RIP-relative memory operand.
define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorpd256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

; xor of two <8 x float> register operands.
define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorps256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vxorps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %2
}

; xor of an <8 x float> with a constant vector; the constant is expected as a
; RIP-relative memory operand.
define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorps256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %1
}

; or of two <4 x double> register operands.
define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: orpd256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vorpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; or of a <4 x double> with a constant vector; the constant is expected as a
; RIP-relative memory operand.
define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: orpd256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vorpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

; or of two <8 x float> register operands.
define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: orps256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vorps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %2
}

; or of an <8 x float> with a constant vector; the constant is expected as a
; RIP-relative memory operand.
define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: orps256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %1
}

; and-not on <4 x double> registers: %x is inverted (xor with all-ones) and
; then and-ed with %y; a single and-not instruction is expected.
define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andnotpd256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandnpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; and-not on <4 x double> where %y is inverted and the other operand is
; loaded through %x; the load is expected as the instruction's memory operand.
define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
; CHECK-LABEL: andnotpd256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandnpd (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
  %0 = bitcast <4 x double> %y to <4 x i64>
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %tmp2 to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; and-not on <8 x float> registers: %x is inverted (xor with all-ones) and
; then and-ed with %y; a single and-not instruction is expected.
define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andnotps256:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

; and-not on <8 x float> where %y is inverted and the other operand is loaded
; through %x; the load is expected as the instruction's memory operand.
define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
; CHECK-LABEL: andnotps256fold:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
  %0 = bitcast <8 x float> %y to <8 x i32>
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %tmp2 to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

;;; Test that basic 2 x i64 logic uses the integer version on AVX

; and-not of %a with (%a + 1); the integer and-not instruction is expected.
; %b is not used by the body.
define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandn:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

; and of (%a + 1) with %b; the integer and instruction is expected.
define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpand:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}
