; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; 256-bit

define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq256_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <4 x i64>, <4 x i64>* %j, align 4
  %x = add <4 x i64> %i, %tmp
  ret <4 x i64> %x
}

define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq256_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI2_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i64> %i, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %x
}

define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
; CHECK-LABEL: vpaddq256_broadcast2_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %j = load i64, i64* %j.ptr
  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
  %x = add <4 x i64> %i, %j.v
  ret <4 x i64> %x
}

define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd256_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <8 x i32>, <8 x i32>* %j, align 4
  %x = add <8 x i32> %i, %tmp
  ret <8 x i32> %x
}

define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd256_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI6_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <8 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %x
}

define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_maskz_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_mask_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_mask_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI10_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_maskz_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_maskz_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI12_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
; CHECK-LABEL: vpmulld256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x40,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
; CHECK-LABEL: test_vaddpd_256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
; CHECK-LABEL: test_fold_vaddpd_256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 4, value: LCPI17_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
  ret <4 x double> %add.i
}

define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI18_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <8 x float> %b
}

define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fadd <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vmulps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fmul <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1)nounwind readnone {
; CHECK-LABEL: test_mask_vminps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vminps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x float> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vmaxps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x float> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vsubps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fsub <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vdivps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fdiv <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vmulpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fmul <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vminpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x double> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vmaxpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x double> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vsubpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fsub <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vdivpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fdiv <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_maskz_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double>* %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_fold_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
; CHECK-NEXT:    vaddpd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_fold_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast2_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  ret <4 x double> %x
}

define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
  ret <4 x double> %r
}

define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_broadcast_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; 128-bit

define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq128_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <2 x i64>, <2 x i64>* %j, align 4
  %x = add <2 x i64> %i, %tmp
  ret <2 x i64> %x
}

define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq128_broadcast2_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load i64, i64* %j
  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
  %x = add <2 x i64> %i, %j.1
  ret <2 x i64> %x
}

define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd128_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <4 x i32>, <4 x i32>* %j, align 4
  %x = add <4 x i32> %i, %tmp
  ret <4 x i32> %x
}

define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd128_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI42_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i32> %i, <i32 6, i32 6, i32 6, i32 6>
  ret <4 x i32> %x
}

define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_maskz_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_mask_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_mask_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI46_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 7, i32 7, i32 7, i32 7>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_maskz_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_maskz_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI48_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 8, i32 8, i32 8, i32 8>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
; CHECK-LABEL: vpmulld128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x40,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = mul <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
; CHECK-LABEL: test_vaddpd_128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <2 x double> %x, %y
  ret <2 x double> %add.i
}

define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
; CHECK-LABEL: test_fold_vaddpd_128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 4, value: LCPI53_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
  ret <2 x double> %add.i
}

define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI54_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <4 x float> %b
}

define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fadd <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vmulps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fmul <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vminps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x float> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vmaxps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x float> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vsubps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fsub <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}


define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vdivps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fdiv <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vmulpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fmul <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vminpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <2 x double> %i, %j
  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vmaxpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <2 x double> %i, %j
  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double>
@test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone { 749; CHECK-LABEL: test_mask_vsubpd_128: 750; CHECK: ## %bb.0: 751; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb] 752; CHECK-NEXT: vsubpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5c,0xc2] 753; CHECK-NEXT: retq ## encoding: [0xc3] 754 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 755 %x = fsub <2 x double> %i, %j 756 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst 757 ret <2 x double> %r 758} 759 760define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone { 761; CHECK-LABEL: test_mask_vdivpd_128: 762; CHECK: ## %bb.0: 763; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb] 764; CHECK-NEXT: vdivpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5e,0xc2] 765; CHECK-NEXT: retq ## encoding: [0xc3] 766 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 767 %x = fdiv <2 x double> %i, %j 768 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst 769 ret <2 x double> %r 770} 771 772define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone { 773; CHECK-LABEL: test_mask_vaddpd_128: 774; CHECK: ## %bb.0: 775; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb] 776; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0xc2] 777; CHECK-NEXT: retq ## encoding: [0xc3] 778 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 779 %x = fadd <2 x double> %i, %j 780 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst 781 ret <2 x double> %r 782} 783 784define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j, 785; CHECK-LABEL: test_maskz_vaddpd_128: 786; CHECK: ## %bb.0: 787; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 
## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] 788; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0xc1] 789; CHECK-NEXT: retq ## encoding: [0xc3] 790 <2 x i64> %mask1) nounwind readnone { 791 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 792 %x = fadd <2 x double> %i, %j 793 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer 794 ret <2 x double> %r 795} 796 797define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i, <2 x double>* %j, <2 x i64> %mask1) nounwind { 798; CHECK-LABEL: test_mask_fold_vaddpd_128: 799; CHECK: ## %bb.0: 800; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] 801; CHECK-NEXT: vaddpd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0x07] 802; CHECK-NEXT: retq ## encoding: [0xc3] 803 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 804 %tmp = load <2 x double>, <2 x double>* %j 805 %x = fadd <2 x double> %i, %tmp 806 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst 807 ret <2 x double> %r 808} 809 810define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j, <2 x i64> %mask1) nounwind { 811; CHECK-LABEL: test_maskz_fold_vaddpd_128: 812; CHECK: ## %bb.0: 813; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9] 814; CHECK-NEXT: vaddpd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0x07] 815; CHECK-NEXT: retq ## encoding: [0xc3] 816 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 817 %tmp = load <2 x double>, <2 x double>* %j 818 %x = fadd <2 x double> %i, %tmp 819 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer 820 ret <2 x double> %r 821} 822 823define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind { 824; CHECK-LABEL: test_broadcast2_vaddpd_128: 825; CHECK: ## %bb.0: 826; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x18,0x58,0x07] 827; CHECK-NEXT: retq ## encoding: [0xc3] 828 %tmp = load double, double* %j 829 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 830 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 831 %x = fadd <2 x double> %j.1, %i 832 ret <2 x double> %x 833} 834 835define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind { 836; CHECK-LABEL: test_mask_broadcast_vaddpd_128: 837; CHECK: ## %bb.0: 838; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 839; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] 840; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x07] 841; CHECK-NEXT: retq ## encoding: [0xc3] 842 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 843 %tmp = load double, double* %j 844 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 845 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 846 %x = fadd <2 x double> %j.1, %i 847 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i 848 ret <2 x double> %r 849} 850 851define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j, <2 x i64> %mask1) nounwind { 852; CHECK-LABEL: test_maskz_broadcast_vaddpd_128: 853; CHECK: ## %bb.0: 854; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9] 855; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x58,0x07] 856; CHECK-NEXT: retq ## encoding: [0xc3] 857 %mask = icmp ne <2 x i64> %mask1, zeroinitializer 858 %tmp = load double, double* %j 859 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 860 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 861 %x = fadd <2 x double> %j.1, %i 862 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer 863 ret <2 x double> %r 864} 865