; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s

declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32* %y_ptr) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; CHECK-NEXT:    vpaddd (%rsi){1to8}, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x58,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; CHECK-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc9]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x59,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; CHECK-NEXT:    vpaddq %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc9]
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x59,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; CHECK-NEXT:    vpaddq %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc9]
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}
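
; The vbroadcastsd/vbroadcastss tests below follow the same pattern as the
; integer broadcasts above: one unmasked call (all-ones mask), one
; merge-masked call, and one zero-masked call, combined with adds so a single
; return value exercises all three encodings.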

declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x19,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc9]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x18,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; CHECK-NEXT:    vaddps %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc9]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; CHECK-NEXT:    vaddps %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc9]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}
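
; vmovsldup/vmovshdup duplicate the even/odd float elements and vmovddup the
; even double elements; the autogenerated shuffle comments (e.g.
; xmm0[0,0,2,2]) spell out the lane pattern expected for each form.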

declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsldup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[0,0,2,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsldup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovshdup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[1,1,3,3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovshdup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[0,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}
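
; vpermilpd/vpermilps permute elements within each 128-bit lane under
; immediate control. The same IR immediate (22) decodes differently per
; element width: vpermilps takes two selector bits per float ([2,1,1,0]),
; while vpermilpd takes one bit per double and ignores the upper bits, so it
; prints as $6 and gives [0,1,3,2].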

declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06]
; CHECK-NEXT:    ## ymm2 = ymm0[0,1,3,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,3,2]
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,1,3,2]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01]
; CHECK-NEXT:    ## xmm2 = xmm0[1,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16]
; CHECK-NEXT:    ## ymm2 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16]
; CHECK-NEXT:    ## xmm2 = xmm0[2,1,1,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[2,1,1,0]
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[2,1,1,0]
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}
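
; Unlike vpermilpd, vpermpd/vpermq (perm.df/perm.di) permute across the whole
; 256-bit register: immediate 3 = 0b00000011 selects qword 3 into element 0
; and qword 0 into the rest, giving ymm0[3,0,0,0].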

declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}
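
; Each masked-store test issues the store twice: merge-masked through %k1 to
; the first pointer, then unmasked to the second, so FileCheck covers both
; the {%k1} and plain encodings of the aligned and unaligned moves.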

declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; CHECK-NEXT:    vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; CHECK-NEXT:    vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; CHECK-NEXT:    vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; CHECK-NEXT:    vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
; CHECK-NEXT:    vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; CHECK-NEXT:    vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
; CHECK-NEXT:    vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
; CHECK-NEXT:    vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}
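
; Integer stores use the element-size-tagged EVEX moves (vmovdqu32/64 here,
; vmovdqa32/64 below); the element size only affects behavior when a mask is
; applied.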

declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}
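
; The masked-load tests read three ways: unmasked into the passthru value,
; merge-masked on top of that result, and zero-masked, then add the last two
; so the plain, {%k1}, and {%k1} {z} load encodings are all checked.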

define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT:    vmovaps (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)

define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT:    vmovups (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)

define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT:    vmovapd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)

define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT:    vmovupd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)

define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    vmovaps (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)

define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    vmovups (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)

define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    vmovapd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)

define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    vmovupd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
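
; The integer masked loads mirror the FP pattern; the unaligned tests also
; take two pointers so the merge-masked load reads a different address than
; the unmasked one.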

declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)

define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)

define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}
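
; vpshufd uses vpermilps-style two-bit selectors: immediate 3 yields
; [3,0,0,0] within each 128-bit lane, hence ymm0[3,0,0,0,7,4,4,4] for the
; 256-bit form.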

declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7d,0x08,0x70,0xd0,0x03]
; CHECK-NEXT:    ## xmm2 = xmm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[3,0,0,0]
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[3,0,0,0]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7d,0x28,0x70,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}
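
; The compare tests return the k-register result as an i8. The q.256 forms
; produce only 4 mask bits, so a kshiftlw/kshiftrw pair by 12 clears the
; undefined upper bits before the kmovw to %eax.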

define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
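
; The compare intrinsics return their result in the low bits of an i8. The
; 8-element d_256 compares fill all 8 bits, so their result is moved to %eax
; directly; for the 4- and 2-element compares, a kshiftlw/kshiftrw pair first
; clears the undefined upper bits of %k0 (by $12 for four elements, and by
; $14 followed by $12 for two) before the kmovw.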

declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0]
; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}
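
; The '## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]'-style comments are the
; disassembler's decoding of each unpck shuffle: unpckh interleaves the high
; elements and unpckl the low elements of the two sources, per 128-bit lane
; in the 256-bit forms. The unpckl.ps tests below follow the same pattern.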

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; CHECK-NEXT: ## xmm2 = xmm0[1],xmm1[1]
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0]
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; CHECK-NEXT: ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
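
; The bitwise tests that follow exercise each logical intrinsic over a fixed
; operand matrix: rr (register-register), rm (register-memory) and rmb
; (memory with a scalar broadcast), each of them unmasked, merge-masked
; (k suffix) and zero-masked (kz suffix). In the merge-masked variants the
; result is produced in the passthru register and then copied into the
; return register; note that the copy is printed as a full vmovaps %zmm move.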

define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rm_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rmb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
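
; In the rmb/rmbk/rmbkz tests the IR splats a scalar load with an
; insertelement plus shufflevector pair, and the backend folds that splat
; into an EVEX embedded-broadcast memory operand, printed as (%rdi){1to4} or
; (%rdi){1to8}, so only one 32-bit element is read from memory.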

define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rm_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rmb_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rm_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rmb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rm_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rmb_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
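
; vpxord below, like vpandd and vpord above, is the EVEX form of the bitwise
; op with 32-bit element granularity; unlike the legacy VEX vpand/vpor/vpxor,
; these element-sized forms accept a {%k1} write-mask and an embedded
; broadcast operand.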
i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1941 ret <4 x i32> %res 1942} 1943 1944define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1945; CHECK-LABEL: test_mask_xor_epi32_rm_128: 1946; CHECK: ## BB#0: 1947; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07] 1948; CHECK-NEXT: retq ## encoding: [0xc3] 1949 %b = load <4 x i32>, <4 x i32>* %ptr_b 1950 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1951 ret <4 x i32> %res 1952} 1953 1954define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1955; CHECK-LABEL: test_mask_xor_epi32_rmk_128: 1956; CHECK: ## BB#0: 1957; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1958; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] 1959; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1960; CHECK-NEXT: retq ## encoding: [0xc3] 1961 %b = load <4 x i32>, <4 x i32>* %ptr_b 1962 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1963 ret <4 x i32> %res 1964} 1965 1966define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1967; CHECK-LABEL: test_mask_xor_epi32_rmkz_128: 1968; CHECK: ## BB#0: 1969; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1970; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07] 1971; CHECK-NEXT: retq ## encoding: [0xc3] 1972 %b = load <4 x i32>, <4 x i32>* %ptr_b 1973 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1974 ret <4 x i32> %res 1975} 1976 1977define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1978; CHECK-LABEL: test_mask_xor_epi32_rmb_128: 1979; CHECK: ## BB#0: 1980; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07] 1981; CHECK-NEXT: retq ## encoding: [0xc3] 1982 %q = load i32, i32* %ptr_b 1983 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1984 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1985 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1986 ret <4 x i32> %res 1987} 1988 1989define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1990; CHECK-LABEL: test_mask_xor_epi32_rmbk_128: 1991; CHECK: ## BB#0: 1992; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1993; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] 1994; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1995; CHECK-NEXT: retq ## encoding: [0xc3] 1996 %q = load i32, i32* %ptr_b 1997 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1998 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1999 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2000 ret <4 x i32> %res 2001} 2002 2003define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2004; CHECK-LABEL: test_mask_xor_epi32_rmbkz_128: 2005; CHECK: ## BB#0: 2006; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2007; CHECK-NEXT: vpxord 
(%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07] 2008; CHECK-NEXT: retq ## encoding: [0xc3] 2009 %q = load i32, i32* %ptr_b 2010 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2011 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2012 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2013 ret <4 x i32> %res 2014} 2015 2016declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2017 2018define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2019; CHECK-LABEL: test_mask_xor_epi32_rr_256: 2020; CHECK: ## BB#0: 2021; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1] 2022; CHECK-NEXT: retq ## encoding: [0xc3] 2023 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2024 ret <8 x i32> %res 2025} 2026 2027define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 2028; CHECK-LABEL: test_mask_xor_epi32_rrk_256: 2029; CHECK: ## BB#0: 2030; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2031; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] 2032; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2033; CHECK-NEXT: retq ## encoding: [0xc3] 2034 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2035 ret <8 x i32> %res 2036} 2037 2038define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 2039; CHECK-LABEL: test_mask_xor_epi32_rrkz_256: 2040; CHECK: ## BB#0: 2041; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2042; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1] 2043; CHECK-NEXT: retq ## encoding: [0xc3] 2044 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2045 ret <8 x i32> %res 2046} 2047 2048define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2049; CHECK-LABEL: test_mask_xor_epi32_rm_256: 2050; CHECK: ## BB#0: 2051; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07] 2052; CHECK-NEXT: retq ## encoding: [0xc3] 2053 %b = load <8 x i32>, <8 x i32>* %ptr_b 2054 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2055 ret <8 x i32> %res 2056} 2057 2058define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2059; CHECK-LABEL: test_mask_xor_epi32_rmk_256: 2060; CHECK: ## BB#0: 2061; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2062; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] 2063; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2064; CHECK-NEXT: retq ## encoding: [0xc3] 2065 %b = load <8 x i32>, <8 x i32>* %ptr_b 2066 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2067 ret <8 x i32> %res 2068} 2069 2070define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 2071; CHECK-LABEL: test_mask_xor_epi32_rmkz_256: 2072; CHECK: ## BB#0: 2073; CHECK-NEXT: kmovw %esi, %k1 ## encoding: 

define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
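
; 256-bit vpandnd: the same pattern over ymm registers, with {1to8}
; broadcasts in the rmb variants.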

define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
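
; 64-bit element forms: vpandnq uses the EVEX.W=1 encoding (P1 byte 0xfd
; rather than 0x7d) and the 128-bit broadcast form splats one i64 {1to2}.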

define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
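
; 256-bit vpandnq: same masking matrix over ymm registers, with {1to4}
; broadcasts in the rmb variants.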

define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)