; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_128:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_256:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_512:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x08]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_512:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x08]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rm_128:
; X64: # %bb.0:
; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rm_256:
; X64: # %bb.0:
; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_and_ps_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
; X86-LABEL: test_mask_and_ps_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rm_512:
; X64: # %bb.0:
; X64-NEXT: vandps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_and_ps_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x08]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_and_ps_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rmb_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmb_512:
; X64: # %bb.0:
; X64-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_and_ps_rmbk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x08]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_and_ps_rmbkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_or_ps_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rm_128:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_or_ps_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rm_256:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %b = load <8 x float>, <8 x float>* %ptr_b
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %b = load <8 x float>, <8 x float>* %ptr_b
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %b = load <8 x float>, <8 x float>* %ptr_b
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %q = load float, float* %ptr_b
 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %q = load float, float* %ptr_b
 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %q = load float, float* %ptr_b
 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
 ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_mask_or_ps_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_or_ps_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
; X86-LABEL: test_mask_or_ps_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rm_512:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %b = load <16 x float>, <16 x float>* %ptr_b
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_or_ps_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x08]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %b = load <16 x float>, <16 x float>* %ptr_b
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_or_ps_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %b = load <16 x float>, <16 x float>* %ptr_b
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rmb_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmb_512:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %q = load float, float* %ptr_b
 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_or_ps_rmbk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x08]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmbk_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %q = load float, float* %ptr_b
 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
 ret <16 x float> %res
}

define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_or_ps_rmbkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmbkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
 %q = load float, float* %ptr_b
 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
 ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_xor_ps_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
 ret <4 x float> %res
}

define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
{ 1446; X86-LABEL: test_mask_xor_ps_rrk_128: 1447; X86: # %bb.0: 1448; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1449; X86-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] 1450; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 1451; X86-NEXT: retl # encoding: [0xc3] 1452; 1453; X64-LABEL: test_mask_xor_ps_rrk_128: 1454; X64: # %bb.0: 1455; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1456; X64-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] 1457; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 1458; X64-NEXT: retq # encoding: [0xc3] 1459 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1460 ret <4 x float> %res 1461} 1462 1463define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { 1464; X86-LABEL: test_mask_xor_ps_rrkz_128: 1465; X86: # %bb.0: 1466; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1467; X86-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1] 1468; X86-NEXT: retl # encoding: [0xc3] 1469; 1470; X64-LABEL: test_mask_xor_ps_rrkz_128: 1471; X64: # %bb.0: 1472; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1473; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1] 1474; X64-NEXT: retq # encoding: [0xc3] 1475 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1476 ret <4 x float> %res 1477} 1478 1479define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { 1480; X86-LABEL: test_mask_xor_ps_rm_128: 1481; X86: # %bb.0: 1482; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1483; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x00] 1484; X86-NEXT: retl # encoding: [0xc3] 1485; 1486; X64-LABEL: test_mask_xor_ps_rm_128: 1487; X64: # %bb.0: 1488; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x07] 1489; X64-NEXT: retq # encoding: [0xc3] 1490 %b = load <4 x float>, <4 x float>* %ptr_b 1491 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1492 ret <4 x float> %res 1493} 1494 1495define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { 1496; X86-LABEL: test_mask_xor_ps_rmk_128: 1497; X86: # %bb.0: 1498; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1499; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1500; X86-NEXT: vxorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x08] 1501; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1502; X86-NEXT: retl # encoding: [0xc3] 1503; 1504; X64-LABEL: test_mask_xor_ps_rmk_128: 1505; X64: # %bb.0: 1506; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1507; X64-NEXT: vxorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f] 1508; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1509; X64-NEXT: retq # encoding: [0xc3] 1510 %b = load <4 x float>, <4 x float>* %ptr_b 1511 %res = call <4 x float> 
@llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1512 ret <4 x float> %res 1513} 1514 1515define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { 1516; X86-LABEL: test_mask_xor_ps_rmkz_128: 1517; X86: # %bb.0: 1518; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1519; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1520; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x00] 1521; X86-NEXT: retl # encoding: [0xc3] 1522; 1523; X64-LABEL: test_mask_xor_ps_rmkz_128: 1524; X64: # %bb.0: 1525; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1526; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07] 1527; X64-NEXT: retq # encoding: [0xc3] 1528 %b = load <4 x float>, <4 x float>* %ptr_b 1529 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1530 ret <4 x float> %res 1531} 1532 1533define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) { 1534; X86-LABEL: test_mask_xor_ps_rmb_128: 1535; X86: # %bb.0: 1536; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1537; X86-NEXT: vxorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x00] 1538; X86-NEXT: retl # encoding: [0xc3] 1539; 1540; X64-LABEL: test_mask_xor_ps_rmb_128: 1541; X64: # %bb.0: 1542; X64-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07] 1543; X64-NEXT: retq # encoding: [0xc3] 1544 %q = load float, float* %ptr_b 1545 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1546 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1547 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1548 ret <4 x float> %res 1549} 1550 1551define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 1552; X86-LABEL: test_mask_xor_ps_rmbk_128: 1553; X86: # %bb.0: 1554; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1555; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1556; X86-NEXT: vxorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x08] 1557; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1558; X86-NEXT: retl # encoding: [0xc3] 1559; 1560; X64-LABEL: test_mask_xor_ps_rmbk_128: 1561; X64: # %bb.0: 1562; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1563; X64-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f] 1564; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1565; X64-NEXT: retq # encoding: [0xc3] 1566 %q = load float, float* %ptr_b 1567 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1568 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1569 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1570 ret <4 x float> %res 1571} 1572 1573define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 1574; X86-LABEL: test_mask_xor_ps_rmbkz_128: 1575; X86: # %bb.0: 1576; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 1577; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1578; X86-NEXT: vxorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x00] 1579; X86-NEXT: retl # encoding: [0xc3] 1580; 1581; X64-LABEL: test_mask_xor_ps_rmbkz_128: 1582; X64: # %bb.0: 1583; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1584; X64-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07] 1585; X64-NEXT: retq # encoding: [0xc3] 1586 %q = load float, float* %ptr_b 1587 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1588 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1589 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1590 ret <4 x float> %res 1591} 1592 1593declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1594 1595define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) { 1596; CHECK-LABEL: test_mask_xor_ps_rr_256: 1597; CHECK: # %bb.0: 1598; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0xc1] 1599; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1600 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1601 ret <8 x float> %res 1602} 1603 1604define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { 1605; X86-LABEL: test_mask_xor_ps_rrk_256: 1606; X86: # %bb.0: 1607; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1608; X86-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] 1609; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 1610; X86-NEXT: retl # encoding: [0xc3] 1611; 1612; X64-LABEL: test_mask_xor_ps_rrk_256: 1613; X64: # %bb.0: 1614; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1615; X64-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] 1616; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 1617; X64-NEXT: retq # encoding: [0xc3] 1618 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1619 ret <8 x float> %res 1620} 1621 1622define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { 1623; X86-LABEL: test_mask_xor_ps_rrkz_256: 1624; X86: # %bb.0: 1625; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1626; X86-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1] 1627; X86-NEXT: retl # encoding: [0xc3] 1628; 1629; X64-LABEL: test_mask_xor_ps_rrkz_256: 1630; X64: # %bb.0: 1631; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1632; X64-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1] 1633; X64-NEXT: retq # encoding: [0xc3] 1634 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1635 ret <8 x float> %res 1636} 1637 1638define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 1639; X86-LABEL: test_mask_xor_ps_rm_256: 1640; X86: # %bb.0: 1641; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1642; 
X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x00] 1643; X86-NEXT: retl # encoding: [0xc3] 1644; 1645; X64-LABEL: test_mask_xor_ps_rm_256: 1646; X64: # %bb.0: 1647; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x07] 1648; X64-NEXT: retq # encoding: [0xc3] 1649 %b = load <8 x float>, <8 x float>* %ptr_b 1650 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1651 ret <8 x float> %res 1652} 1653 1654define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 1655; X86-LABEL: test_mask_xor_ps_rmk_256: 1656; X86: # %bb.0: 1657; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1658; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1659; X86-NEXT: vxorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x08] 1660; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1661; X86-NEXT: retl # encoding: [0xc3] 1662; 1663; X64-LABEL: test_mask_xor_ps_rmk_256: 1664; X64: # %bb.0: 1665; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1666; X64-NEXT: vxorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f] 1667; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1668; X64-NEXT: retq # encoding: [0xc3] 1669 %b = load <8 x float>, <8 x float>* %ptr_b 1670 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1671 ret <8 x float> %res 1672} 1673 1674define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { 1675; X86-LABEL: test_mask_xor_ps_rmkz_256: 1676; X86: # %bb.0: 1677; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1678; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1679; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x00] 1680; X86-NEXT: retl # encoding: [0xc3] 1681; 1682; X64-LABEL: test_mask_xor_ps_rmkz_256: 1683; X64: # %bb.0: 1684; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1685; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07] 1686; X64-NEXT: retq # encoding: [0xc3] 1687 %b = load <8 x float>, <8 x float>* %ptr_b 1688 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1689 ret <8 x float> %res 1690} 1691 1692define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 1693; X86-LABEL: test_mask_xor_ps_rmb_256: 1694; X86: # %bb.0: 1695; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1696; X86-NEXT: vxorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x00] 1697; X86-NEXT: retl # encoding: [0xc3] 1698; 1699; X64-LABEL: test_mask_xor_ps_rmb_256: 1700; X64: # %bb.0: 1701; X64-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07] 1702; X64-NEXT: retq # encoding: [0xc3] 1703 %q = load float, float* %ptr_b 1704 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1705 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1706 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, 
i8 -1) 1707 ret <8 x float> %res 1708} 1709 1710define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { 1711; X86-LABEL: test_mask_xor_ps_rmbk_256: 1712; X86: # %bb.0: 1713; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1714; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1715; X86-NEXT: vxorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x08] 1716; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1717; X86-NEXT: retl # encoding: [0xc3] 1718; 1719; X64-LABEL: test_mask_xor_ps_rmbk_256: 1720; X64: # %bb.0: 1721; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1722; X64-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f] 1723; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1724; X64-NEXT: retq # encoding: [0xc3] 1725 %q = load float, float* %ptr_b 1726 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1727 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1728 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1729 ret <8 x float> %res 1730} 1731 1732define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 1733; X86-LABEL: test_mask_xor_ps_rmbkz_256: 1734; X86: # %bb.0: 1735; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1736; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1737; X86-NEXT: vxorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x00] 1738; X86-NEXT: retl # encoding: [0xc3] 1739; 1740; X64-LABEL: test_mask_xor_ps_rmbkz_256: 1741; X64: # %bb.0: 1742; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1743; X64-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07] 1744; X64-NEXT: retq # encoding: [0xc3] 1745 %q = load float, float* %ptr_b 1746 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1747 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1748 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1749 ret <8 x float> %res 1750} 1751 1752declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1753 1754define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) { 1755; CHECK-LABEL: test_mask_xor_ps_rr_512: 1756; CHECK: # %bb.0: 1757; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1] 1758; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1759 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1760 ret <16 x float> %res 1761} 1762 1763define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 1764; X86-LABEL: test_mask_xor_ps_rrk_512: 1765; X86: # %bb.0: 1766; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1767; X86-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1] 1768; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1769; X86-NEXT: retl # encoding: [0xc3] 1770; 1771; X64-LABEL: 
test_mask_xor_ps_rrk_512: 1772; X64: # %bb.0: 1773; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1774; X64-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1] 1775; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1776; X64-NEXT: retq # encoding: [0xc3] 1777 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1778 ret <16 x float> %res 1779} 1780 1781define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 1782; X86-LABEL: test_mask_xor_ps_rrkz_512: 1783; X86: # %bb.0: 1784; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1785; X86-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1] 1786; X86-NEXT: retl # encoding: [0xc3] 1787; 1788; X64-LABEL: test_mask_xor_ps_rrkz_512: 1789; X64: # %bb.0: 1790; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1791; X64-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1] 1792; X64-NEXT: retq # encoding: [0xc3] 1793 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1794 ret <16 x float> %res 1795} 1796 1797define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 1798; X86-LABEL: test_mask_xor_ps_rm_512: 1799; X86: # %bb.0: 1800; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1801; X86-NEXT: vxorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x00] 1802; X86-NEXT: retl # encoding: [0xc3] 1803; 1804; X64-LABEL: test_mask_xor_ps_rm_512: 1805; X64: # %bb.0: 1806; X64-NEXT: vxorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07] 1807; X64-NEXT: retq # encoding: [0xc3] 1808 %b = load <16 x float>, <16 x float>* %ptr_b 1809 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1810 ret <16 x float> %res 1811} 1812 1813define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 1814; X86-LABEL: test_mask_xor_ps_rmk_512: 1815; X86: # %bb.0: 1816; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1817; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1818; X86-NEXT: vxorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x08] 1819; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1820; X86-NEXT: retl # encoding: [0xc3] 1821; 1822; X64-LABEL: test_mask_xor_ps_rmk_512: 1823; X64: # %bb.0: 1824; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1825; X64-NEXT: vxorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f] 1826; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1827; X64-NEXT: retq # encoding: [0xc3] 1828 %b = load <16 x float>, <16 x float>* %ptr_b 1829 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1830 ret <16 x float> %res 1831} 1832 1833define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 1834; X86-LABEL: test_mask_xor_ps_rmkz_512: 1835; X86: # %bb.0: 1836; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1837; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc5,0xf8,0x90,0x4c,0x24,0x08] 1838; X86-NEXT: vxorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x00] 1839; X86-NEXT: retl # encoding: [0xc3] 1840; 1841; X64-LABEL: test_mask_xor_ps_rmkz_512: 1842; X64: # %bb.0: 1843; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1844; X64-NEXT: vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07] 1845; X64-NEXT: retq # encoding: [0xc3] 1846 %b = load <16 x float>, <16 x float>* %ptr_b 1847 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1848 ret <16 x float> %res 1849} 1850 1851define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 1852; X86-LABEL: test_mask_xor_ps_rmb_512: 1853; X86: # %bb.0: 1854; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1855; X86-NEXT: vxorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x00] 1856; X86-NEXT: retl # encoding: [0xc3] 1857; 1858; X64-LABEL: test_mask_xor_ps_rmb_512: 1859; X64: # %bb.0: 1860; X64-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07] 1861; X64-NEXT: retq # encoding: [0xc3] 1862 %q = load float, float* %ptr_b 1863 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1864 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1865 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1866 ret <16 x float> %res 1867} 1868 1869define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 1870; X86-LABEL: test_mask_xor_ps_rmbk_512: 1871; X86: # %bb.0: 1872; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1873; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1874; X86-NEXT: vxorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x08] 1875; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1876; X86-NEXT: retl # encoding: [0xc3] 1877; 1878; X64-LABEL: test_mask_xor_ps_rmbk_512: 1879; X64: # %bb.0: 1880; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1881; X64-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f] 1882; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1883; X64-NEXT: retq # encoding: [0xc3] 1884 %q = load float, float* %ptr_b 1885 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1886 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1887 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1888 ret <16 x float> %res 1889} 1890 1891define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 1892; X86-LABEL: test_mask_xor_ps_rmbkz_512: 1893; X86: # %bb.0: 1894; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1895; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1896; X86-NEXT: vxorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x00] 1897; X86-NEXT: retl # encoding: [0xc3] 1898; 1899; X64-LABEL: test_mask_xor_ps_rmbkz_512: 1900; X64: # %bb.0: 1901; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1902; X64-NEXT: vxorps (%rdi){1to16}, 
%zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07] 1903; X64-NEXT: retq # encoding: [0xc3] 1904 %q = load float, float* %ptr_b 1905 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1906 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1907 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1908 ret <16 x float> %res 1909} 1910 1911declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1912 1913define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) { 1914; CHECK-LABEL: test_mask_mullo_epi64_rr_512: 1915; CHECK: # %bb.0: 1916; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1] 1917; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1918 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1919 ret <8 x i64> %res 1920} 1921 1922define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1923; X86-LABEL: test_mask_mullo_epi64_rrk_512: 1924; X86: # %bb.0: 1925; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1926; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1] 1927; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1928; X86-NEXT: retl # encoding: [0xc3] 1929; 1930; X64-LABEL: test_mask_mullo_epi64_rrk_512: 1931; X64: # %bb.0: 1932; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1933; X64-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1] 1934; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1935; X64-NEXT: retq # encoding: [0xc3] 1936 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1937 ret <8 x i64> %res 1938} 1939 1940define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1941; X86-LABEL: test_mask_mullo_epi64_rrkz_512: 1942; X86: # %bb.0: 1943; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1944; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1] 1945; X86-NEXT: retl # encoding: [0xc3] 1946; 1947; X64-LABEL: test_mask_mullo_epi64_rrkz_512: 1948; X64: # %bb.0: 1949; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1950; X64-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1] 1951; X64-NEXT: retq # encoding: [0xc3] 1952 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1953 ret <8 x i64> %res 1954} 1955 1956define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) { 1957; X86-LABEL: test_mask_mullo_epi64_rm_512: 1958; X86: # %bb.0: 1959; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1960; X86-NEXT: vpmullq (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x00] 1961; X86-NEXT: retl # encoding: [0xc3] 1962; 1963; X64-LABEL: test_mask_mullo_epi64_rm_512: 1964; X64: # %bb.0: 1965; X64-NEXT: vpmullq (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07] 1966; X64-NEXT: retq # encoding: [0xc3] 1967 %b = load <8 x i64>, <8 x i64>* %ptr_b 1968 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x 
i64> %b, <8 x i64> zeroinitializer, i8 -1) 1969 ret <8 x i64> %res 1970} 1971 1972define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 1973; X86-LABEL: test_mask_mullo_epi64_rmk_512: 1974; X86: # %bb.0: 1975; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1976; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1977; X86-NEXT: vpmullq (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x08] 1978; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1979; X86-NEXT: retl # encoding: [0xc3] 1980; 1981; X64-LABEL: test_mask_mullo_epi64_rmk_512: 1982; X64: # %bb.0: 1983; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1984; X64-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f] 1985; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1986; X64-NEXT: retq # encoding: [0xc3] 1987 %b = load <8 x i64>, <8 x i64>* %ptr_b 1988 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1989 ret <8 x i64> %res 1990} 1991 1992define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 1993; X86-LABEL: test_mask_mullo_epi64_rmkz_512: 1994; X86: # %bb.0: 1995; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1996; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1997; X86-NEXT: vpmullq (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x00] 1998; X86-NEXT: retl # encoding: [0xc3] 1999; 2000; X64-LABEL: test_mask_mullo_epi64_rmkz_512: 2001; X64: # %bb.0: 2002; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2003; X64-NEXT: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07] 2004; X64-NEXT: retq # encoding: [0xc3] 2005 %b = load <8 x i64>, <8 x i64>* %ptr_b 2006 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2007 ret <8 x i64> %res 2008} 2009 2010define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) { 2011; X86-LABEL: test_mask_mullo_epi64_rmb_512: 2012; X86: # %bb.0: 2013; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2014; X86-NEXT: vpmullq (%eax){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x00] 2015; X86-NEXT: retl # encoding: [0xc3] 2016; 2017; X64-LABEL: test_mask_mullo_epi64_rmb_512: 2018; X64: # %bb.0: 2019; X64-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07] 2020; X64-NEXT: retq # encoding: [0xc3] 2021 %q = load i64, i64* %ptr_b 2022 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2023 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2024 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2025 ret <8 x i64> %res 2026} 2027 2028define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2029; X86-LABEL: test_mask_mullo_epi64_rmbk_512: 2030; X86: # %bb.0: 2031; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2032; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2033; X86-NEXT: vpmullq (%eax){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x08] 2034; X86-NEXT: vmovdqa64 
%zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2035; X86-NEXT: retl # encoding: [0xc3] 2036; 2037; X64-LABEL: test_mask_mullo_epi64_rmbk_512: 2038; X64: # %bb.0: 2039; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2040; X64-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f] 2041; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2042; X64-NEXT: retq # encoding: [0xc3] 2043 %q = load i64, i64* %ptr_b 2044 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2045 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2046 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2047 ret <8 x i64> %res 2048} 2049 2050define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 2051; X86-LABEL: test_mask_mullo_epi64_rmbkz_512: 2052; X86: # %bb.0: 2053; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2054; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2055; X86-NEXT: vpmullq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x00] 2056; X86-NEXT: retl # encoding: [0xc3] 2057; 2058; X64-LABEL: test_mask_mullo_epi64_rmbkz_512: 2059; X64: # %bb.0: 2060; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2061; X64-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07] 2062; X64-NEXT: retq # encoding: [0xc3] 2063 %q = load i64, i64* %ptr_b 2064 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2065 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2066 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2067 ret <8 x i64> %res 2068} 2069declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2070 2071define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) { 2072; CHECK-LABEL: test_mask_mullo_epi64_rr_256: 2073; CHECK: # %bb.0: 2074; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1] 2075; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2076 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2077 ret <4 x i64> %res 2078} 2079 2080define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) { 2081; X86-LABEL: test_mask_mullo_epi64_rrk_256: 2082; X86: # %bb.0: 2083; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 2084; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] 2085; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2086; X86-NEXT: retl # encoding: [0xc3] 2087; 2088; X64-LABEL: test_mask_mullo_epi64_rrk_256: 2089; X64: # %bb.0: 2090; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2091; X64-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] 2092; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2093; X64-NEXT: retq # encoding: [0xc3] 2094 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2095 ret <4 x i64> %res 2096} 2097 2098define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, 
i8 %mask) { 2099; X86-LABEL: test_mask_mullo_epi64_rrkz_256: 2100; X86: # %bb.0: 2101; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 2102; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1] 2103; X86-NEXT: retl # encoding: [0xc3] 2104; 2105; X64-LABEL: test_mask_mullo_epi64_rrkz_256: 2106; X64: # %bb.0: 2107; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2108; X64-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1] 2109; X64-NEXT: retq # encoding: [0xc3] 2110 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2111 ret <4 x i64> %res 2112} 2113 2114define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) { 2115; X86-LABEL: test_mask_mullo_epi64_rm_256: 2116; X86: # %bb.0: 2117; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2118; X86-NEXT: vpmullq (%eax), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x00] 2119; X86-NEXT: retl # encoding: [0xc3] 2120; 2121; X64-LABEL: test_mask_mullo_epi64_rm_256: 2122; X64: # %bb.0: 2123; X64-NEXT: vpmullq (%rdi), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07] 2124; X64-NEXT: retq # encoding: [0xc3] 2125 %b = load <4 x i64>, <4 x i64>* %ptr_b 2126 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2127 ret <4 x i64> %res 2128} 2129 2130define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) { 2131; X86-LABEL: test_mask_mullo_epi64_rmk_256: 2132; X86: # %bb.0: 2133; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2134; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2135; X86-NEXT: vpmullq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x08] 2136; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2137; X86-NEXT: retl # encoding: [0xc3] 2138; 2139; X64-LABEL: test_mask_mullo_epi64_rmk_256: 2140; X64: # %bb.0: 2141; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2142; X64-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f] 2143; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2144; X64-NEXT: retq # encoding: [0xc3] 2145 %b = load <4 x i64>, <4 x i64>* %ptr_b 2146 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2147 ret <4 x i64> %res 2148} 2149 2150define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) { 2151; X86-LABEL: test_mask_mullo_epi64_rmkz_256: 2152; X86: # %bb.0: 2153; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2154; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2155; X86-NEXT: vpmullq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x00] 2156; X86-NEXT: retl # encoding: [0xc3] 2157; 2158; X64-LABEL: test_mask_mullo_epi64_rmkz_256: 2159; X64: # %bb.0: 2160; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2161; X64-NEXT: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07] 2162; X64-NEXT: retq # encoding: [0xc3] 2163 %b = load <4 x i64>, <4 x i64>* %ptr_b 2164 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x 
i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi64_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmullq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi64_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmullq (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpmullq (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi64_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmullq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vextractf64x2_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_maskz_vextractf64x2_256(<4 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vextractf64x2_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vextractf64x2_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_insertf64x2_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_insertf64x2_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_insertf64x2_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
  ret <4 x double> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_inserti64x2_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_inserti64x2_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_inserti64x2_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)

define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2d_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)

define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2d_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)

define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2q_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)

define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2q_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcastf64x2_256:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]

  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]

  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_broadcastf64x2_256(<2 x double> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcastf64x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcastf64x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]

  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256_load(<2 x double>* %x0ptr, <4 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vbroadcastf64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x00]
; X86-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcastf64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x07]
; X64-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X64-NEXT: retq # encoding: [0xc3]

  %x0 = load <2 x double>, <2 x double>* %x0ptr
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
  ret <4 x double> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcasti64x2_256:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]

  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]

  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_broadcasti64x2_256(<2 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcasti64x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcasti64x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]

  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256_load(<2 x i64>* %x0ptr, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vbroadcasti64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x00]
; X86-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcasti64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x07]
; X64-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X64-NEXT: retq # encoding: [0xc3]

  %x0 = load <2 x i64>, <2 x i64>* %x0ptr
  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
  ret <4 x i64> %res
}

declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)

define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)

define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
  ret i8 %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_qq2pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_qq2pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res)
  ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res)
  ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res)
  ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
  ret i8 %res1
}

declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}