; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd1]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

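; The remaining GPR broadcast tests follow the same pattern as the i32 test
; above: each intrinsic is called with an all-ones mask, a merge mask and a
; zeroing mask (zeroinitializer passthrough), and the three results are added
; so every form shows up in the checked output. On the 32-bit target the i64
; argument is passed on the stack, so the X86 checks expect a broadcast from
; memory followed by masked vmovdqa64 copies, while the X64 checks use the
; vpbroadcastq-from-GPR form directly.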
declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xd1]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}


 declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)

 define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xd1]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xd1]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}


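; The vector-source pbroadcast tests below feed the intrinsic a value built
; from a scalar load plus insertelement. In the unmasked case the broadcast
; folds the load, so the checks expect a single vbroadcastss from memory; the
; masked and zeroing variants broadcast lane 0 of %x0 instead, so the loaded
; value is dead and no load appears in their checks.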
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i32 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vbroadcastss (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32 * %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32 * %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pbroadcastd_256(<4 x i32> %x0, i8 %mask, i32 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32 * %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pbroadcastd_128(<4 x i32> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL:
test_int_x86_avx512_pbroadcastq_256: 243; CHECK: # %bb.0: 244; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc0] 245; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 246 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1) 247 ret <4 x i64> %res 248} 249 250define <4 x i64>@test_int_x86_avx512_mask_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) { 251; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_256: 252; X86: # %bb.0: 253; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 254; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 255; X86-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8] 256; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 257; X86-NEXT: retl # encoding: [0xc3] 258; 259; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_256: 260; X64: # %bb.0: 261; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 262; X64-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8] 263; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 264; X64-NEXT: retq # encoding: [0xc3] 265 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask) 266 ret <4 x i64> %res 267} 268 269define <4 x i64>@test_int_x86_avx512_maskz_pbroadcastq_256(<2 x i64> %x0, i8 %mask) { 270; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_256: 271; X86: # %bb.0: 272; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 273; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 274; X86-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0] 275; X86-NEXT: retl # encoding: [0xc3] 276; 277; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_256: 278; X64: # %bb.0: 279; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 280; X64-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0] 281; X64-NEXT: retq # encoding: [0xc3] 282 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask) 283 ret <4 x i64> %res 284} 285 286declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8) 287 288define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1) { 289; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128: 290; CHECK: # %bb.0: 291; CHECK-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 292; CHECK-NEXT: # xmm0 = xmm0[0,0] 293; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 294 %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1) 295 ret <2 x i64> %res 296} 297 298define <2 x i64>@test_int_x86_avx512_mask_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) { 299; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_128: 300; X86: # %bb.0: 301; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 302; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 303; X86-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8] 304; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 305; X86-NEXT: retl # encoding: [0xc3] 306; 307; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_128: 308; X64: # %bb.0: 309; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 310; X64-NEXT: vpbroadcastq 
%xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8] 311; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 312; X64-NEXT: retq # encoding: [0xc3] 313 %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask) 314 ret <2 x i64> %res 315} 316 317define <2 x i64>@test_int_x86_avx512_maskz_pbroadcastq_128(<2 x i64> %x0, i8 %mask) { 318; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_128: 319; X86: # %bb.0: 320; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 321; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 322; X86-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0] 323; X86-NEXT: retl # encoding: [0xc3] 324; 325; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_128: 326; X64: # %bb.0: 327; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 328; X64-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0] 329; X64-NEXT: retq # encoding: [0xc3] 330 %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask) 331 ret <2 x i64> %res 332} 333 334declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly 335 336define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1) { 337; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256: 338; CHECK: # %bb.0: 339; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc0] 340; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 341 %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> undef, i8 -1) 342 ret <4 x double> %res 343} 344 345define <4 x double> @test_x86_mask_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) { 346; X86-LABEL: test_x86_mask_vbroadcast_sd_pd_256: 347; X86: # %bb.0: 348; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 349; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 350; X86-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8] 351; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 352; X86-NEXT: retl # encoding: [0xc3] 353; 354; X64-LABEL: test_x86_mask_vbroadcast_sd_pd_256: 355; X64: # %bb.0: 356; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 357; X64-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8] 358; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 359; X64-NEXT: retq # encoding: [0xc3] 360 %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask) 361 ret <4 x double> %res 362} 363 364define <4 x double> @test_x86_maskz_vbroadcast_sd_pd_256(<2 x double> %a0, i8 %mask ) { 365; X86-LABEL: test_x86_maskz_vbroadcast_sd_pd_256: 366; X86: # %bb.0: 367; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 368; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 369; X86-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0] 370; X86-NEXT: retl # encoding: [0xc3] 371; 372; X64-LABEL: test_x86_maskz_vbroadcast_sd_pd_256: 373; X64: # %bb.0: 374; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 375; X64-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0] 376; X64-NEXT: 
retq # encoding: [0xc3] 377 %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 378 ret <4 x double> %res 379} 380 381declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly 382 383define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1) { 384; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256: 385; CHECK: # %bb.0: 386; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xc0] 387; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 388 %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1) 389 ret <8 x float> %res 390} 391 392define <8 x float> @test_x86_mask_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) { 393; X86-LABEL: test_x86_mask_vbroadcast_ss_ps_256: 394; X86: # %bb.0: 395; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 396; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 397; X86-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8] 398; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 399; X86-NEXT: retl # encoding: [0xc3] 400; 401; X64-LABEL: test_x86_mask_vbroadcast_ss_ps_256: 402; X64: # %bb.0: 403; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 404; X64-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8] 405; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 406; X64-NEXT: retq # encoding: [0xc3] 407 %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask) 408 ret <8 x float> %res 409} 410 411define <8 x float> @test_x86_maskz_vbroadcast_ss_ps_256(<4 x float> %a0, i8 %mask ) { 412; X86-LABEL: test_x86_maskz_vbroadcast_ss_ps_256: 413; X86: # %bb.0: 414; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 415; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 416; X86-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0] 417; X86-NEXT: retl # encoding: [0xc3] 418; 419; X64-LABEL: test_x86_maskz_vbroadcast_ss_ps_256: 420; X64: # %bb.0: 421; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 422; X64-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0] 423; X64-NEXT: retq # encoding: [0xc3] 424 %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 425 ret <8 x float> %res 426} 427 428declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly 429 430define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1) { 431; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128: 432; CHECK: # %bb.0: 433; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 434; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 435 %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> undef, i8 -1) 436 ret <4 x float> %res 437} 438 439 440define <4 x float> @test_x86_mask_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) { 441; X86-LABEL: test_x86_mask_vbroadcast_ss_ps_128: 442; X86: # %bb.0: 443; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x04] 444; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 445; X86-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8] 446; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 447; X86-NEXT: retl # encoding: [0xc3] 448; 449; X64-LABEL: test_x86_mask_vbroadcast_ss_ps_128: 450; X64: # %bb.0: 451; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 452; X64-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8] 453; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 454; X64-NEXT: retq # encoding: [0xc3] 455 %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) 456 ret <4 x float> %res 457} 458 459define <4 x float> @test_x86_maskz_vbroadcast_ss_ps_128(<4 x float> %a0, i8 %mask ) { 460; X86-LABEL: test_x86_maskz_vbroadcast_ss_ps_128: 461; X86: # %bb.0: 462; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 463; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 464; X86-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0] 465; X86-NEXT: retl # encoding: [0xc3] 466; 467; X64-LABEL: test_x86_maskz_vbroadcast_ss_ps_128: 468; X64: # %bb.0: 469; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 470; X64-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0] 471; X64-NEXT: retq # encoding: [0xc3] 472 %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask) 473 ret <4 x float> %res 474} 475 476declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8) 477 478define <4 x float>@test_int_x86_avx512_movsldup_128(<4 x float> %x0, <4 x float> %x1) { 479; CHECK-LABEL: test_int_x86_avx512_movsldup_128: 480; CHECK: # %bb.0: 481; CHECK-NEXT: vmovsldup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xc0] 482; CHECK-NEXT: # xmm0 = xmm0[0,0,2,2] 483; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 484 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) 485 ret <4 x float> %res 486} 487 488define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 489; X86-LABEL: test_int_x86_avx512_mask_movsldup_128: 490; X86: # %bb.0: 491; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 492; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 493; X86-NEXT: vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8] 494; X86-NEXT: # xmm1 {%k1} = xmm0[0,0,2,2] 495; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 496; X86-NEXT: retl # encoding: [0xc3] 497; 498; X64-LABEL: test_int_x86_avx512_mask_movsldup_128: 499; X64: # %bb.0: 500; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 501; X64-NEXT: vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8] 502; X64-NEXT: # xmm1 {%k1} = xmm0[0,0,2,2] 503; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 504; X64-NEXT: retq # encoding: [0xc3] 505 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) 506 ret <4 x float> %res 507} 508 509define <4 x float>@test_int_x86_avx512_maskz_movsldup_128(<4 x float> %x0, i8 %x2) { 510; X86-LABEL: 
test_int_x86_avx512_maskz_movsldup_128: 511; X86: # %bb.0: 512; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 513; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 514; X86-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0] 515; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,0,2,2] 516; X86-NEXT: retl # encoding: [0xc3] 517; 518; X64-LABEL: test_int_x86_avx512_maskz_movsldup_128: 519; X64: # %bb.0: 520; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 521; X64-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0] 522; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,0,2,2] 523; X64-NEXT: retq # encoding: [0xc3] 524 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) 525 ret <4 x float> %res 526} 527 528declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8) 529 530define <8 x float>@test_int_x86_avx512_movsldup_256(<8 x float> %x0, <8 x float> %x1) { 531; CHECK-LABEL: test_int_x86_avx512_movsldup_256: 532; CHECK: # %bb.0: 533; CHECK-NEXT: vmovsldup %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xc0] 534; CHECK-NEXT: # ymm0 = ymm0[0,0,2,2,4,4,6,6] 535; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 536 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) 537 ret <8 x float> %res 538} 539 540define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { 541; X86-LABEL: test_int_x86_avx512_mask_movsldup_256: 542; X86: # %bb.0: 543; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 544; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 545; X86-NEXT: vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8] 546; X86-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] 547; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 548; X86-NEXT: retl # encoding: [0xc3] 549; 550; X64-LABEL: test_int_x86_avx512_mask_movsldup_256: 551; X64: # %bb.0: 552; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 553; X64-NEXT: vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8] 554; X64-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] 555; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 556; X64-NEXT: retq # encoding: [0xc3] 557 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) 558 ret <8 x float> %res 559} 560 561define <8 x float>@test_int_x86_avx512_maskz_movsldup_256(<8 x float> %x0, i8 %x2) { 562; X86-LABEL: test_int_x86_avx512_maskz_movsldup_256: 563; X86: # %bb.0: 564; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 565; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 566; X86-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0] 567; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] 568; X86-NEXT: retl # encoding: [0xc3] 569; 570; X64-LABEL: test_int_x86_avx512_maskz_movsldup_256: 571; X64: # %bb.0: 572; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 573; X64-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0] 574; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] 575; X64-NEXT: retq # encoding: [0xc3] 576 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) 
577 ret <8 x float> %res 578} 579 580declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8) 581 582define <4 x float>@test_int_x86_avx512_movshdup_128(<4 x float> %x0, <4 x float> %x1) { 583; CHECK-LABEL: test_int_x86_avx512_movshdup_128: 584; CHECK: # %bb.0: 585; CHECK-NEXT: vmovshdup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xc0] 586; CHECK-NEXT: # xmm0 = xmm0[1,1,3,3] 587; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 588 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) 589 ret <4 x float> %res 590} 591 592define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 593; X86-LABEL: test_int_x86_avx512_mask_movshdup_128: 594; X86: # %bb.0: 595; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 596; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 597; X86-NEXT: vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8] 598; X86-NEXT: # xmm1 {%k1} = xmm0[1,1,3,3] 599; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 600; X86-NEXT: retl # encoding: [0xc3] 601; 602; X64-LABEL: test_int_x86_avx512_mask_movshdup_128: 603; X64: # %bb.0: 604; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 605; X64-NEXT: vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8] 606; X64-NEXT: # xmm1 {%k1} = xmm0[1,1,3,3] 607; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 608; X64-NEXT: retq # encoding: [0xc3] 609 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) 610 ret <4 x float> %res 611} 612 613define <4 x float>@test_int_x86_avx512_maskz_movshdup_128(<4 x float> %x0, i8 %x2) { 614; X86-LABEL: test_int_x86_avx512_maskz_movshdup_128: 615; X86: # %bb.0: 616; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 617; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 618; X86-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0] 619; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1,1,3,3] 620; X86-NEXT: retl # encoding: [0xc3] 621; 622; X64-LABEL: test_int_x86_avx512_maskz_movshdup_128: 623; X64: # %bb.0: 624; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 625; X64-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0] 626; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1,1,3,3] 627; X64-NEXT: retq # encoding: [0xc3] 628 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) 629 ret <4 x float> %res 630} 631 632declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8) 633 634define <8 x float>@test_int_x86_avx512_movshdup_256(<8 x float> %x0, <8 x float> %x1) { 635; CHECK-LABEL: test_int_x86_avx512_movshdup_256: 636; CHECK: # %bb.0: 637; CHECK-NEXT: vmovshdup %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xc0] 638; CHECK-NEXT: # ymm0 = ymm0[1,1,3,3,5,5,7,7] 639; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 640 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) 641 ret <8 x float> %res 642} 643 644define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { 645; X86-LABEL: test_int_x86_avx512_mask_movshdup_256: 646; X86: # %bb.0: 647; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x04] 648; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 649; X86-NEXT: vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8] 650; X86-NEXT: # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] 651; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 652; X86-NEXT: retl # encoding: [0xc3] 653; 654; X64-LABEL: test_int_x86_avx512_mask_movshdup_256: 655; X64: # %bb.0: 656; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 657; X64-NEXT: vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8] 658; X64-NEXT: # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] 659; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 660; X64-NEXT: retq # encoding: [0xc3] 661 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) 662 ret <8 x float> %res 663} 664 665define <8 x float>@test_int_x86_avx512_maskz_movshdup_256(<8 x float> %x0, i8 %x2) { 666; X86-LABEL: test_int_x86_avx512_maskz_movshdup_256: 667; X86: # %bb.0: 668; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 669; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 670; X86-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0] 671; X86-NEXT: # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] 672; X86-NEXT: retl # encoding: [0xc3] 673; 674; X64-LABEL: test_int_x86_avx512_maskz_movshdup_256: 675; X64: # %bb.0: 676; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 677; X64-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0] 678; X64-NEXT: # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] 679; X64-NEXT: retq # encoding: [0xc3] 680 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) 681 ret <8 x float> %res 682} 683 684declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8) 685 686define <2 x double>@test_int_x86_avx512_movddup_128(<2 x double> %x0, <2 x double> %x1) { 687; CHECK-LABEL: test_int_x86_avx512_movddup_128: 688; CHECK: # %bb.0: 689; CHECK-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 690; CHECK-NEXT: # xmm0 = xmm0[0,0] 691; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 692 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1) 693 ret <2 x double> %res 694} 695 696define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 697; X86-LABEL: test_int_x86_avx512_mask_movddup_128: 698; X86: # %bb.0: 699; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 700; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 701; X86-NEXT: vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8] 702; X86-NEXT: # xmm1 {%k1} = xmm0[0,0] 703; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 704; X86-NEXT: retl # encoding: [0xc3] 705; 706; X64-LABEL: test_int_x86_avx512_mask_movddup_128: 707; X64: # %bb.0: 708; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 709; X64-NEXT: vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8] 710; X64-NEXT: # xmm1 {%k1} = xmm0[0,0] 711; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 712; X64-NEXT: retq # encoding: [0xc3] 713 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> 
%x0, <2 x double> %x1, i8 %x2) 714 ret <2 x double> %res 715} 716 717define <2 x double>@test_int_x86_avx512_maskz_movddup_128(<2 x double> %x0, i8 %x2) { 718; X86-LABEL: test_int_x86_avx512_maskz_movddup_128: 719; X86: # %bb.0: 720; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 721; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 722; X86-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0] 723; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,0] 724; X86-NEXT: retl # encoding: [0xc3] 725; 726; X64-LABEL: test_int_x86_avx512_maskz_movddup_128: 727; X64: # %bb.0: 728; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 729; X64-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0] 730; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,0] 731; X64-NEXT: retq # encoding: [0xc3] 732 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2) 733 ret <2 x double> %res 734} 735 736declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8) 737 738define <4 x double>@test_int_x86_avx512_movddup_256(<4 x double> %x0, <4 x double> %x1) { 739; CHECK-LABEL: test_int_x86_avx512_movddup_256: 740; CHECK: # %bb.0: 741; CHECK-NEXT: vmovddup %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xc0] 742; CHECK-NEXT: # ymm0 = ymm0[0,0,2,2] 743; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 744 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1) 745 ret <4 x double> %res 746} 747 748 749define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) { 750; X86-LABEL: test_int_x86_avx512_mask_movddup_256: 751; X86: # %bb.0: 752; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 753; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 754; X86-NEXT: vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8] 755; X86-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2] 756; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 757; X86-NEXT: retl # encoding: [0xc3] 758; 759; X64-LABEL: test_int_x86_avx512_mask_movddup_256: 760; X64: # %bb.0: 761; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 762; X64-NEXT: vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8] 763; X64-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2] 764; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 765; X64-NEXT: retq # encoding: [0xc3] 766 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2) 767 ret <4 x double> %res 768} 769 770define <4 x double>@test_int_x86_avx512_maskz_movddup_256(<4 x double> %x0, i8 %x2) { 771; X86-LABEL: test_int_x86_avx512_maskz_movddup_256: 772; X86: # %bb.0: 773; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 774; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 775; X86-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0] 776; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2] 777; X86-NEXT: retl # encoding: [0xc3] 778; 779; X64-LABEL: test_int_x86_avx512_maskz_movddup_256: 780; X64: # %bb.0: 781; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 782; X64-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0] 783; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2] 784; X64-NEXT: retq # 
encoding: [0xc3] 785 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2) 786 ret <4 x double> %res 787} 788 789declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8) 790 791define <4 x double>@test_int_x86_avx512_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2) { 792; CHECK-LABEL: test_int_x86_avx512_vpermil_pd_256: 793; CHECK: # %bb.0: 794; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x06] 795; CHECK-NEXT: # ymm0 = ymm0[0,1,3,2] 796; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 797 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1) 798 ret <4 x double> %res 799} 800 801define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 802; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: 803; X86: # %bb.0: 804; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 805; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 806; X86-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06] 807; X86-NEXT: # ymm1 {%k1} = ymm0[0,1,3,2] 808; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 809; X86-NEXT: retl # encoding: [0xc3] 810; 811; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: 812; X64: # %bb.0: 813; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 814; X64-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06] 815; X64-NEXT: # ymm1 {%k1} = ymm0[0,1,3,2] 816; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 817; X64-NEXT: retq # encoding: [0xc3] 818 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) 819 ret <4 x double> %res 820} 821 822define <4 x double>@test_int_x86_avx512_maskz_vpermil_pd_256(<4 x double> %x0, i8 %x3) { 823; X86-LABEL: test_int_x86_avx512_maskz_vpermil_pd_256: 824; X86: # %bb.0: 825; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 826; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 827; X86-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06] 828; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,3,2] 829; X86-NEXT: retl # encoding: [0xc3] 830; 831; X64-LABEL: test_int_x86_avx512_maskz_vpermil_pd_256: 832; X64: # %bb.0: 833; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 834; X64-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06] 835; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,3,2] 836; X64-NEXT: retq # encoding: [0xc3] 837 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) 838 ret <4 x double> %res 839} 840 841declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8) 842 843define <2 x double>@test_int_x86_avx512_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2) { 844; CHECK-LABEL: test_int_x86_avx512_vpermil_pd_128: 845; CHECK: # %bb.0: 846; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 847; CHECK-NEXT: # xmm0 = xmm0[1,0] 848; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 849 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x 
double> %x2, i8 -1) 850 ret <2 x double> %res 851} 852 853define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 854; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: 855; X86: # %bb.0: 856; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 857; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 858; X86-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] 859; X86-NEXT: # xmm1 {%k1} = xmm0[1,0] 860; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 861; X86-NEXT: retl # encoding: [0xc3] 862; 863; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: 864; X64: # %bb.0: 865; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 866; X64-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] 867; X64-NEXT: # xmm1 {%k1} = xmm0[1,0] 868; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 869; X64-NEXT: retq # encoding: [0xc3] 870 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) 871 ret <2 x double> %res 872} 873 874define <2 x double>@test_int_x86_avx512_maskz_vpermil_pd_128(<2 x double> %x0, i8 %x3) { 875; X86-LABEL: test_int_x86_avx512_maskz_vpermil_pd_128: 876; X86: # %bb.0: 877; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 878; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 879; X86-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01] 880; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1,0] 881; X86-NEXT: retl # encoding: [0xc3] 882; 883; X64-LABEL: test_int_x86_avx512_maskz_vpermil_pd_128: 884; X64: # %bb.0: 885; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 886; X64-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01] 887; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1,0] 888; X64-NEXT: retq # encoding: [0xc3] 889 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) 890 ret <2 x double> %res 891} 892 893declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8) 894 895define <8 x float>@test_int_x86_avx512_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2) { 896; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_256: 897; CHECK: # %bb.0: 898; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x16] 899; CHECK-NEXT: # ymm0 = ymm0[2,1,1,0,6,5,5,4] 900; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 901 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1) 902 ret <8 x float> %res 903} 904 905define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 906; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: 907; X86: # %bb.0: 908; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 909; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 910; X86-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] 911; X86-NEXT: # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] 912; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 913; X86-NEXT: retl # encoding: [0xc3] 914; 915; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: 916; X64: # %bb.0: 917; 
X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 918; X64-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] 919; X64-NEXT: # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] 920; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 921; X64-NEXT: retq # encoding: [0xc3] 922 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3) 923 ret <8 x float> %res 924} 925 926define <8 x float>@test_int_x86_avx512_maskz_vpermil_ps_256(<8 x float> %x0, i8 %x3) { 927; X86-LABEL: test_int_x86_avx512_maskz_vpermil_ps_256: 928; X86: # %bb.0: 929; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 930; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 931; X86-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16] 932; X86-NEXT: # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] 933; X86-NEXT: retl # encoding: [0xc3] 934; 935; X64-LABEL: test_int_x86_avx512_maskz_vpermil_ps_256: 936; X64: # %bb.0: 937; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 938; X64-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16] 939; X64-NEXT: # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] 940; X64-NEXT: retq # encoding: [0xc3] 941 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) 942 ret <8 x float> %res 943} 944 945declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8) 946 947define <4 x float>@test_int_x86_avx512_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2) { 948; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_128: 949; CHECK: # %bb.0: 950; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x16] 951; CHECK-NEXT: # xmm0 = xmm0[2,1,1,0] 952; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 953 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1) 954 ret <4 x float> %res 955} 956 957define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 958; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: 959; X86: # %bb.0: 960; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 961; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 962; X86-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] 963; X86-NEXT: # xmm1 {%k1} = xmm0[2,1,1,0] 964; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 965; X86-NEXT: retl # encoding: [0xc3] 966; 967; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: 968; X64: # %bb.0: 969; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 970; X64-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] 971; X64-NEXT: # xmm1 {%k1} = xmm0[2,1,1,0] 972; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 973; X64-NEXT: retq # encoding: [0xc3] 974 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) 975 ret <4 x float> %res 976} 977 978define <4 x float>@test_int_x86_avx512_maskz_vpermil_ps_128(<4 x float> %x0, i8 %x3) { 979; X86-LABEL: test_int_x86_avx512_maskz_vpermil_ps_128: 980; X86: # %bb.0: 981; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_df_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xc0,0x03]
; CHECK-NEXT: # ymm0 = ymm0[3,0,0,0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_perm_df_256(<4 x double> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_df_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_df_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  ret <4 x double> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_di_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xc0,0x03]
; CHECK-NEXT: # ymm0 = ymm0[3,0,0,0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_perm_di_256(<4 x i64> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_di_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_di_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovapd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x01]
; X86-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovapd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; X64-NEXT: vmovapd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovapd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x01]
; X86-NEXT: vmovapd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovapd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; X64-NEXT: vmovapd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovupd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x01]
; X86-NEXT: vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovupd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; X64-NEXT: vmovupd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

1177define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { 1178; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_256: 1179; X86: # %bb.0: 1180; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1181; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1182; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1183; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1184; X86-NEXT: vmovupd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x01] 1185; X86-NEXT: vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00] 1186; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1187; X86-NEXT: retl # encoding: [0xc3] 1188; 1189; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_256: 1190; X64: # %bb.0: 1191; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1192; X64-NEXT: vmovupd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07] 1193; X64-NEXT: vmovupd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06] 1194; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1195; X64-NEXT: retq # encoding: [0xc3] 1196 call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) 1197 call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) 1198 ret void 1199} 1200 1201declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8) 1202 1203define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { 1204; X86-LABEL: test_int_x86_avx512_mask_store_ps_128: 1205; X86: # %bb.0: 1206; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1207; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1208; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1209; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1210; X86-NEXT: vmovaps %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x01] 1211; X86-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 1212; X86-NEXT: retl # encoding: [0xc3] 1213; 1214; X64-LABEL: test_int_x86_avx512_mask_store_ps_128: 1215; X64: # %bb.0: 1216; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1217; X64-NEXT: vmovaps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07] 1218; X64-NEXT: vmovaps %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06] 1219; X64-NEXT: retq # encoding: [0xc3] 1220 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) 1221 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) 1222 ret void 1223} 1224 1225declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8) 1226 1227define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { 1228; X86-LABEL: test_int_x86_avx512_mask_store_ps_256: 1229; X86: # %bb.0: 1230; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1231; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1232; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1233; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1234; X86-NEXT: vmovaps %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x01] 1235; X86-NEXT: vmovaps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x00] 1236; X86-NEXT: vzeroupper # encoding: 
[0xc5,0xf8,0x77] 1237; X86-NEXT: retl # encoding: [0xc3] 1238; 1239; X64-LABEL: test_int_x86_avx512_mask_store_ps_256: 1240; X64: # %bb.0: 1241; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1242; X64-NEXT: vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07] 1243; X64-NEXT: vmovaps %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06] 1244; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1245; X64-NEXT: retq # encoding: [0xc3] 1246 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) 1247 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) 1248 ret void 1249} 1250 1251declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8) 1252 1253define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { 1254; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_128: 1255; X86: # %bb.0: 1256; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1257; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1258; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1259; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1260; X86-NEXT: vmovups %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x01] 1261; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 1262; X86-NEXT: retl # encoding: [0xc3] 1263; 1264; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_128: 1265; X64: # %bb.0: 1266; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1267; X64-NEXT: vmovups %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07] 1268; X64-NEXT: vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06] 1269; X64-NEXT: retq # encoding: [0xc3] 1270 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) 1271 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) 1272 ret void 1273} 1274 1275declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8) 1276 1277define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { 1278; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_256: 1279; X86: # %bb.0: 1280; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1281; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1282; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1283; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1284; X86-NEXT: vmovups %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x01] 1285; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00] 1286; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1287; X86-NEXT: retl # encoding: [0xc3] 1288; 1289; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_256: 1290; X64: # %bb.0: 1291; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1292; X64-NEXT: vmovups %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07] 1293; X64-NEXT: vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06] 1294; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1295; X64-NEXT: retq # encoding: [0xc3] 1296 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) 1297 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) 1298 ret void 1299} 1300 1301declare void 
@llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8) 1302 1303define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { 1304; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128: 1305; X86: # %bb.0: 1306; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1307; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1308; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1309; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1310; X86-NEXT: vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01] 1311; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 1312; X86-NEXT: retl # encoding: [0xc3] 1313; 1314; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128: 1315; X64: # %bb.0: 1316; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1317; X64-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07] 1318; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 1319; X64-NEXT: retq # encoding: [0xc3] 1320 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) 1321 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) 1322 ret void 1323} 1324 1325declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8) 1326 1327define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { 1328; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256: 1329; X86: # %bb.0: 1330; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1331; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1332; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1333; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1334; X86-NEXT: vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01] 1335; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 1336; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1337; X86-NEXT: retl # encoding: [0xc3] 1338; 1339; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256: 1340; X64: # %bb.0: 1341; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1342; X64-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07] 1343; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 1344; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1345; X64-NEXT: retq # encoding: [0xc3] 1346 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) 1347 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) 1348 ret void 1349} 1350 1351declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8) 1352 1353define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { 1354; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128: 1355; X86: # %bb.0: 1356; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1357; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1358; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1359; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1360; X86-NEXT: vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01] 1361; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 1362; 
X86-NEXT: retl # encoding: [0xc3] 1363; 1364; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128: 1365; X64: # %bb.0: 1366; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1367; X64-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07] 1368; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 1369; X64-NEXT: retq # encoding: [0xc3] 1370 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) 1371 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) 1372 ret void 1373} 1374 1375declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8) 1376 1377define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { 1378; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256: 1379; X86: # %bb.0: 1380; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1381; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1382; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1383; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1384; X86-NEXT: vmovdqu32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x01] 1385; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 1386; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1387; X86-NEXT: retl # encoding: [0xc3] 1388; 1389; X64-LABEL: test_int_x86_avx512_mask_storeu_d_256: 1390; X64: # %bb.0: 1391; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1392; X64-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07] 1393; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 1394; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1395; X64-NEXT: retq # encoding: [0xc3] 1396 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) 1397 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) 1398 ret void 1399} 1400 1401declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8) 1402 1403define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { 1404; X86-LABEL: test_int_x86_avx512_mask_store_q_128: 1405; X86: # %bb.0: 1406; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1407; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1408; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1409; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1410; X86-NEXT: vmovdqa64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x01] 1411; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00] 1412; X86-NEXT: retl # encoding: [0xc3] 1413; 1414; X64-LABEL: test_int_x86_avx512_mask_store_q_128: 1415; X64: # %bb.0: 1416; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1417; X64-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07] 1418; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06] 1419; X64-NEXT: retq # encoding: [0xc3] 1420 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) 1421 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) 1422 ret void 1423} 1424 1425declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8) 1426 1427define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, 
i8* %ptr2, <4 x i64> %x1, i8 %x2) { 1428; X86-LABEL: test_int_x86_avx512_mask_store_q_256: 1429; X86: # %bb.0: 1430; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1431; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1432; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1433; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1434; X86-NEXT: vmovdqa64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x01] 1435; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00] 1436; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1437; X86-NEXT: retl # encoding: [0xc3] 1438; 1439; X64-LABEL: test_int_x86_avx512_mask_store_q_256: 1440; X64: # %bb.0: 1441; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1442; X64-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07] 1443; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06] 1444; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1445; X64-NEXT: retq # encoding: [0xc3] 1446 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) 1447 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) 1448 ret void 1449} 1450 1451declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8) 1452 1453define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { 1454; X86-LABEL: test_int_x86_avx512_mask_store_d_128: 1455; X86: # %bb.0: 1456; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1457; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1458; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1459; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1460; X86-NEXT: vmovdqa32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x01] 1461; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00] 1462; X86-NEXT: retl # encoding: [0xc3] 1463; 1464; X64-LABEL: test_int_x86_avx512_mask_store_d_128: 1465; X64: # %bb.0: 1466; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1467; X64-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07] 1468; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06] 1469; X64-NEXT: retq # encoding: [0xc3] 1470 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) 1471 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) 1472 ret void 1473} 1474 1475declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8) 1476 1477define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { 1478; X86-LABEL: test_int_x86_avx512_mask_store_d_256: 1479; X86: # %bb.0: 1480; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1481; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1482; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1483; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1484; X86-NEXT: vmovdqa32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x01] 1485; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00] 1486; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1487; X86-NEXT: retl # encoding: [0xc3] 1488; 1489; X64-LABEL: 
test_int_x86_avx512_mask_store_d_256: 1490; X64: # %bb.0: 1491; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1492; X64-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07] 1493; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06] 1494; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1495; X64-NEXT: retq # encoding: [0xc3] 1496 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) 1497 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) 1498 ret void 1499} 1500 1501define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) { 1502; X86-LABEL: test_mask_load_aligned_ps_256: 1503; X86: # %bb.0: 1504; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1505; X86-NEXT: vmovaps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x00] 1506; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1507; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1508; X86-NEXT: vmovaps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x00] 1509; X86-NEXT: vmovaps (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x08] 1510; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] 1511; X86-NEXT: retl # encoding: [0xc3] 1512; 1513; X64-LABEL: test_mask_load_aligned_ps_256: 1514; X64: # %bb.0: 1515; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] 1516; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1517; X64-NEXT: vmovaps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07] 1518; X64-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f] 1519; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] 1520; X64-NEXT: retq # encoding: [0xc3] 1521 %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1) 1522 %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask) 1523 %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask) 1524 %res4 = fadd <8 x float> %res2, %res1 1525 ret <8 x float> %res4 1526} 1527 1528declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8) 1529 1530define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) { 1531; X86-LABEL: test_mask_load_unaligned_ps_256: 1532; X86: # %bb.0: 1533; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1534; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00] 1535; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1536; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1537; X86-NEXT: vmovups (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x00] 1538; X86-NEXT: vmovups (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x08] 1539; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] 1540; X86-NEXT: retl # encoding: [0xc3] 1541; 1542; X64-LABEL: test_mask_load_unaligned_ps_256: 1543; X64: # %bb.0: 1544; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07] 1545; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1546; X64-NEXT: vmovups (%rdi), %ymm0 {%k1} # 
encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07] 1547; X64-NEXT: vmovups (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f] 1548; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] 1549; X64-NEXT: retq # encoding: [0xc3] 1550 %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1) 1551 %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask) 1552 %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask) 1553 %res4 = fadd <8 x float> %res2, %res1 1554 ret <8 x float> %res4 1555} 1556 1557declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8) 1558 1559define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) { 1560; X86-LABEL: test_mask_load_aligned_pd_256: 1561; X86: # %bb.0: 1562; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1563; X86-NEXT: vmovapd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x00] 1564; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1565; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1566; X86-NEXT: vmovapd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x00] 1567; X86-NEXT: vmovapd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x08] 1568; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] 1569; X86-NEXT: retl # encoding: [0xc3] 1570; 1571; X64-LABEL: test_mask_load_aligned_pd_256: 1572; X64: # %bb.0: 1573; X64-NEXT: vmovapd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07] 1574; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1575; X64-NEXT: vmovapd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07] 1576; X64-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f] 1577; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] 1578; X64-NEXT: retq # encoding: [0xc3] 1579 %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1) 1580 %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask) 1581 %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask) 1582 %res4 = fadd <4 x double> %res2, %res1 1583 ret <4 x double> %res4 1584} 1585 1586declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8) 1587 1588define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) { 1589; X86-LABEL: test_mask_load_unaligned_pd_256: 1590; X86: # %bb.0: 1591; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1592; X86-NEXT: vmovupd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x00] 1593; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1594; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1595; X86-NEXT: vmovupd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x00] 1596; X86-NEXT: vmovupd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x08] 1597; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] 1598; X86-NEXT: retl # encoding: [0xc3] 1599; 1600; X64-LABEL: test_mask_load_unaligned_pd_256: 1601; X64: # %bb.0: 1602; X64-NEXT: vmovupd 
(%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07] 1603; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1604; X64-NEXT: vmovupd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07] 1605; X64-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f] 1606; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] 1607; X64-NEXT: retq # encoding: [0xc3] 1608 %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1) 1609 %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask) 1610 %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask) 1611 %res4 = fadd <4 x double> %res2, %res1 1612 ret <4 x double> %res4 1613} 1614 1615declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8) 1616 1617define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) { 1618; X86-LABEL: test_mask_load_aligned_ps_128: 1619; X86: # %bb.0: 1620; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1621; X86-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1622; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1623; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1624; X86-NEXT: vmovaps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x00] 1625; X86-NEXT: vmovaps (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x08] 1626; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] 1627; X86-NEXT: retl # encoding: [0xc3] 1628; 1629; X64-LABEL: test_mask_load_aligned_ps_128: 1630; X64: # %bb.0: 1631; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1632; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1633; X64-NEXT: vmovaps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07] 1634; X64-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f] 1635; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] 1636; X64-NEXT: retq # encoding: [0xc3] 1637 %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1) 1638 %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask) 1639 %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask) 1640 %res4 = fadd <4 x float> %res2, %res1 1641 ret <4 x float> %res4 1642} 1643 1644declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8) 1645 1646define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) { 1647; X86-LABEL: test_mask_load_unaligned_ps_128: 1648; X86: # %bb.0: 1649; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1650; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 1651; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1652; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1653; X86-NEXT: vmovups (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x00] 1654; X86-NEXT: vmovups (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x08] 1655; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf0,0x58,0xc0] 1656; X86-NEXT: retl # encoding: [0xc3] 1657; 1658; X64-LABEL: test_mask_load_unaligned_ps_128: 1659; X64: # %bb.0: 1660; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 1661; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1662; X64-NEXT: vmovups (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07] 1663; X64-NEXT: vmovups (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f] 1664; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] 1665; X64-NEXT: retq # encoding: [0xc3] 1666 %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1) 1667 %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask) 1668 %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask) 1669 %res4 = fadd <4 x float> %res2, %res1 1670 ret <4 x float> %res4 1671} 1672 1673declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8) 1674 1675define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) { 1676; X86-LABEL: test_mask_load_aligned_pd_128: 1677; X86: # %bb.0: 1678; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1679; X86-NEXT: vmovapd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00] 1680; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1681; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1682; X86-NEXT: vmovapd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x00] 1683; X86-NEXT: vmovapd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x08] 1684; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] 1685; X86-NEXT: retl # encoding: [0xc3] 1686; 1687; X64-LABEL: test_mask_load_aligned_pd_128: 1688; X64: # %bb.0: 1689; X64-NEXT: vmovapd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07] 1690; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1691; X64-NEXT: vmovapd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07] 1692; X64-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f] 1693; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] 1694; X64-NEXT: retq # encoding: [0xc3] 1695 %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1) 1696 %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask) 1697 %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask) 1698 %res4 = fadd <2 x double> %res2, %res1 1699 ret <2 x double> %res4 1700} 1701 1702declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8) 1703 1704define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) { 1705; X86-LABEL: test_mask_load_unaligned_pd_128: 1706; X86: # %bb.0: 1707; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1708; X86-NEXT: vmovupd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x00] 1709; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1710; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1711; X86-NEXT: vmovupd (%eax), %xmm0 {%k1} # encoding: 
[0x62,0xf1,0xfd,0x09,0x10,0x00] 1712; X86-NEXT: vmovupd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x08] 1713; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] 1714; X86-NEXT: retl # encoding: [0xc3] 1715; 1716; X64-LABEL: test_mask_load_unaligned_pd_128: 1717; X64: # %bb.0: 1718; X64-NEXT: vmovupd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07] 1719; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1720; X64-NEXT: vmovupd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07] 1721; X64-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f] 1722; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] 1723; X64-NEXT: retq # encoding: [0xc3] 1724 %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1) 1725 %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask) 1726 %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask) 1727 %res4 = fadd <2 x double> %res2, %res1 1728 ret <2 x double> %res4 1729} 1730 1731declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8) 1732 1733declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8) 1734 1735define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) { 1736; X86-LABEL: test_mask_load_unaligned_d_128: 1737; X86: # %bb.0: 1738; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1739; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1740; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] 1741; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1742; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1743; X86-NEXT: vmovdqu32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x00] 1744; X86-NEXT: vmovdqu32 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x09] 1745; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 1746; X86-NEXT: retl # encoding: [0xc3] 1747; 1748; X64-LABEL: test_mask_load_unaligned_d_128: 1749; X64: # %bb.0: 1750; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] 1751; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1752; X64-NEXT: vmovdqu32 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06] 1753; X64-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f] 1754; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 1755; X64-NEXT: retq # encoding: [0xc3] 1756 %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1) 1757 %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask) 1758 %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask) 1759 %res4 = add <4 x i32> %res2, %res1 1760 ret <4 x i32> %res4 1761} 1762 1763declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8) 1764 1765define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) { 1766; X86-LABEL: test_mask_load_unaligned_d_256: 1767; X86: # %bb.0: 1768; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax # encoding: [0x8b,0x44,0x24,0x08] 1769; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1770; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] 1771; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1772; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1773; X86-NEXT: vmovdqu32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x00] 1774; X86-NEXT: vmovdqu32 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x09] 1775; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 1776; X86-NEXT: retl # encoding: [0xc3] 1777; 1778; X64-LABEL: test_mask_load_unaligned_d_256: 1779; X64: # %bb.0: 1780; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] 1781; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1782; X64-NEXT: vmovdqu32 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06] 1783; X64-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f] 1784; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 1785; X64-NEXT: retq # encoding: [0xc3] 1786 %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1) 1787 %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask) 1788 %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask) 1789 %res4 = add <8 x i32> %res2, %res1 1790 ret <8 x i32> %res4 1791} 1792 1793declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8) 1794 1795define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) { 1796; X86-LABEL: test_mask_load_unaligned_q_128: 1797; X86: # %bb.0: 1798; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1799; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1800; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] 1801; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1802; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1803; X86-NEXT: vmovdqu64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x00] 1804; X86-NEXT: vmovdqu64 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x09] 1805; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 1806; X86-NEXT: retl # encoding: [0xc3] 1807; 1808; X64-LABEL: test_mask_load_unaligned_q_128: 1809; X64: # %bb.0: 1810; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] 1811; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1812; X64-NEXT: vmovdqu64 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06] 1813; X64-NEXT: vmovdqu64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f] 1814; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 1815; X64-NEXT: retq # encoding: [0xc3] 1816 %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1) 1817 %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask) 1818 %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask) 1819 %res4 = add <2 x i64> %res2, %res1 1820 ret <2 x i64> 
%res4 1821} 1822 1823declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8) 1824 1825define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) { 1826; X86-LABEL: test_mask_load_unaligned_q_256: 1827; X86: # %bb.0: 1828; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1829; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1830; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] 1831; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1832; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1833; X86-NEXT: vmovdqu64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x00] 1834; X86-NEXT: vmovdqu64 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x09] 1835; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 1836; X86-NEXT: retl # encoding: [0xc3] 1837; 1838; X64-LABEL: test_mask_load_unaligned_q_256: 1839; X64: # %bb.0: 1840; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] 1841; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1842; X64-NEXT: vmovdqu64 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06] 1843; X64-NEXT: vmovdqu64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f] 1844; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 1845; X64-NEXT: retq # encoding: [0xc3] 1846 %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1) 1847 %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask) 1848 %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask) 1849 %res4 = add <4 x i64> %res2, %res1 1850 ret <4 x i64> %res4 1851} 1852 1853declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8) 1854 1855define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) { 1856; X86-LABEL: test_mask_load_aligned_d_128: 1857; X86: # %bb.0: 1858; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1859; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00] 1860; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1861; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1862; X86-NEXT: vmovdqa32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x00] 1863; X86-NEXT: vmovdqa32 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x08] 1864; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 1865; X86-NEXT: retl # encoding: [0xc3] 1866; 1867; X64-LABEL: test_mask_load_aligned_d_128: 1868; X64: # %bb.0: 1869; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07] 1870; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1871; X64-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07] 1872; X64-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f] 1873; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 1874; X64-NEXT: retq # encoding: [0xc3] 1875 %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1) 1876 %res1 = call <4 x i32> 
@llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask) 1877 %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask) 1878 %res4 = add <4 x i32> %res2, %res1 1879 ret <4 x i32> %res4 1880} 1881 1882declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8) 1883 1884define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) { 1885; X86-LABEL: test_mask_load_aligned_d_256: 1886; X86: # %bb.0: 1887; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1888; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00] 1889; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1890; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1891; X86-NEXT: vmovdqa32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x00] 1892; X86-NEXT: vmovdqa32 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x08] 1893; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 1894; X86-NEXT: retl # encoding: [0xc3] 1895; 1896; X64-LABEL: test_mask_load_aligned_d_256: 1897; X64: # %bb.0: 1898; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07] 1899; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1900; X64-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07] 1901; X64-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f] 1902; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 1903; X64-NEXT: retq # encoding: [0xc3] 1904 %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1) 1905 %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask) 1906 %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask) 1907 %res4 = add <8 x i32> %res2, %res1 1908 ret <8 x i32> %res4 1909} 1910 1911declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8) 1912 1913define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) { 1914; X86-LABEL: test_mask_load_aligned_q_128: 1915; X86: # %bb.0: 1916; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1917; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00] 1918; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1919; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1920; X86-NEXT: vmovdqa64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x00] 1921; X86-NEXT: vmovdqa64 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x08] 1922; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 1923; X86-NEXT: retl # encoding: [0xc3] 1924; 1925; X64-LABEL: test_mask_load_aligned_q_128: 1926; X64: # %bb.0: 1927; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07] 1928; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1929; X64-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07] 1930; X64-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f] 1931; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 1932; X64-NEXT: retq # encoding: [0xc3] 1933 %res = call <2 x 
i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1) 1934 %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask) 1935 %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask) 1936 %res4 = add <2 x i64> %res2, %res1 1937 ret <2 x i64> %res4 1938} 1939 1940declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8) 1941 1942define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) { 1943; X86-LABEL: test_mask_load_aligned_q_256: 1944; X86: # %bb.0: 1945; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1946; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00] 1947; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1948; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 1949; X86-NEXT: vmovdqa64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x00] 1950; X86-NEXT: vmovdqa64 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x08] 1951; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 1952; X86-NEXT: retl # encoding: [0xc3] 1953; 1954; X64-LABEL: test_mask_load_aligned_q_256: 1955; X64: # %bb.0: 1956; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07] 1957; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1958; X64-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07] 1959; X64-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f] 1960; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 1961; X64-NEXT: retq # encoding: [0xc3] 1962 %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1) 1963 %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask) 1964 %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask) 1965 %res4 = add <4 x i64> %res2, %res1 1966 ret <4 x i64> %res4 1967} 1968 1969declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8) 1970 1971define <4 x i32>@test_int_x86_avx512_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2) { 1972; CHECK-LABEL: test_int_x86_avx512_pshuf_d_128: 1973; CHECK: # %bb.0: 1974; CHECK-NEXT: vpermilps $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x03] 1975; CHECK-NEXT: # xmm0 = xmm0[3,0,0,0] 1976; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1977 %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) 1978 ret <4 x i32> %res 1979} 1980 1981define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 1982; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_128: 1983; X86: # %bb.0: 1984; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1985; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 1986; X86-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03] 1987; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0] 1988; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1989; X86-NEXT: retl # encoding: [0xc3] 1990; 1991; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_128: 1992; X64: # %bb.0: 1993; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 
1994; X64-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03] 1995; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0] 1996; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1997; X64-NEXT: retq # encoding: [0xc3] 1998 %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 1999 ret <4 x i32> %res 2000} 2001 2002define <4 x i32>@test_int_x86_avx512_maskz_pshuf_d_128(<4 x i32> %x0, i32 %x1, i8 %x3) { 2003; X86-LABEL: test_int_x86_avx512_maskz_pshuf_d_128: 2004; X86: # %bb.0: 2005; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2006; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2007; X86-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03] 2008; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0] 2009; X86-NEXT: retl # encoding: [0xc3] 2010; 2011; X64-LABEL: test_int_x86_avx512_maskz_pshuf_d_128: 2012; X64: # %bb.0: 2013; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2014; X64-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03] 2015; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0] 2016; X64-NEXT: retq # encoding: [0xc3] 2017 %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) 2018 ret <4 x i32> %res 2019} 2020 2021declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8) 2022 2023define <8 x i32>@test_int_x86_avx512_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2) { 2024; CHECK-LABEL: test_int_x86_avx512_pshuf_d_256: 2025; CHECK: # %bb.0: 2026; CHECK-NEXT: vpermilps $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x03] 2027; CHECK-NEXT: # ymm0 = ymm0[3,0,0,0,7,4,4,4] 2028; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2029 %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) 2030 ret <8 x i32> %res 2031} 2032 2033define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 2034; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_256: 2035; X86: # %bb.0: 2036; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2037; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2038; X86-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03] 2039; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4] 2040; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2041; X86-NEXT: retl # encoding: [0xc3] 2042; 2043; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_256: 2044; X64: # %bb.0: 2045; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2046; X64-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03] 2047; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4] 2048; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2049; X64-NEXT: retq # encoding: [0xc3] 2050 %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 2051 ret <8 x i32> %res 2052} 2053 2054define <8 x i32>@test_int_x86_avx512_maskz_pshuf_d_256(<8 x i32> %x0, i32 %x1, i8 %x3) { 2055; X86-LABEL: test_int_x86_avx512_maskz_pshuf_d_256: 2056; X86: # %bb.0: 2057; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2058; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2059; X86-NEXT: 
vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03] 2060; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4] 2061; X86-NEXT: retl # encoding: [0xc3] 2062; 2063; X64-LABEL: test_int_x86_avx512_maskz_pshuf_d_256: 2064; X64: # %bb.0: 2065; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2066; X64-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03] 2067; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4] 2068; X64-NEXT: retq # encoding: [0xc3] 2069 %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) 2070 ret <8 x i32> %res 2071} 2072 2073define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) { 2074; CHECK-LABEL: test_pcmpeq_d_256: 2075; CHECK: # %bb.0: 2076; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] 2077; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2078; CHECK-NEXT: # kill: def $al killed $al killed $eax 2079; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2080; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2081 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) 2082 ret i8 %res 2083} 2084 2085define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 2086; X86-LABEL: test_mask_pcmpeq_d_256: 2087; X86: # %bb.0: 2088; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] 2089; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2090; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04] 2091; X86-NEXT: # kill: def $al killed $al killed $eax 2092; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2093; X86-NEXT: retl # encoding: [0xc3] 2094; 2095; X64-LABEL: test_mask_pcmpeq_d_256: 2096; X64: # %bb.0: 2097; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] 2098; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2099; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8] 2100; X64-NEXT: # kill: def $al killed $al killed $eax 2101; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2102; X64-NEXT: retq # encoding: [0xc3] 2103 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) 2104 ret i8 %res 2105} 2106 2107declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8) 2108 2109define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) { 2110; CHECK-LABEL: test_pcmpeq_q_256: 2111; CHECK: # %bb.0: 2112; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] 2113; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2114; CHECK-NEXT: # kill: def $al killed $al killed $eax 2115; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2116; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2117 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) 2118 ret i8 %res 2119} 2120 2121define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 2122; X86-LABEL: test_mask_pcmpeq_q_256: 2123; X86: # %bb.0: 2124; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2125; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2126; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 2127; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2128; X86-NEXT: # kill: def $al killed $al killed $eax 2129; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2130; X86-NEXT: retl # encoding: 
[0xc3] 2131; 2132; X64-LABEL: test_mask_pcmpeq_q_256: 2133; X64: # %bb.0: 2134; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2135; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 2136; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2137; X64-NEXT: # kill: def $al killed $al killed $eax 2138; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2139; X64-NEXT: retq # encoding: [0xc3] 2140 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) 2141 ret i8 %res 2142} 2143 2144declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8) 2145 2146define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) { 2147; CHECK-LABEL: test_pcmpgt_d_256: 2148; CHECK: # %bb.0: 2149; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] 2150; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2151; CHECK-NEXT: # kill: def $al killed $al killed $eax 2152; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2153; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2154 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) 2155 ret i8 %res 2156} 2157 2158define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 2159; X86-LABEL: test_mask_pcmpgt_d_256: 2160; X86: # %bb.0: 2161; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] 2162; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2163; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04] 2164; X86-NEXT: # kill: def $al killed $al killed $eax 2165; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2166; X86-NEXT: retl # encoding: [0xc3] 2167; 2168; X64-LABEL: test_mask_pcmpgt_d_256: 2169; X64: # %bb.0: 2170; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] 2171; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2172; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8] 2173; X64-NEXT: # kill: def $al killed $al killed $eax 2174; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2175; X64-NEXT: retq # encoding: [0xc3] 2176 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) 2177 ret i8 %res 2178} 2179 2180declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8) 2181 2182define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) { 2183; CHECK-LABEL: test_pcmpgt_q_256: 2184; CHECK: # %bb.0: 2185; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1] 2186; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2187; CHECK-NEXT: # kill: def $al killed $al killed $eax 2188; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2189; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2190 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) 2191 ret i8 %res 2192} 2193 2194define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 2195; X86-LABEL: test_mask_pcmpgt_q_256: 2196; X86: # %bb.0: 2197; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2198; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2199; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] 2200; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2201; X86-NEXT: # kill: def $al killed $al killed $eax 2202; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2203; X86-NEXT: retl # encoding: [0xc3] 2204; 2205; X64-LABEL: 
test_mask_pcmpgt_q_256: 2206; X64: # %bb.0: 2207; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2208; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] 2209; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2210; X64-NEXT: # kill: def $al killed $al killed $eax 2211; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2212; X64-NEXT: retq # encoding: [0xc3] 2213 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) 2214 ret i8 %res 2215} 2216 2217declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8) 2218 2219define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) { 2220; CHECK-LABEL: test_pcmpeq_d_128: 2221; CHECK: # %bb.0: 2222; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] 2223; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2224; CHECK-NEXT: # kill: def $al killed $al killed $eax 2225; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2226 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) 2227 ret i8 %res 2228} 2229 2230define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2231; X86-LABEL: test_mask_pcmpeq_d_128: 2232; X86: # %bb.0: 2233; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2234; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2235; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 2236; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2237; X86-NEXT: # kill: def $al killed $al killed $eax 2238; X86-NEXT: retl # encoding: [0xc3] 2239; 2240; X64-LABEL: test_mask_pcmpeq_d_128: 2241; X64: # %bb.0: 2242; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2243; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 2244; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2245; X64-NEXT: # kill: def $al killed $al killed $eax 2246; X64-NEXT: retq # encoding: [0xc3] 2247 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) 2248 ret i8 %res 2249} 2250 2251declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8) 2252 2253define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) { 2254; CHECK-LABEL: test_pcmpeq_q_128: 2255; CHECK: # %bb.0: 2256; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] 2257; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2258; CHECK-NEXT: # kill: def $al killed $al killed $eax 2259; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2260 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) 2261 ret i8 %res 2262} 2263 2264define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 2265; X86-LABEL: test_mask_pcmpeq_q_128: 2266; X86: # %bb.0: 2267; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2268; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2269; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 2270; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2271; X86-NEXT: # kill: def $al killed $al killed $eax 2272; X86-NEXT: retl # encoding: [0xc3] 2273; 2274; X64-LABEL: test_mask_pcmpeq_q_128: 2275; X64: # %bb.0: 2276; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2277; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 2278; X64-NEXT: 
kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2279; X64-NEXT: # kill: def $al killed $al killed $eax 2280; X64-NEXT: retq # encoding: [0xc3] 2281 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) 2282 ret i8 %res 2283} 2284 2285declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8) 2286 2287define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) { 2288; CHECK-LABEL: test_pcmpgt_d_128: 2289; CHECK: # %bb.0: 2290; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1] 2291; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2292; CHECK-NEXT: # kill: def $al killed $al killed $eax 2293; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2294 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) 2295 ret i8 %res 2296} 2297 2298define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2299; X86-LABEL: test_mask_pcmpgt_d_128: 2300; X86: # %bb.0: 2301; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2302; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2303; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] 2304; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2305; X86-NEXT: # kill: def $al killed $al killed $eax 2306; X86-NEXT: retl # encoding: [0xc3] 2307; 2308; X64-LABEL: test_mask_pcmpgt_d_128: 2309; X64: # %bb.0: 2310; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2311; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] 2312; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2313; X64-NEXT: # kill: def $al killed $al killed $eax 2314; X64-NEXT: retq # encoding: [0xc3] 2315 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) 2316 ret i8 %res 2317} 2318 2319declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8) 2320 2321define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) { 2322; CHECK-LABEL: test_pcmpgt_q_128: 2323; CHECK: # %bb.0: 2324; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1] 2325; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2326; CHECK-NEXT: # kill: def $al killed $al killed $eax 2327; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2328 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) 2329 ret i8 %res 2330} 2331 2332define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 2333; X86-LABEL: test_mask_pcmpgt_q_128: 2334; X86: # %bb.0: 2335; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2336; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2337; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] 2338; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2339; X86-NEXT: # kill: def $al killed $al killed $eax 2340; X86-NEXT: retl # encoding: [0xc3] 2341; 2342; X64-LABEL: test_mask_pcmpgt_q_128: 2343; X64: # %bb.0: 2344; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2345; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] 2346; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 2347; X64-NEXT: # kill: def $al killed $al killed $eax 2348; X64-NEXT: retq # encoding: [0xc3] 2349 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) 2350 ret i8 %res 2351} 2352 2353declare 
i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8) 2354 2355declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 2356 2357define <2 x double>@test_int_x86_avx512_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2) { 2358; CHECK-LABEL: test_int_x86_avx512_unpckh_pd_128: 2359; CHECK: # %bb.0: 2360; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 2361; CHECK-NEXT: # xmm0 = xmm0[1],xmm1[1] 2362; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2363 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 2364 ret <2 x double> %res 2365} 2366 2367define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 2368; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: 2369; X86: # %bb.0: 2370; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2371; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2372; X86-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1] 2373; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1] 2374; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 2375; X86-NEXT: retl # encoding: [0xc3] 2376; 2377; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: 2378; X64: # %bb.0: 2379; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2380; X64-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1] 2381; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1] 2382; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 2383; X64-NEXT: retq # encoding: [0xc3] 2384 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 2385 ret <2 x double> %res 2386} 2387 2388declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 2389 2390define <4 x double>@test_int_x86_avx512_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 2391; CHECK-LABEL: test_int_x86_avx512_unpckh_pd_256: 2392; CHECK: # %bb.0: 2393; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xc1] 2394; CHECK-NEXT: # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 2395; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2396 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 2397 ret <4 x double> %res 2398} 2399 2400define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 2401; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: 2402; X86: # %bb.0: 2403; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2404; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2405; X86-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1] 2406; X86-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 2407; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 2408; X86-NEXT: retl # encoding: [0xc3] 2409; 2410; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: 2411; X64: # %bb.0: 2412; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2413; X64-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1] 2414; 
X64-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 2415; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 2416; X64-NEXT: retq # encoding: [0xc3] 2417 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 2418 ret <4 x double> %res 2419} 2420 2421declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2422 2423define <4 x float>@test_int_x86_avx512_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) { 2424; CHECK-LABEL: test_int_x86_avx512_unpckh_ps_128: 2425; CHECK: # %bb.0: 2426; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 2427; CHECK-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2428; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2429 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 2430 ret <4 x float> %res 2431} 2432 2433define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 2434; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: 2435; X86: # %bb.0: 2436; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2437; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2438; X86-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1] 2439; X86-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2440; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 2441; X86-NEXT: retl # encoding: [0xc3] 2442; 2443; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: 2444; X64: # %bb.0: 2445; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2446; X64-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1] 2447; X64-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2448; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 2449; X64-NEXT: retq # encoding: [0xc3] 2450 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 2451 ret <4 x float> %res 2452} 2453 2454declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2455 2456define <8 x float>@test_int_x86_avx512_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) { 2457; CHECK-LABEL: test_int_x86_avx512_unpckh_ps_256: 2458; CHECK: # %bb.0: 2459; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xc1] 2460; CHECK-NEXT: # ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 2461; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2462 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 2463 ret <8 x float> %res 2464} 2465 2466define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 2467; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: 2468; X86: # %bb.0: 2469; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2470; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2471; X86-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1] 2472; X86-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 2473; X86-NEXT: vmovaps %ymm2, 
%ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 2474; X86-NEXT: retl # encoding: [0xc3] 2475; 2476; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: 2477; X64: # %bb.0: 2478; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2479; X64-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1] 2480; X64-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 2481; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 2482; X64-NEXT: retq # encoding: [0xc3] 2483 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 2484 ret <8 x float> %res 2485} 2486 2487declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 2488 2489define <2 x double>@test_int_x86_avx512_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2) { 2490; CHECK-LABEL: test_int_x86_avx512_unpckl_pd_128: 2491; CHECK: # %bb.0: 2492; CHECK-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 2493; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0] 2494; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2495 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 2496 ret <2 x double> %res 2497} 2498 2499define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 2500; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: 2501; X86: # %bb.0: 2502; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2503; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2504; X86-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1] 2505; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0] 2506; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 2507; X86-NEXT: retl # encoding: [0xc3] 2508; 2509; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: 2510; X64: # %bb.0: 2511; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2512; X64-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1] 2513; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0] 2514; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 2515; X64-NEXT: retq # encoding: [0xc3] 2516 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 2517 ret <2 x double> %res 2518} 2519 2520declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 2521 2522define <4 x double>@test_int_x86_avx512_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2) { 2523; CHECK-LABEL: test_int_x86_avx512_unpckl_pd_256: 2524; CHECK: # %bb.0: 2525; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xc1] 2526; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2527; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2528 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 2529 ret <4 x double> %res 2530} 2531 2532define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 2533; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: 2534; X86: # %bb.0: 2535; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2536; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2537; X86-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1] 2538; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2539; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 2540; X86-NEXT: retl # encoding: [0xc3] 2541; 2542; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: 2543; X64: # %bb.0: 2544; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2545; X64-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1] 2546; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2547; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 2548; X64-NEXT: retq # encoding: [0xc3] 2549 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 2550 ret <4 x double> %res 2551} 2552 2553declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2554 2555define <4 x float>@test_int_x86_avx512_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) { 2556; CHECK-LABEL: test_int_x86_avx512_unpckl_ps_128: 2557; CHECK: # %bb.0: 2558; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 2559; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2560; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2561 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 2562 ret <4 x float> %res 2563} 2564 2565define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 2566; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: 2567; X86: # %bb.0: 2568; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2569; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2570; X86-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1] 2571; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2572; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 2573; X86-NEXT: retl # encoding: [0xc3] 2574; 2575; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: 2576; X64: # %bb.0: 2577; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2578; X64-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1] 2579; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2580; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 2581; X64-NEXT: retq # encoding: [0xc3] 2582 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 2583 ret <4 x float> %res 2584} 2585 2586declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2587 2588define <8 x float>@test_int_x86_avx512_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) { 2589; CHECK-LABEL: test_int_x86_avx512_unpckl_ps_256: 2590; CHECK: # %bb.0: 2591; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xc1] 2592; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 2593; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2594 %res = call <8 x float> 
@llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 2595 ret <8 x float> %res 2596} 2597 2598define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 2599; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: 2600; X86: # %bb.0: 2601; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2602; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2603; X86-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1] 2604; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 2605; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 2606; X86-NEXT: retl # encoding: [0xc3] 2607; 2608; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: 2609; X64: # %bb.0: 2610; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2611; X64-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1] 2612; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 2613; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 2614; X64-NEXT: retq # encoding: [0xc3] 2615 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 2616 ret <8 x float> %res 2617} 2618 2619declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2620 2621define <4 x i32>@test_int_x86_avx512_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 2622; CHECK-LABEL: test_int_x86_avx512_punpckhd_q_128: 2623; CHECK: # %bb.0: 2624; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 2625; CHECK-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2626; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2627 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 2628 ret <4 x i32> %res 2629} 2630 2631define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 2632; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: 2633; X86: # %bb.0: 2634; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2635; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2636; X86-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1] 2637; X86-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2638; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2639; X86-NEXT: retl # encoding: [0xc3] 2640; 2641; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: 2642; X64: # %bb.0: 2643; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2644; X64-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1] 2645; X64-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2646; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2647; X64-NEXT: retq # encoding: [0xc3] 2648 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 2649 ret <4 x i32> %res 2650} 2651 2652declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2653 2654define <4 x i32>@test_int_x86_avx512_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 2655; 
CHECK-LABEL: test_int_x86_avx512_punpckld_q_128: 2656; CHECK: # %bb.0: 2657; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 2658; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2659; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2660 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 2661 ret <4 x i32> %res 2662} 2663 2664define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 2665; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_128: 2666; X86: # %bb.0: 2667; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2668; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2669; X86-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1] 2670; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2671; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2672; X86-NEXT: retl # encoding: [0xc3] 2673; 2674; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_128: 2675; X64: # %bb.0: 2676; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2677; X64-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1] 2678; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2679; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2680; X64-NEXT: retq # encoding: [0xc3] 2681 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 2682 ret <4 x i32> %res 2683} 2684 2685declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2686 2687define <8 x i32>@test_int_x86_avx512_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 2688; CHECK-LABEL: test_int_x86_avx512_punpckhd_q_256: 2689; CHECK: # %bb.0: 2690; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xc1] 2691; CHECK-NEXT: # ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 2692; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2693 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2694 ret <8 x i32> %res 2695} 2696 2697define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2698; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: 2699; X86: # %bb.0: 2700; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2701; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2702; X86-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1] 2703; X86-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 2704; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2705; X86-NEXT: retl # encoding: [0xc3] 2706; 2707; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: 2708; X64: # %bb.0: 2709; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2710; X64-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1] 2711; X64-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 2712; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2713; X64-NEXT: retq # encoding: [0xc3] 2714 %res = call <8 x i32> 
@llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2715 ret <8 x i32> %res 2716} 2717 2718declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2719 2720define <8 x i32>@test_int_x86_avx512_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 2721; CHECK-LABEL: test_int_x86_avx512_punpckld_q_256: 2722; CHECK: # %bb.0: 2723; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xc1] 2724; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 2725; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2726 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2727 ret <8 x i32> %res 2728} 2729 2730define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2731; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_256: 2732; X86: # %bb.0: 2733; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2734; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2735; X86-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1] 2736; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 2737; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2738; X86-NEXT: retl # encoding: [0xc3] 2739; 2740; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_256: 2741; X64: # %bb.0: 2742; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2743; X64-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1] 2744; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 2745; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2746; X64-NEXT: retq # encoding: [0xc3] 2747 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2748 ret <8 x i32> %res 2749} 2750 2751declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2752 2753define <2 x i64>@test_int_x86_avx512_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 2754; CHECK-LABEL: test_int_x86_avx512_punpckhqd_q_128: 2755; CHECK: # %bb.0: 2756; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 2757; CHECK-NEXT: # xmm0 = xmm0[1],xmm1[1] 2758; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2759 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2760 ret <2 x i64> %res 2761} 2762 2763define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2764; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: 2765; X86: # %bb.0: 2766; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2767; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2768; X86-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1] 2769; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1] 2770; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2771; X86-NEXT: retl # encoding: [0xc3] 2772; 2773; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: 2774; X64: # %bb.0: 2775; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2776; X64-NEXT: vpunpckhqdq %xmm1, 
%xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1] 2777; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1] 2778; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2779; X64-NEXT: retq # encoding: [0xc3] 2780 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2781 ret <2 x i64> %res 2782} 2783 2784declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2785 2786define <2 x i64>@test_int_x86_avx512_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 2787; CHECK-LABEL: test_int_x86_avx512_punpcklqd_q_128: 2788; CHECK: # %bb.0: 2789; CHECK-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 2790; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0] 2791; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2792 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2793 ret <2 x i64> %res 2794} 2795 2796define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2797; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: 2798; X86: # %bb.0: 2799; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2800; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2801; X86-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1] 2802; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0] 2803; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2804; X86-NEXT: retl # encoding: [0xc3] 2805; 2806; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: 2807; X64: # %bb.0: 2808; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2809; X64-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1] 2810; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0] 2811; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2812; X64-NEXT: retq # encoding: [0xc3] 2813 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2814 ret <2 x i64> %res 2815} 2816 2817declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2818 2819define <4 x i64>@test_int_x86_avx512_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 2820; CHECK-LABEL: test_int_x86_avx512_punpcklqd_q_256: 2821; CHECK: # %bb.0: 2822; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xc1] 2823; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2824; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2825 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 2826 ret <4 x i64> %res 2827} 2828 2829define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 2830; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: 2831; X86: # %bb.0: 2832; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2833; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2834; X86-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1] 2835; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2836; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2837; X86-NEXT: retl # encoding: [0xc3] 2838; 2839; 
X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: 2840; X64: # %bb.0: 2841; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2842; X64-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1] 2843; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2844; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2845; X64-NEXT: retq # encoding: [0xc3] 2846 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 2847 ret <4 x i64> %res 2848} 2849 2850declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2851 2852define <4 x i64>@test_int_x86_avx512_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 2853; CHECK-LABEL: test_int_x86_avx512_punpckhqd_q_256: 2854; CHECK: # %bb.0: 2855; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xc1] 2856; CHECK-NEXT: # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 2857; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2858 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 2859 ret <4 x i64> %res 2860} 2861 2862define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 2863; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: 2864; X86: # %bb.0: 2865; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2866; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2867; X86-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1] 2868; X86-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 2869; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2870; X86-NEXT: retl # encoding: [0xc3] 2871; 2872; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: 2873; X64: # %bb.0: 2874; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2875; X64-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1] 2876; X64-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 2877; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2878; X64-NEXT: retq # encoding: [0xc3] 2879 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 2880 ret <4 x i64> %res 2881} 2882 2883define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 2884; CHECK-LABEL: test_mask_and_epi32_rr_128: 2885; CHECK: # %bb.0: 2886; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1] 2887; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2888 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2889 ret <4 x i32> %res 2890} 2891 2892define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 2893; X86-LABEL: test_mask_and_epi32_rrk_128: 2894; X86: # %bb.0: 2895; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2896; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2897; X86-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] 2898; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2899; X86-NEXT: retl # encoding: [0xc3] 2900; 2901; X64-LABEL: test_mask_and_epi32_rrk_128: 2902; X64: # %bb.0: 2903; 
X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2904; X64-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] 2905; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2906; X64-NEXT: retq # encoding: [0xc3] 2907 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2908 ret <4 x i32> %res 2909} 2910 2911define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2912; X86-LABEL: test_mask_and_epi32_rrkz_128: 2913; X86: # %bb.0: 2914; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2915; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 2916; X86-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1] 2917; X86-NEXT: retl # encoding: [0xc3] 2918; 2919; X64-LABEL: test_mask_and_epi32_rrkz_128: 2920; X64: # %bb.0: 2921; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2922; X64-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1] 2923; X64-NEXT: retq # encoding: [0xc3] 2924 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2925 ret <4 x i32> %res 2926} 2927 2928define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 2929; X86-LABEL: test_mask_and_epi32_rm_128: 2930; X86: # %bb.0: 2931; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2932; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00] 2933; X86-NEXT: retl # encoding: [0xc3] 2934; 2935; X64-LABEL: test_mask_and_epi32_rm_128: 2936; X64: # %bb.0: 2937; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07] 2938; X64-NEXT: retq # encoding: [0xc3] 2939 %b = load <4 x i32>, <4 x i32>* %ptr_b 2940 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2941 ret <4 x i32> %res 2942} 2943 2944define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 2945; X86-LABEL: test_mask_and_epi32_rmk_128: 2946; X86: # %bb.0: 2947; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2948; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 2949; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 2950; X86-NEXT: vpandd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x08] 2951; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2952; X86-NEXT: retl # encoding: [0xc3] 2953; 2954; X64-LABEL: test_mask_and_epi32_rmk_128: 2955; X64: # %bb.0: 2956; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2957; X64-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f] 2958; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2959; X64-NEXT: retq # encoding: [0xc3] 2960 %b = load <4 x i32>, <4 x i32>* %ptr_b 2961 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2962 ret <4 x i32> %res 2963} 2964 2965define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 2966; X86-LABEL: test_mask_and_epi32_rmkz_128: 2967; X86: # %bb.0: 2968; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2969; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 
encoding: [0x0f,0xb6,0x4c,0x24,0x08] 2970; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 2971; X86-NEXT: vpandd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x00] 2972; X86-NEXT: retl # encoding: [0xc3] 2973; 2974; X64-LABEL: test_mask_and_epi32_rmkz_128: 2975; X64: # %bb.0: 2976; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2977; X64-NEXT: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07] 2978; X64-NEXT: retq # encoding: [0xc3] 2979 %b = load <4 x i32>, <4 x i32>* %ptr_b 2980 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2981 ret <4 x i32> %res 2982} 2983 2984define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 2985; X86-LABEL: test_mask_and_epi32_rmb_128: 2986; X86: # %bb.0: 2987; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2988; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x00] 2989; X86-NEXT: retl # encoding: [0xc3] 2990; 2991; X64-LABEL: test_mask_and_epi32_rmb_128: 2992; X64: # %bb.0: 2993; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07] 2994; X64-NEXT: retq # encoding: [0xc3] 2995 %q = load i32, i32* %ptr_b 2996 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2997 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2998 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2999 ret <4 x i32> %res 3000} 3001 3002define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 3003; X86-LABEL: test_mask_and_epi32_rmbk_128: 3004; X86: # %bb.0: 3005; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3006; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3007; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3008; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x08] 3009; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3010; X86-NEXT: retl # encoding: [0xc3] 3011; 3012; X64-LABEL: test_mask_and_epi32_rmbk_128: 3013; X64: # %bb.0: 3014; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3015; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f] 3016; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3017; X64-NEXT: retq # encoding: [0xc3] 3018 %q = load i32, i32* %ptr_b 3019 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3020 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3021 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 3022 ret <4 x i32> %res 3023} 3024 3025define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 3026; X86-LABEL: test_mask_and_epi32_rmbkz_128: 3027; X86: # %bb.0: 3028; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3029; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3030; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3031; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x00] 3032; X86-NEXT: retl # encoding: [0xc3] 3033; 3034; X64-LABEL: 
test_mask_and_epi32_rmbkz_128: 3035; X64: # %bb.0: 3036; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3037; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07] 3038; X64-NEXT: retq # encoding: [0xc3] 3039 %q = load i32, i32* %ptr_b 3040 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3041 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3042 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3043 ret <4 x i32> %res 3044} 3045 3046declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 3047 3048define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 3049; CHECK-LABEL: test_mask_and_epi32_rr_256: 3050; CHECK: # %bb.0: 3051; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1] 3052; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3053 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3054 ret <8 x i32> %res 3055} 3056 3057define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 3058; X86-LABEL: test_mask_and_epi32_rrk_256: 3059; X86: # %bb.0: 3060; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3061; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3062; X86-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] 3063; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3064; X86-NEXT: retl # encoding: [0xc3] 3065; 3066; X64-LABEL: test_mask_and_epi32_rrk_256: 3067; X64: # %bb.0: 3068; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3069; X64-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] 3070; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3071; X64-NEXT: retq # encoding: [0xc3] 3072 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3073 ret <8 x i32> %res 3074} 3075 3076define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 3077; X86-LABEL: test_mask_and_epi32_rrkz_256: 3078; X86: # %bb.0: 3079; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3080; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3081; X86-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1] 3082; X86-NEXT: retl # encoding: [0xc3] 3083; 3084; X64-LABEL: test_mask_and_epi32_rrkz_256: 3085; X64: # %bb.0: 3086; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3087; X64-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1] 3088; X64-NEXT: retq # encoding: [0xc3] 3089 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3090 ret <8 x i32> %res 3091} 3092 3093define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 3094; X86-LABEL: test_mask_and_epi32_rm_256: 3095; X86: # %bb.0: 3096; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3097; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00] 3098; X86-NEXT: retl # encoding: [0xc3] 3099; 3100; X64-LABEL: test_mask_and_epi32_rm_256: 3101; X64: # %bb.0: 3102; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 
# encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding:
[0xc5,0xf8,0x92,0xce] 3367; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07] 3368; X64-NEXT: retq # encoding: [0xc3] 3369 %q = load i32, i32* %ptr_b 3370 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3371 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3372 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3373 ret <4 x i32> %res 3374} 3375 3376declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 3377 3378define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 3379; CHECK-LABEL: test_mask_or_epi32_rr_256: 3380; CHECK: # %bb.0: 3381; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1] 3382; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3383 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3384 ret <8 x i32> %res 3385} 3386 3387define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 3388; X86-LABEL: test_mask_or_epi32_rrk_256: 3389; X86: # %bb.0: 3390; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3391; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3392; X86-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] 3393; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3394; X86-NEXT: retl # encoding: [0xc3] 3395; 3396; X64-LABEL: test_mask_or_epi32_rrk_256: 3397; X64: # %bb.0: 3398; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3399; X64-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] 3400; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3401; X64-NEXT: retq # encoding: [0xc3] 3402 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3403 ret <8 x i32> %res 3404} 3405 3406define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 3407; X86-LABEL: test_mask_or_epi32_rrkz_256: 3408; X86: # %bb.0: 3409; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3410; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3411; X86-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1] 3412; X86-NEXT: retl # encoding: [0xc3] 3413; 3414; X64-LABEL: test_mask_or_epi32_rrkz_256: 3415; X64: # %bb.0: 3416; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3417; X64-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1] 3418; X64-NEXT: retq # encoding: [0xc3] 3419 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3420 ret <8 x i32> %res 3421} 3422 3423define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 3424; X86-LABEL: test_mask_or_epi32_rm_256: 3425; X86: # %bb.0: 3426; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3427; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00] 3428; X86-NEXT: retl # encoding: [0xc3] 3429; 3430; X64-LABEL: test_mask_or_epi32_rm_256: 3431; X64: # %bb.0: 3432; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07] 3433; X64-NEXT: retq # encoding: [0xc3] 3434 %b = load <8 x i32>, <8 x i32>* %ptr_b 
3435 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3436 ret <8 x i32> %res 3437} 3438 3439define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 3440; X86-LABEL: test_mask_or_epi32_rmk_256: 3441; X86: # %bb.0: 3442; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3443; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3444; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3445; X86-NEXT: vpord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x08] 3446; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3447; X86-NEXT: retl # encoding: [0xc3] 3448; 3449; X64-LABEL: test_mask_or_epi32_rmk_256: 3450; X64: # %bb.0: 3451; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3452; X64-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f] 3453; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3454; X64-NEXT: retq # encoding: [0xc3] 3455 %b = load <8 x i32>, <8 x i32>* %ptr_b 3456 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3457 ret <8 x i32> %res 3458} 3459 3460define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 3461; X86-LABEL: test_mask_or_epi32_rmkz_256: 3462; X86: # %bb.0: 3463; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3464; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3465; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3466; X86-NEXT: vpord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x00] 3467; X86-NEXT: retl # encoding: [0xc3] 3468; 3469; X64-LABEL: test_mask_or_epi32_rmkz_256: 3470; X64: # %bb.0: 3471; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3472; X64-NEXT: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07] 3473; X64-NEXT: retq # encoding: [0xc3] 3474 %b = load <8 x i32>, <8 x i32>* %ptr_b 3475 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3476 ret <8 x i32> %res 3477} 3478 3479define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 3480; X86-LABEL: test_mask_or_epi32_rmb_256: 3481; X86: # %bb.0: 3482; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3483; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x00] 3484; X86-NEXT: retl # encoding: [0xc3] 3485; 3486; X64-LABEL: test_mask_or_epi32_rmb_256: 3487; X64: # %bb.0: 3488; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07] 3489; X64-NEXT: retq # encoding: [0xc3] 3490 %q = load i32, i32* %ptr_b 3491 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3492 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3493 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3494 ret <8 x i32> %res 3495} 3496 3497define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 3498; X86-LABEL: test_mask_or_epi32_rmbk_256: 3499; X86: # %bb.0: 3500; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3501; X86-NEXT: movzbl {{[0-9]+}}(%esp), 
%ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3502; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3503; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08] 3504; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3505; X86-NEXT: retl # encoding: [0xc3] 3506; 3507; X64-LABEL: test_mask_or_epi32_rmbk_256: 3508; X64: # %bb.0: 3509; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3510; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f] 3511; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3512; X64-NEXT: retq # encoding: [0xc3] 3513 %q = load i32, i32* %ptr_b 3514 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3515 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3516 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3517 ret <8 x i32> %res 3518} 3519 3520define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 3521; X86-LABEL: test_mask_or_epi32_rmbkz_256: 3522; X86: # %bb.0: 3523; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3524; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3525; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3526; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00] 3527; X86-NEXT: retl # encoding: [0xc3] 3528; 3529; X64-LABEL: test_mask_or_epi32_rmbkz_256: 3530; X64: # %bb.0: 3531; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3532; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07] 3533; X64-NEXT: retq # encoding: [0xc3] 3534 %q = load i32, i32* %ptr_b 3535 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3536 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3537 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3538 ret <8 x i32> %res 3539} 3540 3541declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 3542 3543define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 3544; CHECK-LABEL: test_mask_xor_epi32_rr_128: 3545; CHECK: # %bb.0: 3546; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 3547; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3548 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 3549 ret <4 x i32> %res 3550} 3551 3552define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 3553; X86-LABEL: test_mask_xor_epi32_rrk_128: 3554; X86: # %bb.0: 3555; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3556; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3557; X86-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] 3558; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3559; X86-NEXT: retl # encoding: [0xc3] 3560; 3561; X64-LABEL: test_mask_xor_epi32_rrk_128: 3562; X64: # %bb.0: 3563; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3564; X64-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] 3565; X64-NEXT: vmovdqa %xmm2, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3566; X64-NEXT: retq # encoding: [0xc3] 3567 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 3568 ret <4 x i32> %res 3569} 3570 3571define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 3572; X86-LABEL: test_mask_xor_epi32_rrkz_128: 3573; X86: # %bb.0: 3574; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3575; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3576; X86-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1] 3577; X86-NEXT: retl # encoding: [0xc3] 3578; 3579; X64-LABEL: test_mask_xor_epi32_rrkz_128: 3580; X64: # %bb.0: 3581; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3582; X64-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1] 3583; X64-NEXT: retq # encoding: [0xc3] 3584 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3585 ret <4 x i32> %res 3586} 3587 3588define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 3589; X86-LABEL: test_mask_xor_epi32_rm_128: 3590; X86: # %bb.0: 3591; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3592; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00] 3593; X86-NEXT: retl # encoding: [0xc3] 3594; 3595; X64-LABEL: test_mask_xor_epi32_rm_128: 3596; X64: # %bb.0: 3597; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07] 3598; X64-NEXT: retq # encoding: [0xc3] 3599 %b = load <4 x i32>, <4 x i32>* %ptr_b 3600 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 3601 ret <4 x i32> %res 3602} 3603 3604define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 3605; X86-LABEL: test_mask_xor_epi32_rmk_128: 3606; X86: # %bb.0: 3607; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3608; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3609; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3610; X86-NEXT: vpxord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x08] 3611; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3612; X86-NEXT: retl # encoding: [0xc3] 3613; 3614; X64-LABEL: test_mask_xor_epi32_rmk_128: 3615; X64: # %bb.0: 3616; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3617; X64-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] 3618; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3619; X64-NEXT: retq # encoding: [0xc3] 3620 %b = load <4 x i32>, <4 x i32>* %ptr_b 3621 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 3622 ret <4 x i32> %res 3623} 3624 3625define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 3626; X86-LABEL: test_mask_xor_epi32_rmkz_128: 3627; X86: # %bb.0: 3628; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3629; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3630; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3631; X86-NEXT: vpxord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0x7d,0x89,0xef,0x00] 3632; X86-NEXT: retl # encoding: [0xc3] 3633; 3634; X64-LABEL: test_mask_xor_epi32_rmkz_128: 3635; X64: # %bb.0: 3636; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3637; X64-NEXT: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07] 3638; X64-NEXT: retq # encoding: [0xc3] 3639 %b = load <4 x i32>, <4 x i32>* %ptr_b 3640 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3641 ret <4 x i32> %res 3642} 3643 3644define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 3645; X86-LABEL: test_mask_xor_epi32_rmb_128: 3646; X86: # %bb.0: 3647; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3648; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x00] 3649; X86-NEXT: retl # encoding: [0xc3] 3650; 3651; X64-LABEL: test_mask_xor_epi32_rmb_128: 3652; X64: # %bb.0: 3653; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07] 3654; X64-NEXT: retq # encoding: [0xc3] 3655 %q = load i32, i32* %ptr_b 3656 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3657 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3658 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 3659 ret <4 x i32> %res 3660} 3661 3662define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 3663; X86-LABEL: test_mask_xor_epi32_rmbk_128: 3664; X86: # %bb.0: 3665; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3666; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3667; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3668; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x08] 3669; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3670; X86-NEXT: retl # encoding: [0xc3] 3671; 3672; X64-LABEL: test_mask_xor_epi32_rmbk_128: 3673; X64: # %bb.0: 3674; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3675; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] 3676; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3677; X64-NEXT: retq # encoding: [0xc3] 3678 %q = load i32, i32* %ptr_b 3679 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3680 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3681 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 3682 ret <4 x i32> %res 3683} 3684 3685define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 3686; X86-LABEL: test_mask_xor_epi32_rmbkz_128: 3687; X86: # %bb.0: 3688; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3689; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3690; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3691; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x00] 3692; X86-NEXT: retl # encoding: [0xc3] 3693; 3694; X64-LABEL: test_mask_xor_epi32_rmbkz_128: 3695; X64: # %bb.0: 3696; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3697; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0x7d,0x99,0xef,0x07] 3698; X64-NEXT: retq # encoding: [0xc3] 3699 %q = load i32, i32* %ptr_b 3700 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3701 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3702 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3703 ret <4 x i32> %res 3704} 3705 3706declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 3707 3708define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 3709; CHECK-LABEL: test_mask_xor_epi32_rr_256: 3710; CHECK: # %bb.0: 3711; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1] 3712; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3713 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3714 ret <8 x i32> %res 3715} 3716 3717define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 3718; X86-LABEL: test_mask_xor_epi32_rrk_256: 3719; X86: # %bb.0: 3720; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3721; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3722; X86-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] 3723; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3724; X86-NEXT: retl # encoding: [0xc3] 3725; 3726; X64-LABEL: test_mask_xor_epi32_rrk_256: 3727; X64: # %bb.0: 3728; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3729; X64-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] 3730; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3731; X64-NEXT: retq # encoding: [0xc3] 3732 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3733 ret <8 x i32> %res 3734} 3735 3736define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 3737; X86-LABEL: test_mask_xor_epi32_rrkz_256: 3738; X86: # %bb.0: 3739; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3740; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3741; X86-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1] 3742; X86-NEXT: retl # encoding: [0xc3] 3743; 3744; X64-LABEL: test_mask_xor_epi32_rrkz_256: 3745; X64: # %bb.0: 3746; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3747; X64-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1] 3748; X64-NEXT: retq # encoding: [0xc3] 3749 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3750 ret <8 x i32> %res 3751} 3752 3753define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 3754; X86-LABEL: test_mask_xor_epi32_rm_256: 3755; X86: # %bb.0: 3756; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3757; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00] 3758; X86-NEXT: retl # encoding: [0xc3] 3759; 3760; X64-LABEL: test_mask_xor_epi32_rm_256: 3761; X64: # %bb.0: 3762; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07] 3763; X64-NEXT: retq # encoding: [0xc3] 3764 %b = load <8 x i32>, <8 x i32>* %ptr_b 3765 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> 
%a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3766 ret <8 x i32> %res 3767} 3768 3769define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 3770; X86-LABEL: test_mask_xor_epi32_rmk_256: 3771; X86: # %bb.0: 3772; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3773; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3774; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3775; X86-NEXT: vpxord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x08] 3776; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3777; X86-NEXT: retl # encoding: [0xc3] 3778; 3779; X64-LABEL: test_mask_xor_epi32_rmk_256: 3780; X64: # %bb.0: 3781; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3782; X64-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] 3783; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3784; X64-NEXT: retq # encoding: [0xc3] 3785 %b = load <8 x i32>, <8 x i32>* %ptr_b 3786 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3787 ret <8 x i32> %res 3788} 3789 3790define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 3791; X86-LABEL: test_mask_xor_epi32_rmkz_256: 3792; X86: # %bb.0: 3793; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3794; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3795; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3796; X86-NEXT: vpxord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x00] 3797; X86-NEXT: retl # encoding: [0xc3] 3798; 3799; X64-LABEL: test_mask_xor_epi32_rmkz_256: 3800; X64: # %bb.0: 3801; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3802; X64-NEXT: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07] 3803; X64-NEXT: retq # encoding: [0xc3] 3804 %b = load <8 x i32>, <8 x i32>* %ptr_b 3805 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3806 ret <8 x i32> %res 3807} 3808 3809define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 3810; X86-LABEL: test_mask_xor_epi32_rmb_256: 3811; X86: # %bb.0: 3812; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3813; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x00] 3814; X86-NEXT: retl # encoding: [0xc3] 3815; 3816; X64-LABEL: test_mask_xor_epi32_rmb_256: 3817; X64: # %bb.0: 3818; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07] 3819; X64-NEXT: retq # encoding: [0xc3] 3820 %q = load i32, i32* %ptr_b 3821 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3822 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3823 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 3824 ret <8 x i32> %res 3825} 3826 3827define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 3828; X86-LABEL: test_mask_xor_epi32_rmbk_256: 3829; X86: # %bb.0: 3830; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3831; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3832; 
X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3833; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x08] 3834; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3835; X86-NEXT: retl # encoding: [0xc3] 3836; 3837; X64-LABEL: test_mask_xor_epi32_rmbk_256: 3838; X64: # %bb.0: 3839; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3840; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f] 3841; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3842; X64-NEXT: retq # encoding: [0xc3] 3843 %q = load i32, i32* %ptr_b 3844 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3845 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3846 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 3847 ret <8 x i32> %res 3848} 3849 3850define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 3851; X86-LABEL: test_mask_xor_epi32_rmbkz_256: 3852; X86: # %bb.0: 3853; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3854; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3855; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3856; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x00] 3857; X86-NEXT: retl # encoding: [0xc3] 3858; 3859; X64-LABEL: test_mask_xor_epi32_rmbkz_256: 3860; X64: # %bb.0: 3861; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3862; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07] 3863; X64-NEXT: retq # encoding: [0xc3] 3864 %q = load i32, i32* %ptr_b 3865 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3866 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3867 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 3868 ret <8 x i32> %res 3869} 3870 3871declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 3872 3873define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 3874; CHECK-LABEL: test_mask_andnot_epi32_rr_128: 3875; CHECK: # %bb.0: 3876; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1] 3877; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3878 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 3879 ret <4 x i32> %res 3880} 3881 3882define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 3883; X86-LABEL: test_mask_andnot_epi32_rrk_128: 3884; X86: # %bb.0: 3885; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3886; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3887; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1] 3888; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3889; X86-NEXT: retl # encoding: [0xc3] 3890; 3891; X64-LABEL: test_mask_andnot_epi32_rrk_128: 3892; X64: # %bb.0: 3893; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3894; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1] 3895; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3896; X64-NEXT: retq # encoding: [0xc3] 3897 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 3898 ret <4 x i32> %res 3899} 3900 3901define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 3902; X86-LABEL: test_mask_andnot_epi32_rrkz_128: 3903; X86: # %bb.0: 3904; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3905; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 3906; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1] 3907; X86-NEXT: retl # encoding: [0xc3] 3908; 3909; X64-LABEL: test_mask_andnot_epi32_rrkz_128: 3910; X64: # %bb.0: 3911; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 3912; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1] 3913; X64-NEXT: retq # encoding: [0xc3] 3914 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3915 ret <4 x i32> %res 3916} 3917 3918define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 3919; X86-LABEL: test_mask_andnot_epi32_rm_128: 3920; X86: # %bb.0: 3921; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3922; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00] 3923; X86-NEXT: retl # encoding: [0xc3] 3924; 3925; X64-LABEL: test_mask_andnot_epi32_rm_128: 3926; X64: # %bb.0: 3927; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07] 3928; X64-NEXT: retq # encoding: [0xc3] 3929 %b = load <4 x i32>, <4 x i32>* %ptr_b 3930 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 3931 ret <4 x i32> %res 3932} 3933 3934define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 3935; X86-LABEL: test_mask_andnot_epi32_rmk_128: 3936; X86: # %bb.0: 3937; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3938; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3939; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3940; X86-NEXT: vpandnd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x08] 3941; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3942; X86-NEXT: retl # encoding: [0xc3] 3943; 3944; X64-LABEL: test_mask_andnot_epi32_rmk_128: 3945; X64: # %bb.0: 3946; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3947; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f] 3948; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3949; X64-NEXT: retq # encoding: [0xc3] 3950 %b = load <4 x i32>, <4 x i32>* %ptr_b 3951 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 3952 ret <4 x i32> %res 3953} 3954 3955define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 3956; X86-LABEL: test_mask_andnot_epi32_rmkz_128: 3957; X86: # %bb.0: 3958; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3959; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3960; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3961; X86-NEXT: vpandnd (%eax), %xmm0, %xmm0 {%k1} {z} # 
encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x00] 3962; X86-NEXT: retl # encoding: [0xc3] 3963; 3964; X64-LABEL: test_mask_andnot_epi32_rmkz_128: 3965; X64: # %bb.0: 3966; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 3967; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07] 3968; X64-NEXT: retq # encoding: [0xc3] 3969 %b = load <4 x i32>, <4 x i32>* %ptr_b 3970 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 3971 ret <4 x i32> %res 3972} 3973 3974define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 3975; X86-LABEL: test_mask_andnot_epi32_rmb_128: 3976; X86: # %bb.0: 3977; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3978; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x00] 3979; X86-NEXT: retl # encoding: [0xc3] 3980; 3981; X64-LABEL: test_mask_andnot_epi32_rmb_128: 3982; X64: # %bb.0: 3983; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07] 3984; X64-NEXT: retq # encoding: [0xc3] 3985 %q = load i32, i32* %ptr_b 3986 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3987 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3988 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 3989 ret <4 x i32> %res 3990} 3991 3992define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 3993; X86-LABEL: test_mask_andnot_epi32_rmbk_128: 3994; X86: # %bb.0: 3995; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3996; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3997; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 3998; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x08] 3999; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4000; X86-NEXT: retl # encoding: [0xc3] 4001; 4002; X64-LABEL: test_mask_andnot_epi32_rmbk_128: 4003; X64: # %bb.0: 4004; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4005; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f] 4006; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4007; X64-NEXT: retq # encoding: [0xc3] 4008 %q = load i32, i32* %ptr_b 4009 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4010 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4011 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4012 ret <4 x i32> %res 4013} 4014 4015define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 4016; X86-LABEL: test_mask_andnot_epi32_rmbkz_128: 4017; X86: # %bb.0: 4018; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4019; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4020; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4021; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x00] 4022; X86-NEXT: retl # encoding: [0xc3] 4023; 4024; X64-LABEL: test_mask_andnot_epi32_rmbkz_128: 4025; X64: # %bb.0: 4026; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4027; X64-NEXT: 
vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07] 4028; X64-NEXT: retq # encoding: [0xc3] 4029 %q = load i32, i32* %ptr_b 4030 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4031 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4032 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4033 ret <4 x i32> %res 4034} 4035 4036declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 4037 4038define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 4039; CHECK-LABEL: test_mask_andnot_epi32_rr_256: 4040; CHECK: # %bb.0: 4041; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1] 4042; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4043 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4044 ret <8 x i32> %res 4045} 4046 4047define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 4048; X86-LABEL: test_mask_andnot_epi32_rrk_256: 4049; X86: # %bb.0: 4050; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4051; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4052; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1] 4053; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4054; X86-NEXT: retl # encoding: [0xc3] 4055; 4056; X64-LABEL: test_mask_andnot_epi32_rrk_256: 4057; X64: # %bb.0: 4058; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4059; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1] 4060; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4061; X64-NEXT: retq # encoding: [0xc3] 4062 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4063 ret <8 x i32> %res 4064} 4065 4066define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 4067; X86-LABEL: test_mask_andnot_epi32_rrkz_256: 4068; X86: # %bb.0: 4069; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4070; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4071; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1] 4072; X86-NEXT: retl # encoding: [0xc3] 4073; 4074; X64-LABEL: test_mask_andnot_epi32_rrkz_256: 4075; X64: # %bb.0: 4076; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4077; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1] 4078; X64-NEXT: retq # encoding: [0xc3] 4079 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4080 ret <8 x i32> %res 4081} 4082 4083define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 4084; X86-LABEL: test_mask_andnot_epi32_rm_256: 4085; X86: # %bb.0: 4086; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4087; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00] 4088; X86-NEXT: retl # encoding: [0xc3] 4089; 4090; X64-LABEL: test_mask_andnot_epi32_rm_256: 4091; X64: # %bb.0: 4092; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07] 4093; X64-NEXT: retq # encoding: [0xc3] 4094 %b = 
load <8 x i32>, <8 x i32>* %ptr_b 4095 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4096 ret <8 x i32> %res 4097} 4098 4099define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4100; X86-LABEL: test_mask_andnot_epi32_rmk_256: 4101; X86: # %bb.0: 4102; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4103; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4104; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4105; X86-NEXT: vpandnd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x08] 4106; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4107; X86-NEXT: retl # encoding: [0xc3] 4108; 4109; X64-LABEL: test_mask_andnot_epi32_rmk_256: 4110; X64: # %bb.0: 4111; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4112; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f] 4113; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4114; X64-NEXT: retq # encoding: [0xc3] 4115 %b = load <8 x i32>, <8 x i32>* %ptr_b 4116 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4117 ret <8 x i32> %res 4118} 4119 4120define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 4121; X86-LABEL: test_mask_andnot_epi32_rmkz_256: 4122; X86: # %bb.0: 4123; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4124; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4125; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4126; X86-NEXT: vpandnd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x00] 4127; X86-NEXT: retl # encoding: [0xc3] 4128; 4129; X64-LABEL: test_mask_andnot_epi32_rmkz_256: 4130; X64: # %bb.0: 4131; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4132; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07] 4133; X64-NEXT: retq # encoding: [0xc3] 4134 %b = load <8 x i32>, <8 x i32>* %ptr_b 4135 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4136 ret <8 x i32> %res 4137} 4138 4139define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4140; X86-LABEL: test_mask_andnot_epi32_rmb_256: 4141; X86: # %bb.0: 4142; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4143; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x00] 4144; X86-NEXT: retl # encoding: [0xc3] 4145; 4146; X64-LABEL: test_mask_andnot_epi32_rmb_256: 4147; X64: # %bb.0: 4148; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07] 4149; X64-NEXT: retq # encoding: [0xc3] 4150 %q = load i32, i32* %ptr_b 4151 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4152 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4153 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4154 ret <8 x i32> %res 4155} 4156 4157define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4158; X86-LABEL: test_mask_andnot_epi32_rmbk_256: 4159; X86: # %bb.0: 4160; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4161; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4162; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4163; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x08] 4164; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4165; X86-NEXT: retl # encoding: [0xc3] 4166; 4167; X64-LABEL: test_mask_andnot_epi32_rmbk_256: 4168; X64: # %bb.0: 4169; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4170; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f] 4171; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4172; X64-NEXT: retq # encoding: [0xc3] 4173 %q = load i32, i32* %ptr_b 4174 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4175 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4176 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4177 ret <8 x i32> %res 4178} 4179 4180define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 4181; X86-LABEL: test_mask_andnot_epi32_rmbkz_256: 4182; X86: # %bb.0: 4183; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4184; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4185; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4186; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x00] 4187; X86-NEXT: retl # encoding: [0xc3] 4188; 4189; X64-LABEL: test_mask_andnot_epi32_rmbkz_256: 4190; X64: # %bb.0: 4191; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4192; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07] 4193; X64-NEXT: retq # encoding: [0xc3] 4194 %q = load i32, i32* %ptr_b 4195 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4196 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4197 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4198 ret <8 x i32> %res 4199} 4200 4201declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 4202 4203define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) { 4204; CHECK-LABEL: test_mask_andnot_epi64_rr_128: 4205; CHECK: # %bb.0: 4206; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1] 4207; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4208 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 4209 ret <2 x i64> %res 4210} 4211 4212define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) { 4213; X86-LABEL: test_mask_andnot_epi64_rrk_128: 4214; X86: # %bb.0: 4215; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4216; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4217; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1] 4218; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4219; X86-NEXT: retl # encoding: [0xc3] 4220; 4221; X64-LABEL: test_mask_andnot_epi64_rrk_128: 4222; X64: # %bb.0: 4223; X64-NEXT: kmovw %edi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xcf] 4224; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1] 4225; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4226; X64-NEXT: retq # encoding: [0xc3] 4227 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 4228 ret <2 x i64> %res 4229} 4230 4231define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 4232; X86-LABEL: test_mask_andnot_epi64_rrkz_128: 4233; X86: # %bb.0: 4234; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4235; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4236; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1] 4237; X86-NEXT: retl # encoding: [0xc3] 4238; 4239; X64-LABEL: test_mask_andnot_epi64_rrkz_128: 4240; X64: # %bb.0: 4241; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4242; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1] 4243; X64-NEXT: retq # encoding: [0xc3] 4244 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 4245 ret <2 x i64> %res 4246} 4247 4248define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) { 4249; X86-LABEL: test_mask_andnot_epi64_rm_128: 4250; X86: # %bb.0: 4251; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4252; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00] 4253; X86-NEXT: retl # encoding: [0xc3] 4254; 4255; X64-LABEL: test_mask_andnot_epi64_rm_128: 4256; X64: # %bb.0: 4257; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07] 4258; X64-NEXT: retq # encoding: [0xc3] 4259 %b = load <2 x i64>, <2 x i64>* %ptr_b 4260 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 4261 ret <2 x i64> %res 4262} 4263 4264define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) { 4265; X86-LABEL: test_mask_andnot_epi64_rmk_128: 4266; X86: # %bb.0: 4267; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4268; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4269; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4270; X86-NEXT: vpandnq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x08] 4271; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4272; X86-NEXT: retl # encoding: [0xc3] 4273; 4274; X64-LABEL: test_mask_andnot_epi64_rmk_128: 4275; X64: # %bb.0: 4276; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4277; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f] 4278; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4279; X64-NEXT: retq # encoding: [0xc3] 4280 %b = load <2 x i64>, <2 x i64>* %ptr_b 4281 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 4282 ret <2 x i64> %res 4283} 4284 4285define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) { 4286; X86-LABEL: test_mask_andnot_epi64_rmkz_128: 4287; X86: # %bb.0: 4288; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4289; X86-NEXT: movzbl {{[0-9]+}}(%esp), 
%ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4290; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4291; X86-NEXT: vpandnq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x00] 4292; X86-NEXT: retl # encoding: [0xc3] 4293; 4294; X64-LABEL: test_mask_andnot_epi64_rmkz_128: 4295; X64: # %bb.0: 4296; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4297; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07] 4298; X64-NEXT: retq # encoding: [0xc3] 4299 %b = load <2 x i64>, <2 x i64>* %ptr_b 4300 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 4301 ret <2 x i64> %res 4302} 4303 4304define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) { 4305; X86-LABEL: test_mask_andnot_epi64_rmb_128: 4306; X86: # %bb.0: 4307; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4308; X86-NEXT: vpandnq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x00] 4309; X86-NEXT: retl # encoding: [0xc3] 4310; 4311; X64-LABEL: test_mask_andnot_epi64_rmb_128: 4312; X64: # %bb.0: 4313; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07] 4314; X64-NEXT: retq # encoding: [0xc3] 4315 %q = load i64, i64* %ptr_b 4316 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 4317 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 4318 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 4319 ret <2 x i64> %res 4320} 4321 4322define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) { 4323; X86-LABEL: test_mask_andnot_epi64_rmbk_128: 4324; X86: # %bb.0: 4325; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4326; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4327; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4328; X86-NEXT: vpandnq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x08] 4329; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4330; X86-NEXT: retl # encoding: [0xc3] 4331; 4332; X64-LABEL: test_mask_andnot_epi64_rmbk_128: 4333; X64: # %bb.0: 4334; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4335; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f] 4336; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4337; X64-NEXT: retq # encoding: [0xc3] 4338 %q = load i64, i64* %ptr_b 4339 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 4340 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 4341 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 4342 ret <2 x i64> %res 4343} 4344 4345define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) { 4346; X86-LABEL: test_mask_andnot_epi64_rmbkz_128: 4347; X86: # %bb.0: 4348; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4349; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4350; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4351; X86-NEXT: vpandnq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x00] 4352; X86-NEXT: retl # encoding: 
[0xc3] 4353; 4354; X64-LABEL: test_mask_andnot_epi64_rmbkz_128: 4355; X64: # %bb.0: 4356; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4357; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07] 4358; X64-NEXT: retq # encoding: [0xc3] 4359 %q = load i64, i64* %ptr_b 4360 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 4361 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 4362 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 4363 ret <2 x i64> %res 4364} 4365 4366declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 4367 4368define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) { 4369; CHECK-LABEL: test_mask_andnot_epi64_rr_256: 4370; CHECK: # %bb.0: 4371; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1] 4372; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4373 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 4374 ret <4 x i64> %res 4375} 4376 4377define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) { 4378; X86-LABEL: test_mask_andnot_epi64_rrk_256: 4379; X86: # %bb.0: 4380; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4381; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4382; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1] 4383; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4384; X86-NEXT: retl # encoding: [0xc3] 4385; 4386; X64-LABEL: test_mask_andnot_epi64_rrk_256: 4387; X64: # %bb.0: 4388; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4389; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1] 4390; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4391; X64-NEXT: retq # encoding: [0xc3] 4392 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 4393 ret <4 x i64> %res 4394} 4395 4396define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 4397; X86-LABEL: test_mask_andnot_epi64_rrkz_256: 4398; X86: # %bb.0: 4399; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4400; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4401; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1] 4402; X86-NEXT: retl # encoding: [0xc3] 4403; 4404; X64-LABEL: test_mask_andnot_epi64_rrkz_256: 4405; X64: # %bb.0: 4406; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4407; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1] 4408; X64-NEXT: retq # encoding: [0xc3] 4409 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 4410 ret <4 x i64> %res 4411} 4412 4413define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) { 4414; X86-LABEL: test_mask_andnot_epi64_rm_256: 4415; X86: # %bb.0: 4416; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4417; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00] 4418; X86-NEXT: retl # encoding: [0xc3] 4419; 4420; X64-LABEL: 
test_mask_andnot_epi64_rm_256: 4421; X64: # %bb.0: 4422; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07] 4423; X64-NEXT: retq # encoding: [0xc3] 4424 %b = load <4 x i64>, <4 x i64>* %ptr_b 4425 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 4426 ret <4 x i64> %res 4427} 4428 4429define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) { 4430; X86-LABEL: test_mask_andnot_epi64_rmk_256: 4431; X86: # %bb.0: 4432; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4433; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4434; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4435; X86-NEXT: vpandnq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x08] 4436; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4437; X86-NEXT: retl # encoding: [0xc3] 4438; 4439; X64-LABEL: test_mask_andnot_epi64_rmk_256: 4440; X64: # %bb.0: 4441; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4442; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f] 4443; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4444; X64-NEXT: retq # encoding: [0xc3] 4445 %b = load <4 x i64>, <4 x i64>* %ptr_b 4446 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 4447 ret <4 x i64> %res 4448} 4449 4450define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) { 4451; X86-LABEL: test_mask_andnot_epi64_rmkz_256: 4452; X86: # %bb.0: 4453; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4454; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4455; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4456; X86-NEXT: vpandnq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x00] 4457; X86-NEXT: retl # encoding: [0xc3] 4458; 4459; X64-LABEL: test_mask_andnot_epi64_rmkz_256: 4460; X64: # %bb.0: 4461; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4462; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07] 4463; X64-NEXT: retq # encoding: [0xc3] 4464 %b = load <4 x i64>, <4 x i64>* %ptr_b 4465 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 4466 ret <4 x i64> %res 4467} 4468 4469define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) { 4470; X86-LABEL: test_mask_andnot_epi64_rmb_256: 4471; X86: # %bb.0: 4472; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4473; X86-NEXT: vpandnq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x00] 4474; X86-NEXT: retl # encoding: [0xc3] 4475; 4476; X64-LABEL: test_mask_andnot_epi64_rmb_256: 4477; X64: # %bb.0: 4478; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07] 4479; X64-NEXT: retq # encoding: [0xc3] 4480 %q = load i64, i64* %ptr_b 4481 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 4482 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 4483 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 4484 ret <4 x i64> %res 4485} 4486 4487define <4 x i64> 
@test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) { 4488; X86-LABEL: test_mask_andnot_epi64_rmbk_256: 4489; X86: # %bb.0: 4490; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4491; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4492; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4493; X86-NEXT: vpandnq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x08] 4494; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4495; X86-NEXT: retl # encoding: [0xc3] 4496; 4497; X64-LABEL: test_mask_andnot_epi64_rmbk_256: 4498; X64: # %bb.0: 4499; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4500; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f] 4501; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4502; X64-NEXT: retq # encoding: [0xc3] 4503 %q = load i64, i64* %ptr_b 4504 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 4505 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 4506 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 4507 ret <4 x i64> %res 4508} 4509 4510define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) { 4511; X86-LABEL: test_mask_andnot_epi64_rmbkz_256: 4512; X86: # %bb.0: 4513; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4514; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4515; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4516; X86-NEXT: vpandnq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x00] 4517; X86-NEXT: retl # encoding: [0xc3] 4518; 4519; X64-LABEL: test_mask_andnot_epi64_rmbkz_256: 4520; X64: # %bb.0: 4521; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4522; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07] 4523; X64-NEXT: retq # encoding: [0xc3] 4524 %q = load i64, i64* %ptr_b 4525 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 4526 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 4527 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 4528 ret <4 x i64> %res 4529} 4530 4531declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 4532 4533define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 4534; CHECK-LABEL: test_mask_add_epi32_rr_128: 4535; CHECK: # %bb.0: 4536; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] 4537; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4538 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4539 ret <4 x i32> %res 4540} 4541 4542define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 4543; X86-LABEL: test_mask_add_epi32_rrk_128: 4544; X86: # %bb.0: 4545; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4546; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4547; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] 4548; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4549; X86-NEXT: retl # encoding: [0xc3] 4550; 4551; X64-LABEL: test_mask_add_epi32_rrk_128: 4552; X64: # %bb.0: 4553; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4554; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] 4555; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4556; X64-NEXT: retq # encoding: [0xc3] 4557 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4558 ret <4 x i32> %res 4559} 4560 4561define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 4562; X86-LABEL: test_mask_add_epi32_rrkz_128: 4563; X86: # %bb.0: 4564; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4565; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4566; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1] 4567; X86-NEXT: retl # encoding: [0xc3] 4568; 4569; X64-LABEL: test_mask_add_epi32_rrkz_128: 4570; X64: # %bb.0: 4571; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4572; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1] 4573; X64-NEXT: retq # encoding: [0xc3] 4574 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4575 ret <4 x i32> %res 4576} 4577 4578define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 4579; X86-LABEL: test_mask_add_epi32_rm_128: 4580; X86: # %bb.0: 4581; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4582; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x00] 4583; X86-NEXT: retl # encoding: [0xc3] 4584; 4585; X64-LABEL: test_mask_add_epi32_rm_128: 4586; X64: # %bb.0: 4587; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07] 4588; X64-NEXT: retq # encoding: [0xc3] 4589 %b = load <4 x i32>, <4 x i32>* %ptr_b 4590 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4591 ret <4 x i32> %res 4592} 4593 4594define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4595; X86-LABEL: test_mask_add_epi32_rmk_128: 4596; X86: # %bb.0: 4597; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4598; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4599; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4600; X86-NEXT: vpaddd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x08] 4601; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4602; X86-NEXT: retl # encoding: [0xc3] 4603; 4604; X64-LABEL: test_mask_add_epi32_rmk_128: 4605; X64: # %bb.0: 4606; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4607; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f] 4608; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4609; X64-NEXT: retq # encoding: [0xc3] 4610 %b = load <4 x i32>, <4 x i32>* %ptr_b 4611 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4612 ret <4 x i32> %res 4613} 4614 4615define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* 
%ptr_b, i8 %mask) { 4616; X86-LABEL: test_mask_add_epi32_rmkz_128: 4617; X86: # %bb.0: 4618; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4619; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4620; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4621; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x00] 4622; X86-NEXT: retl # encoding: [0xc3] 4623; 4624; X64-LABEL: test_mask_add_epi32_rmkz_128: 4625; X64: # %bb.0: 4626; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4627; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07] 4628; X64-NEXT: retq # encoding: [0xc3] 4629 %b = load <4 x i32>, <4 x i32>* %ptr_b 4630 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4631 ret <4 x i32> %res 4632} 4633 4634define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 4635; X86-LABEL: test_mask_add_epi32_rmb_128: 4636; X86: # %bb.0: 4637; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4638; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x00] 4639; X86-NEXT: retl # encoding: [0xc3] 4640; 4641; X64-LABEL: test_mask_add_epi32_rmb_128: 4642; X64: # %bb.0: 4643; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07] 4644; X64-NEXT: retq # encoding: [0xc3] 4645 %q = load i32, i32* %ptr_b 4646 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4647 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4648 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4649 ret <4 x i32> %res 4650} 4651 4652define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4653; X86-LABEL: test_mask_add_epi32_rmbk_128: 4654; X86: # %bb.0: 4655; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4656; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4657; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4658; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x08] 4659; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4660; X86-NEXT: retl # encoding: [0xc3] 4661; 4662; X64-LABEL: test_mask_add_epi32_rmbk_128: 4663; X64: # %bb.0: 4664; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4665; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] 4666; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4667; X64-NEXT: retq # encoding: [0xc3] 4668 %q = load i32, i32* %ptr_b 4669 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4670 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4671 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4672 ret <4 x i32> %res 4673} 4674 4675define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 4676; X86-LABEL: test_mask_add_epi32_rmbkz_128: 4677; X86: # %bb.0: 4678; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4679; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4680; X86-NEXT: kmovw %ecx, %k1 # 
encoding: [0xc5,0xf8,0x92,0xc9] 4681; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x00] 4682; X86-NEXT: retl # encoding: [0xc3] 4683; 4684; X64-LABEL: test_mask_add_epi32_rmbkz_128: 4685; X64: # %bb.0: 4686; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4687; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07] 4688; X64-NEXT: retq # encoding: [0xc3] 4689 %q = load i32, i32* %ptr_b 4690 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4691 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4692 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4693 ret <4 x i32> %res 4694} 4695 4696declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 4697 4698define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 4699; CHECK-LABEL: test_mask_sub_epi32_rr_128: 4700; CHECK: # %bb.0: 4701; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1] 4702; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4703 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4704 ret <4 x i32> %res 4705} 4706 4707define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 4708; X86-LABEL: test_mask_sub_epi32_rrk_128: 4709; X86: # %bb.0: 4710; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4711; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4712; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] 4713; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4714; X86-NEXT: retl # encoding: [0xc3] 4715; 4716; X64-LABEL: test_mask_sub_epi32_rrk_128: 4717; X64: # %bb.0: 4718; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4719; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] 4720; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4721; X64-NEXT: retq # encoding: [0xc3] 4722 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4723 ret <4 x i32> %res 4724} 4725 4726define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 4727; X86-LABEL: test_mask_sub_epi32_rrkz_128: 4728; X86: # %bb.0: 4729; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4730; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4731; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] 4732; X86-NEXT: retl # encoding: [0xc3] 4733; 4734; X64-LABEL: test_mask_sub_epi32_rrkz_128: 4735; X64: # %bb.0: 4736; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4737; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] 4738; X64-NEXT: retq # encoding: [0xc3] 4739 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4740 ret <4 x i32> %res 4741} 4742 4743define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 4744; X86-LABEL: test_mask_sub_epi32_rm_128: 4745; X86: # %bb.0: 4746; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4747; 
X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x00] 4748; X86-NEXT: retl # encoding: [0xc3] 4749; 4750; X64-LABEL: test_mask_sub_epi32_rm_128: 4751; X64: # %bb.0: 4752; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07] 4753; X64-NEXT: retq # encoding: [0xc3] 4754 %b = load <4 x i32>, <4 x i32>* %ptr_b 4755 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4756 ret <4 x i32> %res 4757} 4758 4759define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4760; X86-LABEL: test_mask_sub_epi32_rmk_128: 4761; X86: # %bb.0: 4762; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4763; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4764; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4765; X86-NEXT: vpsubd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x08] 4766; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4767; X86-NEXT: retl # encoding: [0xc3] 4768; 4769; X64-LABEL: test_mask_sub_epi32_rmk_128: 4770; X64: # %bb.0: 4771; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4772; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] 4773; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4774; X64-NEXT: retq # encoding: [0xc3] 4775 %b = load <4 x i32>, <4 x i32>* %ptr_b 4776 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4777 ret <4 x i32> %res 4778} 4779 4780define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 4781; X86-LABEL: test_mask_sub_epi32_rmkz_128: 4782; X86: # %bb.0: 4783; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4784; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4785; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4786; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x00] 4787; X86-NEXT: retl # encoding: [0xc3] 4788; 4789; X64-LABEL: test_mask_sub_epi32_rmkz_128: 4790; X64: # %bb.0: 4791; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4792; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07] 4793; X64-NEXT: retq # encoding: [0xc3] 4794 %b = load <4 x i32>, <4 x i32>* %ptr_b 4795 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4796 ret <4 x i32> %res 4797} 4798 4799define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 4800; X86-LABEL: test_mask_sub_epi32_rmb_128: 4801; X86: # %bb.0: 4802; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4803; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x00] 4804; X86-NEXT: retl # encoding: [0xc3] 4805; 4806; X64-LABEL: test_mask_sub_epi32_rmb_128: 4807; X64: # %bb.0: 4808; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07] 4809; X64-NEXT: retq # encoding: [0xc3] 4810 %q = load i32, i32* %ptr_b 4811 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4812 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4813 %res = call <4 x i32> 
@llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4814 ret <4 x i32> %res 4815} 4816 4817define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4818; X86-LABEL: test_mask_sub_epi32_rmbk_128: 4819; X86: # %bb.0: 4820; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4821; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4822; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4823; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x08] 4824; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4825; X86-NEXT: retl # encoding: [0xc3] 4826; 4827; X64-LABEL: test_mask_sub_epi32_rmbk_128: 4828; X64: # %bb.0: 4829; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4830; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] 4831; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4832; X64-NEXT: retq # encoding: [0xc3] 4833 %q = load i32, i32* %ptr_b 4834 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4835 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4836 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4837 ret <4 x i32> %res 4838} 4839 4840define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 4841; X86-LABEL: test_mask_sub_epi32_rmbkz_128: 4842; X86: # %bb.0: 4843; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4844; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4845; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4846; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x00] 4847; X86-NEXT: retl # encoding: [0xc3] 4848; 4849; X64-LABEL: test_mask_sub_epi32_rmbkz_128: 4850; X64: # %bb.0: 4851; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4852; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07] 4853; X64-NEXT: retq # encoding: [0xc3] 4854 %q = load i32, i32* %ptr_b 4855 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4856 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4857 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4858 ret <4 x i32> %res 4859} 4860 4861declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 4862 4863define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 4864; CHECK-LABEL: test_mask_sub_epi32_rr_256: 4865; CHECK: # %bb.0: 4866; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1] 4867; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4868 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4869 ret <8 x i32> %res 4870} 4871 4872define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 4873; X86-LABEL: test_mask_sub_epi32_rrk_256: 4874; X86: # %bb.0: 4875; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4876; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4877; X86-NEXT: vpsubd 
%ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] 4878; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4879; X86-NEXT: retl # encoding: [0xc3] 4880; 4881; X64-LABEL: test_mask_sub_epi32_rrk_256: 4882; X64: # %bb.0: 4883; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4884; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] 4885; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4886; X64-NEXT: retq # encoding: [0xc3] 4887 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4888 ret <8 x i32> %res 4889} 4890 4891define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 4892; X86-LABEL: test_mask_sub_epi32_rrkz_256: 4893; X86: # %bb.0: 4894; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4895; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4896; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] 4897; X86-NEXT: retl # encoding: [0xc3] 4898; 4899; X64-LABEL: test_mask_sub_epi32_rrkz_256: 4900; X64: # %bb.0: 4901; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4902; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] 4903; X64-NEXT: retq # encoding: [0xc3] 4904 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4905 ret <8 x i32> %res 4906} 4907 4908define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 4909; X86-LABEL: test_mask_sub_epi32_rm_256: 4910; X86: # %bb.0: 4911; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4912; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x00] 4913; X86-NEXT: retl # encoding: [0xc3] 4914; 4915; X64-LABEL: test_mask_sub_epi32_rm_256: 4916; X64: # %bb.0: 4917; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07] 4918; X64-NEXT: retq # encoding: [0xc3] 4919 %b = load <8 x i32>, <8 x i32>* %ptr_b 4920 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4921 ret <8 x i32> %res 4922} 4923 4924define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4925; X86-LABEL: test_mask_sub_epi32_rmk_256: 4926; X86: # %bb.0: 4927; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4928; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4929; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4930; X86-NEXT: vpsubd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x08] 4931; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4932; X86-NEXT: retl # encoding: [0xc3] 4933; 4934; X64-LABEL: test_mask_sub_epi32_rmk_256: 4935; X64: # %bb.0: 4936; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4937; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] 4938; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4939; X64-NEXT: retq # encoding: [0xc3] 4940 %b = load <8 x i32>, <8 x i32>* %ptr_b 4941 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 
%mask) 4942 ret <8 x i32> %res 4943} 4944 4945define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 4946; X86-LABEL: test_mask_sub_epi32_rmkz_256: 4947; X86: # %bb.0: 4948; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4949; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4950; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4951; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x00] 4952; X86-NEXT: retl # encoding: [0xc3] 4953; 4954; X64-LABEL: test_mask_sub_epi32_rmkz_256: 4955; X64: # %bb.0: 4956; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4957; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07] 4958; X64-NEXT: retq # encoding: [0xc3] 4959 %b = load <8 x i32>, <8 x i32>* %ptr_b 4960 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4961 ret <8 x i32> %res 4962} 4963 4964define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4965; X86-LABEL: test_mask_sub_epi32_rmb_256: 4966; X86: # %bb.0: 4967; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4968; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x00] 4969; X86-NEXT: retl # encoding: [0xc3] 4970; 4971; X64-LABEL: test_mask_sub_epi32_rmb_256: 4972; X64: # %bb.0: 4973; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07] 4974; X64-NEXT: retq # encoding: [0xc3] 4975 %q = load i32, i32* %ptr_b 4976 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4977 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4978 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4979 ret <8 x i32> %res 4980} 4981 4982define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4983; X86-LABEL: test_mask_sub_epi32_rmbk_256: 4984; X86: # %bb.0: 4985; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4986; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4987; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4988; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x08] 4989; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4990; X86-NEXT: retl # encoding: [0xc3] 4991; 4992; X64-LABEL: test_mask_sub_epi32_rmbk_256: 4993; X64: # %bb.0: 4994; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4995; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] 4996; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4997; X64-NEXT: retq # encoding: [0xc3] 4998 %q = load i32, i32* %ptr_b 4999 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 5000 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 5001 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 5002 ret <8 x i32> %res 5003} 5004 5005define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 5006; X86-LABEL: test_mask_sub_epi32_rmbkz_256: 5007; X86: # %bb.0: 5008; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5009; 
X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 5010; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 5011; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x00] 5012; X86-NEXT: retl # encoding: [0xc3] 5013; 5014; X64-LABEL: test_mask_sub_epi32_rmbkz_256: 5015; X64: # %bb.0: 5016; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 5017; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07] 5018; X64-NEXT: retq # encoding: [0xc3] 5019 %q = load i32, i32* %ptr_b 5020 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 5021 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 5022 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 5023 ret <8 x i32> %res 5024} 5025 5026declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 5027 5028define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 5029; CHECK-LABEL: test_mask_add_epi32_rr_256: 5030; CHECK: # %bb.0: 5031; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] 5032; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5033 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 5034 ret <8 x i32> %res 5035} 5036 5037define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 5038; X86-LABEL: test_mask_add_epi32_rrk_256: 5039; X86: # %bb.0: 5040; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5041; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5042; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] 5043; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5044; X86-NEXT: retl # encoding: [0xc3] 5045; 5046; X64-LABEL: test_mask_add_epi32_rrk_256: 5047; X64: # %bb.0: 5048; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5049; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] 5050; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5051; X64-NEXT: retq # encoding: [0xc3] 5052 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 5053 ret <8 x i32> %res 5054} 5055 5056define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 5057; X86-LABEL: test_mask_add_epi32_rrkz_256: 5058; X86: # %bb.0: 5059; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5060; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5061; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] 5062; X86-NEXT: retl # encoding: [0xc3] 5063; 5064; X64-LABEL: test_mask_add_epi32_rrkz_256: 5065; X64: # %bb.0: 5066; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5067; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] 5068; X64-NEXT: retq # encoding: [0xc3] 5069 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 5070 ret <8 x i32> %res 5071} 5072 5073define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 5074; X86-LABEL: 
test_mask_add_epi32_rm_256: 5075; X86: # %bb.0: 5076; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5077; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x00] 5078; X86-NEXT: retl # encoding: [0xc3] 5079; 5080; X64-LABEL: test_mask_add_epi32_rm_256: 5081; X64: # %bb.0: 5082; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07] 5083; X64-NEXT: retq # encoding: [0xc3] 5084 %b = load <8 x i32>, <8 x i32>* %ptr_b 5085 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 5086 ret <8 x i32> %res 5087} 5088 5089define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 5090; X86-LABEL: test_mask_add_epi32_rmk_256: 5091; X86: # %bb.0: 5092; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5093; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 5094; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 5095; X86-NEXT: vpaddd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x08] 5096; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 5097; X86-NEXT: retl # encoding: [0xc3] 5098; 5099; X64-LABEL: test_mask_add_epi32_rmk_256: 5100; X64: # %bb.0: 5101; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 5102; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] 5103; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 5104; X64-NEXT: retq # encoding: [0xc3] 5105 %b = load <8 x i32>, <8 x i32>* %ptr_b 5106 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 5107 ret <8 x i32> %res 5108} 5109 5110define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 5111; X86-LABEL: test_mask_add_epi32_rmkz_256: 5112; X86: # %bb.0: 5113; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5114; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 5115; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 5116; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x00] 5117; X86-NEXT: retl # encoding: [0xc3] 5118; 5119; X64-LABEL: test_mask_add_epi32_rmkz_256: 5120; X64: # %bb.0: 5121; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 5122; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07] 5123; X64-NEXT: retq # encoding: [0xc3] 5124 %b = load <8 x i32>, <8 x i32>* %ptr_b 5125 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 5126 ret <8 x i32> %res 5127} 5128 5129define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 5130; X86-LABEL: test_mask_add_epi32_rmb_256: 5131; X86: # %bb.0: 5132; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5133; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x00] 5134; X86-NEXT: retl # encoding: [0xc3] 5135; 5136; X64-LABEL: test_mask_add_epi32_rmb_256: 5137; X64: # %bb.0: 5138; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07] 5139; X64-NEXT: retq # encoding: [0xc3] 5140 %q = load i32, i32* %ptr_b 5141 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 
0 5142 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 5143 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 5144 ret <8 x i32> %res 5145} 5146 5147define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 5148; X86-LABEL: test_mask_add_epi32_rmbk_256: 5149; X86: # %bb.0: 5150; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5151; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 5152; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 5153; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x08] 5154; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 5155; X86-NEXT: retl # encoding: [0xc3] 5156; 5157; X64-LABEL: test_mask_add_epi32_rmbk_256: 5158; X64: # %bb.0: 5159; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 5160; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] 5161; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 5162; X64-NEXT: retq # encoding: [0xc3] 5163 %q = load i32, i32* %ptr_b 5164 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 5165 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 5166 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 5167 ret <8 x i32> %res 5168} 5169 5170define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 5171; X86-LABEL: test_mask_add_epi32_rmbkz_256: 5172; X86: # %bb.0: 5173; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5174; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 5175; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 5176; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x00] 5177; X86-NEXT: retl # encoding: [0xc3] 5178; 5179; X64-LABEL: test_mask_add_epi32_rmbkz_256: 5180; X64: # %bb.0: 5181; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 5182; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07] 5183; X64-NEXT: retq # encoding: [0xc3] 5184 %q = load i32, i32* %ptr_b 5185 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 5186 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 5187 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 5188 ret <8 x i32> %res 5189} 5190 5191declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 5192 5193define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5194; X86-LABEL: test_mm512_maskz_add_ps_256: 5195; X86: # %bb.0: 5196; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5197; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5198; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] 5199; X86-NEXT: retl # encoding: [0xc3] 5200; 5201; X64-LABEL: test_mm512_maskz_add_ps_256: 5202; X64: # %bb.0: 5203; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5204; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] 5205; 
X64-NEXT: retq # encoding: [0xc3] 5206 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 5207 ret <8 x float> %res 5208} 5209 5210define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 5211; X86-LABEL: test_mm512_mask_add_ps_256: 5212; X86: # %bb.0: 5213; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5214; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5215; X86-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] 5216; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5217; X86-NEXT: retl # encoding: [0xc3] 5218; 5219; X64-LABEL: test_mm512_mask_add_ps_256: 5220; X64: # %bb.0: 5221; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5222; X64-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] 5223; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5224; X64-NEXT: retq # encoding: [0xc3] 5225 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 5226 ret <8 x float> %res 5227} 5228 5229define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5230; CHECK-LABEL: test_mm512_add_ps_256: 5231; CHECK: # %bb.0: 5232; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] 5233; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5234 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 5235 ret <8 x float> %res 5236} 5237declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 5238 5239define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5240; X86-LABEL: test_mm512_maskz_add_ps_128: 5241; X86: # %bb.0: 5242; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5243; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5244; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] 5245; X86-NEXT: retl # encoding: [0xc3] 5246; 5247; X64-LABEL: test_mm512_maskz_add_ps_128: 5248; X64: # %bb.0: 5249; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5250; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] 5251; X64-NEXT: retq # encoding: [0xc3] 5252 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 5253 ret <4 x float> %res 5254} 5255 5256define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 5257; X86-LABEL: test_mm512_mask_add_ps_128: 5258; X86: # %bb.0: 5259; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5260; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5261; X86-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] 5262; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5263; X86-NEXT: retl # encoding: [0xc3] 5264; 5265; X64-LABEL: test_mm512_mask_add_ps_128: 5266; X64: # %bb.0: 5267; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5268; X64-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] 
5269; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5270; X64-NEXT: retq # encoding: [0xc3] 5271 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 5272 ret <4 x float> %res 5273} 5274 5275define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5276; CHECK-LABEL: test_mm512_add_ps_128: 5277; CHECK: # %bb.0: 5278; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] 5279; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5280 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 5281 ret <4 x float> %res 5282} 5283declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 5284 5285define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5286; X86-LABEL: test_mm512_maskz_sub_ps_256: 5287; X86: # %bb.0: 5288; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5289; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5290; X86-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] 5291; X86-NEXT: retl # encoding: [0xc3] 5292; 5293; X64-LABEL: test_mm512_maskz_sub_ps_256: 5294; X64: # %bb.0: 5295; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5296; X64-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] 5297; X64-NEXT: retq # encoding: [0xc3] 5298 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 5299 ret <8 x float> %res 5300} 5301 5302define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 5303; X86-LABEL: test_mm512_mask_sub_ps_256: 5304; X86: # %bb.0: 5305; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5306; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5307; X86-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] 5308; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5309; X86-NEXT: retl # encoding: [0xc3] 5310; 5311; X64-LABEL: test_mm512_mask_sub_ps_256: 5312; X64: # %bb.0: 5313; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5314; X64-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] 5315; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5316; X64-NEXT: retq # encoding: [0xc3] 5317 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 5318 ret <8 x float> %res 5319} 5320 5321define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5322; CHECK-LABEL: test_mm512_sub_ps_256: 5323; CHECK: # %bb.0: 5324; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1] 5325; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5326 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 5327 ret <8 x float> %res 5328} 5329declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 5330 5331define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5332; X86-LABEL: 
test_mm512_maskz_sub_ps_128: 5333; X86: # %bb.0: 5334; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5335; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5336; X86-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] 5337; X86-NEXT: retl # encoding: [0xc3] 5338; 5339; X64-LABEL: test_mm512_maskz_sub_ps_128: 5340; X64: # %bb.0: 5341; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5342; X64-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] 5343; X64-NEXT: retq # encoding: [0xc3] 5344 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 5345 ret <4 x float> %res 5346} 5347 5348define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 5349; X86-LABEL: test_mm512_mask_sub_ps_128: 5350; X86: # %bb.0: 5351; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5352; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5353; X86-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] 5354; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5355; X86-NEXT: retl # encoding: [0xc3] 5356; 5357; X64-LABEL: test_mm512_mask_sub_ps_128: 5358; X64: # %bb.0: 5359; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5360; X64-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] 5361; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5362; X64-NEXT: retq # encoding: [0xc3] 5363 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 5364 ret <4 x float> %res 5365} 5366 5367define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5368; CHECK-LABEL: test_mm512_sub_ps_128: 5369; CHECK: # %bb.0: 5370; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 5371; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5372 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 5373 ret <4 x float> %res 5374} 5375declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 5376 5377define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5378; X86-LABEL: test_mm512_maskz_mul_ps_256: 5379; X86: # %bb.0: 5380; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5381; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5382; X86-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] 5383; X86-NEXT: retl # encoding: [0xc3] 5384; 5385; X64-LABEL: test_mm512_maskz_mul_ps_256: 5386; X64: # %bb.0: 5387; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5388; X64-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] 5389; X64-NEXT: retq # encoding: [0xc3] 5390 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 5391 ret <8 x float> %res 5392} 5393 5394define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 5395; X86-LABEL: test_mm512_mask_mul_ps_256: 5396; X86: # %bb.0: 5397; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5398; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5399; X86-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] 5400; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5401; X86-NEXT: retl # encoding: [0xc3] 5402; 5403; X64-LABEL: test_mm512_mask_mul_ps_256: 5404; X64: # %bb.0: 5405; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5406; X64-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] 5407; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5408; X64-NEXT: retq # encoding: [0xc3] 5409 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 5410 ret <8 x float> %res 5411} 5412 5413define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5414; CHECK-LABEL: test_mm512_mul_ps_256: 5415; CHECK: # %bb.0: 5416; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1] 5417; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5418 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 5419 ret <8 x float> %res 5420} 5421declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 5422 5423define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5424; X86-LABEL: test_mm512_maskz_mul_ps_128: 5425; X86: # %bb.0: 5426; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5427; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5428; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] 5429; X86-NEXT: retl # encoding: [0xc3] 5430; 5431; X64-LABEL: test_mm512_maskz_mul_ps_128: 5432; X64: # %bb.0: 5433; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5434; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] 5435; X64-NEXT: retq # encoding: [0xc3] 5436 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 5437 ret <4 x float> %res 5438} 5439 5440define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 5441; X86-LABEL: test_mm512_mask_mul_ps_128: 5442; X86: # %bb.0: 5443; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5444; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5445; X86-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] 5446; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5447; X86-NEXT: retl # encoding: [0xc3] 5448; 5449; X64-LABEL: test_mm512_mask_mul_ps_128: 5450; X64: # %bb.0: 5451; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5452; X64-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] 5453; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5454; X64-NEXT: retq # encoding: [0xc3] 5455 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 5456 ret <4 x float> %res 5457} 5458 5459define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5460; CHECK-LABEL: 
test_mm512_mul_ps_128: 5461; CHECK: # %bb.0: 5462; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] 5463; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5464 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 5465 ret <4 x float> %res 5466} 5467declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 5468 5469define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5470; X86-LABEL: test_mm512_maskz_div_ps_256: 5471; X86: # %bb.0: 5472; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5473; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5474; X86-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] 5475; X86-NEXT: retl # encoding: [0xc3] 5476; 5477; X64-LABEL: test_mm512_maskz_div_ps_256: 5478; X64: # %bb.0: 5479; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5480; X64-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] 5481; X64-NEXT: retq # encoding: [0xc3] 5482 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 5483 ret <8 x float> %res 5484} 5485 5486define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 5487; X86-LABEL: test_mm512_mask_div_ps_256: 5488; X86: # %bb.0: 5489; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5490; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5491; X86-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] 5492; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5493; X86-NEXT: retl # encoding: [0xc3] 5494; 5495; X64-LABEL: test_mm512_mask_div_ps_256: 5496; X64: # %bb.0: 5497; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5498; X64-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] 5499; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5500; X64-NEXT: retq # encoding: [0xc3] 5501 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 5502 ret <8 x float> %res 5503} 5504 5505define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5506; CHECK-LABEL: test_mm512_div_ps_256: 5507; CHECK: # %bb.0: 5508; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1] 5509; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5510 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 5511 ret <8 x float> %res 5512} 5513declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 5514 5515define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5516; X86-LABEL: test_mm512_maskz_div_ps_128: 5517; X86: # %bb.0: 5518; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5519; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5520; X86-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] 5521; X86-NEXT: retl # encoding: [0xc3] 5522; 5523; X64-LABEL: test_mm512_maskz_div_ps_128: 5524; X64: # %bb.0: 
5525; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5526; X64-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] 5527; X64-NEXT: retq # encoding: [0xc3] 5528 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 5529 ret <4 x float> %res 5530} 5531 5532define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 5533; X86-LABEL: test_mm512_mask_div_ps_128: 5534; X86: # %bb.0: 5535; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5536; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5537; X86-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] 5538; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5539; X86-NEXT: retl # encoding: [0xc3] 5540; 5541; X64-LABEL: test_mm512_mask_div_ps_128: 5542; X64: # %bb.0: 5543; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5544; X64-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] 5545; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5546; X64-NEXT: retq # encoding: [0xc3] 5547 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 5548 ret <4 x float> %res 5549} 5550 5551define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5552; CHECK-LABEL: test_mm512_div_ps_128: 5553; CHECK: # %bb.0: 5554; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] 5555; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5556 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 5557 ret <4 x float> %res 5558} 5559declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 5560 5561declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) 5562 5563define <8 x float>@test_int_x86_avx512_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3) { 5564; CHECK-LABEL: test_int_x86_avx512_shuf_f32x4_256: 5565; CHECK: # %bb.0: 5566; CHECK-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] 5567; CHECK-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5568; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5569 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) 5570 ret <8 x float> %res 5571} 5572 5573define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { 5574; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256: 5575; X86: # %bb.0: 5576; X86-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] 5577; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5578; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5579; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5580; X86-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x65,0xc0] 5581; X86-NEXT: retl # encoding: [0xc3] 5582; 5583; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256: 5584; X64: # %bb.0: 5585; X64-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] 5586; X64-NEXT: # ymm0 = 
ymm0[0,1,2,3],ymm1[4,5,6,7] 5587; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5588; X64-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x65,0xc0] 5589; X64-NEXT: retq # encoding: [0xc3] 5590 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) 5591 ret <8 x float> %res 5592} 5593 5594define <8 x float>@test_int_x86_avx512_maskz_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, i8 %x4) { 5595; X86-LABEL: test_int_x86_avx512_maskz_shuf_f32x4_256: 5596; X86: # %bb.0: 5597; X86-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] 5598; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5599; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5600; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5601; X86-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc0] 5602; X86-NEXT: retl # encoding: [0xc3] 5603; 5604; X64-LABEL: test_int_x86_avx512_maskz_shuf_f32x4_256: 5605; X64: # %bb.0: 5606; X64-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] 5607; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5608; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5609; X64-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc0] 5610; X64-NEXT: retq # encoding: [0xc3] 5611 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4) 5612 ret <8 x float> %res 5613} 5614 5615declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) 5616 5617define <4 x double>@test_int_x86_avx512_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 5618; CHECK-LABEL: test_int_x86_avx512_shuf_f64x2_256: 5619; CHECK: # %bb.0: 5620; CHECK-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] 5621; CHECK-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5622; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5623 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) 5624 ret <4 x double> %res 5625} 5626 5627define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 5628; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256: 5629; X86: # %bb.0: 5630; X86-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c] 5631; X86-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3] 5632; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5633; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5634; X86-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x65,0xc0] 5635; X86-NEXT: retl # encoding: [0xc3] 5636; 5637; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256: 5638; X64: # %bb.0: 5639; X64-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c] 5640; X64-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3] 5641; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5642; X64-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x65,0xc0] 5643; X64-NEXT: retq # encoding: [0xc3] 5644 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) 5645 ret <4 x double> %res 5646} 5647 
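; NOTE: with 256-bit operands shuf.f64x2/shuf.f32x4 only consume the low two immediate bits (one
; selector per 128-bit lane), so imm 22 picks lane 0 of the first source and lane 1 of the second,
; and the shuffle is folded into an immediate blend (vblendpd $12 / vblendps $240) in the checks below.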
define <4 x double>@test_int_x86_avx512_maskz_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_shuf_f64x2_256:
; X86: # %bb.0:
; X86-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X86-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_shuf_f64x2_256:
; X64: # %bb.0:
; X64-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X64-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
  ret <4 x double> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_shuf_i32x4_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; CHECK-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X86: # %bb.0:
; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x64,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X64: # %bb.0:
; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x64,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  ret <8 x i32> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_i64x2_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; CHECK-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 5710 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1) 5711 ret <4 x i64> %res 5712} 5713 5714define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 5715; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256: 5716; X86: # %bb.0: 5717; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0] 5718; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5719; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5720; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5721; X86-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0] 5722; X86-NEXT: retl # encoding: [0xc3] 5723; 5724; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256: 5725; X64: # %bb.0: 5726; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0] 5727; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 5728; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5729; X64-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0] 5730; X64-NEXT: retq # encoding: [0xc3] 5731 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) 5732 ret <4 x i64> %res 5733} 5734 5735declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) 5736 5737define <2 x double>@test_int_x86_avx512_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { 5738; CHECK-LABEL: test_int_x86_avx512_shuf_pd_128: 5739; CHECK: # %bb.0: 5740; CHECK-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 5741; CHECK-NEXT: # xmm0 = xmm0[1],xmm1[0] 5742; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5743 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1) 5744 ret <2 x double> %res 5745} 5746 5747define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { 5748; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_128: 5749; X86: # %bb.0: 5750; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5751; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5752; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01] 5753; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0] 5754; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 5755; X86-NEXT: retl # encoding: [0xc3] 5756; 5757; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_128: 5758; X64: # %bb.0: 5759; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5760; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01] 5761; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0] 5762; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 5763; X64-NEXT: retq # encoding: [0xc3] 5764 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4) 5765 ret <2 x double> %res 5766} 5767 5768define <2 x double>@test_int_x86_avx512_maskz_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, i8 %x4) { 5769; X86-LABEL: test_int_x86_avx512_maskz_shuf_pd_128: 5770; X86: # %bb.0: 5771; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5772; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5773; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01] 5774; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0] 5775; X86-NEXT: retl # encoding: [0xc3] 5776; 5777; X64-LABEL: test_int_x86_avx512_maskz_shuf_pd_128: 5778; X64: # %bb.0: 5779; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5780; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01] 5781; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0] 5782; X64-NEXT: retq # encoding: [0xc3] 5783 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> zeroinitializer, i8 %x4) 5784 ret <2 x double> %res 5785} 5786 5787declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) 5788 5789define <4 x double>@test_int_x86_avx512_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3) { 5790; CHECK-LABEL: test_int_x86_avx512_shuf_pd_256: 5791; CHECK: # %bb.0: 5792; CHECK-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xc1,0x06] 5793; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] 5794; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5795 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1) 5796 ret <4 x double> %res 5797} 5798 5799define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 5800; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_256: 5801; X86: # %bb.0: 5802; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5803; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5804; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06] 5805; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2] 5806; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 5807; X86-NEXT: retl # encoding: [0xc3] 5808; 5809; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_256: 5810; X64: # %bb.0: 5811; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5812; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06] 5813; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2] 5814; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 5815; X64-NEXT: retq # encoding: [0xc3] 5816 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4) 5817 ret <4 x double> %res 5818} 5819 5820declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) 5821 5822define <4 x float>@test_int_x86_avx512_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3) { 5823; CHECK-LABEL: test_int_x86_avx512_shuf_ps_128: 5824; CHECK: # %bb.0: 5825; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x16] 5826; CHECK-NEXT: # xmm0 = xmm0[2,1],xmm1[1,0] 5827; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5828 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1) 5829 ret <4 x float> %res 5830} 5831 5832define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x 
float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { 5833; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_128: 5834; X86: # %bb.0: 5835; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5836; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5837; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16] 5838; X86-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0] 5839; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5840; X86-NEXT: retl # encoding: [0xc3] 5841; 5842; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_128: 5843; X64: # %bb.0: 5844; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5845; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16] 5846; X64-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0] 5847; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5848; X64-NEXT: retq # encoding: [0xc3] 5849 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4) 5850 ret <4 x float> %res 5851} 5852 5853declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) 5854 5855define <8 x float>@test_int_x86_avx512_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3) { 5856; CHECK-LABEL: test_int_x86_avx512_shuf_ps_256: 5857; CHECK: # %bb.0: 5858; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xc1,0x16] 5859; CHECK-NEXT: # ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] 5860; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5861 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) 5862 ret <8 x float> %res 5863} 5864 5865define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { 5866; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_256: 5867; X86: # %bb.0: 5868; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5869; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5870; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16] 5871; X86-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] 5872; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5873; X86-NEXT: retl # encoding: [0xc3] 5874; 5875; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_256: 5876; X64: # %bb.0: 5877; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5878; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16] 5879; X64-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] 5880; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5881; X64-NEXT: retq # encoding: [0xc3] 5882 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) 5883 ret <8 x float> %res 5884} 5885 5886declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 5887 5888define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 5889; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_128: 5890; X86: # %bb.0: 5891; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5892; X86-NEXT: kmovw %eax, %k1 # 
encoding: [0xc5,0xf8,0x92,0xc8] 5893; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1] 5894; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 5895; X86-NEXT: retl # encoding: [0xc3] 5896; 5897; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_128: 5898; X64: # %bb.0: 5899; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5900; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1] 5901; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 5902; X64-NEXT: retq # encoding: [0xc3] 5903 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask) 5904 ret <4 x i32> %res 5905} 5906 5907define <4 x i32>@test_int_x86_avx512_maskz_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { 5908; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_d_128: 5909; X86: # %bb.0: 5910; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5911; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5912; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1] 5913; X86-NEXT: retl # encoding: [0xc3] 5914; 5915; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_d_128: 5916; X64: # %bb.0: 5917; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5918; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1] 5919; X64-NEXT: retq # encoding: [0xc3] 5920 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 5921 ret <4 x i32> %res 5922} 5923 5924declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 5925 5926define <8 x i32>@test_int_x86_avx512_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 5927; CHECK-LABEL: test_int_x86_avx512_pmaxs_d_256: 5928; CHECK: # %bb.0: 5929; CHECK-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1] 5930; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5931 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 5932 ret <8 x i32> %res 5933} 5934 5935define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 5936; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_256: 5937; X86: # %bb.0: 5938; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5939; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5940; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1] 5941; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5942; X86-NEXT: retl # encoding: [0xc3] 5943; 5944; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_256: 5945; X64: # %bb.0: 5946; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5947; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1] 5948; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5949; X64-NEXT: retq # encoding: [0xc3] 5950 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 5951 ret <8 x i32> %res 5952} 5953 5954declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 5955 5956define <2 x i64>@test_int_x86_avx512_pmaxs_q_128(<2 x i64> %x0, <2 x i64> 
%x1, <2 x i64> %x2) { 5957; CHECK-LABEL: test_int_x86_avx512_pmaxs_q_128: 5958; CHECK: # %bb.0: 5959; CHECK-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xc1] 5960; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5961 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 5962 ret <2 x i64> %res 5963} 5964 5965define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 5966; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_128: 5967; X86: # %bb.0: 5968; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5969; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5970; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1] 5971; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 5972; X86-NEXT: retl # encoding: [0xc3] 5973; 5974; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_128: 5975; X64: # %bb.0: 5976; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5977; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1] 5978; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 5979; X64-NEXT: retq # encoding: [0xc3] 5980 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 5981 ret <2 x i64> %res 5982} 5983 5984declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 5985 5986define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 5987; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_256: 5988; X86: # %bb.0: 5989; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5990; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5991; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1] 5992; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5993; X86-NEXT: retl # encoding: [0xc3] 5994; 5995; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_256: 5996; X64: # %bb.0: 5997; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5998; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1] 5999; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6000; X64-NEXT: retq # encoding: [0xc3] 6001 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 6002 ret <4 x i64> %res 6003} 6004 6005define <4 x i64>@test_int_x86_avx512_maskz_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) { 6006; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_q_256: 6007; X86: # %bb.0: 6008; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6009; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6010; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1] 6011; X86-NEXT: retl # encoding: [0xc3] 6012; 6013; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_q_256: 6014; X64: # %bb.0: 6015; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6016; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1] 6017; X64-NEXT: retq # encoding: [0xc3] 6018 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 
6019 ret <4 x i64> %res 6020} 6021 6022declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6023 6024define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) { 6025; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_128: 6026; X86: # %bb.0: 6027; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6028; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6029; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1] 6030; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6031; X86-NEXT: retl # encoding: [0xc3] 6032; 6033; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_128: 6034; X64: # %bb.0: 6035; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6036; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1] 6037; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6038; X64-NEXT: retq # encoding: [0xc3] 6039 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 6040 ret <4 x i32> %res 6041} 6042 6043define <4 x i32>@test_int_x86_avx512_maskz_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { 6044; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_d_128: 6045; X86: # %bb.0: 6046; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6047; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6048; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1] 6049; X86-NEXT: retl # encoding: [0xc3] 6050; 6051; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_d_128: 6052; X64: # %bb.0: 6053; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6054; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1] 6055; X64-NEXT: retq # encoding: [0xc3] 6056 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 6057 ret <4 x i32> %res 6058} 6059 6060declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6061 6062define <8 x i32>@test_int_x86_avx512_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 6063; CHECK-LABEL: test_int_x86_avx512_pmaxu_d_256: 6064; CHECK: # %bb.0: 6065; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1] 6066; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6067 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 6068 ret <8 x i32> %res 6069} 6070 6071define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6072; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_256: 6073; X86: # %bb.0: 6074; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6075; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6076; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1] 6077; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6078; X86-NEXT: retl # encoding: [0xc3] 6079; 6080; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_256: 6081; X64: # %bb.0: 6082; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6083; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1] 6084; X64-NEXT: 
vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6085; X64-NEXT: retq # encoding: [0xc3] 6086 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6087 ret <8 x i32> %res 6088} 6089 6090declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6091 6092define <2 x i64>@test_int_x86_avx512_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 6093; CHECK-LABEL: test_int_x86_avx512_pmaxu_q_128: 6094; CHECK: # %bb.0: 6095; CHECK-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xc1] 6096; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6097 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6098 ret <2 x i64> %res 6099} 6100 6101define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6102; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_128: 6103; X86: # %bb.0: 6104; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6105; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6106; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1] 6107; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6108; X86-NEXT: retl # encoding: [0xc3] 6109; 6110; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_128: 6111; X64: # %bb.0: 6112; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6113; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1] 6114; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6115; X64-NEXT: retq # encoding: [0xc3] 6116 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6117 ret <2 x i64> %res 6118} 6119 6120declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6121 6122define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 6123; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_256: 6124; X86: # %bb.0: 6125; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6126; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6127; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1] 6128; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6129; X86-NEXT: retl # encoding: [0xc3] 6130; 6131; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_256: 6132; X64: # %bb.0: 6133; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6134; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1] 6135; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6136; X64-NEXT: retq # encoding: [0xc3] 6137 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 6138 ret <4 x i64> %res 6139} 6140 6141define <4 x i64>@test_int_x86_avx512_maskz_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) { 6142; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_q_256: 6143; X86: # %bb.0: 6144; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6145; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6146; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1] 6147; 
X86-NEXT: retl # encoding: [0xc3] 6148; 6149; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_q_256: 6150; X64: # %bb.0: 6151; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6152; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1] 6153; X64-NEXT: retq # encoding: [0xc3] 6154 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 6155 ret <4 x i64> %res 6156} 6157 6158declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6159 6160define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 6161; X86-LABEL: test_int_x86_avx512_mask_pmins_d_128: 6162; X86: # %bb.0: 6163; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6164; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6165; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1] 6166; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6167; X86-NEXT: retl # encoding: [0xc3] 6168; 6169; X64-LABEL: test_int_x86_avx512_mask_pmins_d_128: 6170; X64: # %bb.0: 6171; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6172; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1] 6173; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6174; X64-NEXT: retq # encoding: [0xc3] 6175 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 6176 ret <4 x i32> %res 6177} 6178 6179define <4 x i32>@test_int_x86_avx512_maskz_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { 6180; X86-LABEL: test_int_x86_avx512_maskz_pmins_d_128: 6181; X86: # %bb.0: 6182; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6183; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6184; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1] 6185; X86-NEXT: retl # encoding: [0xc3] 6186; 6187; X64-LABEL: test_int_x86_avx512_maskz_pmins_d_128: 6188; X64: # %bb.0: 6189; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6190; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1] 6191; X64-NEXT: retq # encoding: [0xc3] 6192 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 6193 ret <4 x i32> %res 6194} 6195 6196declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6197 6198define <8 x i32>@test_int_x86_avx512_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 6199; CHECK-LABEL: test_int_x86_avx512_pmins_d_256: 6200; CHECK: # %bb.0: 6201; CHECK-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1] 6202; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6203 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 6204 ret <8 x i32> %res 6205} 6206 6207define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6208; X86-LABEL: test_int_x86_avx512_mask_pmins_d_256: 6209; X86: # %bb.0: 6210; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6211; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6212; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm2 
{%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1] 6213; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6214; X86-NEXT: retl # encoding: [0xc3] 6215; 6216; X64-LABEL: test_int_x86_avx512_mask_pmins_d_256: 6217; X64: # %bb.0: 6218; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6219; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1] 6220; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6221; X64-NEXT: retq # encoding: [0xc3] 6222 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6223 ret <8 x i32> %res 6224} 6225 6226declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6227 6228define <2 x i64>@test_int_x86_avx512_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 6229; CHECK-LABEL: test_int_x86_avx512_pmins_q_128: 6230; CHECK: # %bb.0: 6231; CHECK-NEXT: vpminsq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xc1] 6232; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6233 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6234 ret <2 x i64> %res 6235} 6236 6237define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6238; X86-LABEL: test_int_x86_avx512_mask_pmins_q_128: 6239; X86: # %bb.0: 6240; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6241; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6242; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1] 6243; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6244; X86-NEXT: retl # encoding: [0xc3] 6245; 6246; X64-LABEL: test_int_x86_avx512_mask_pmins_q_128: 6247; X64: # %bb.0: 6248; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6249; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1] 6250; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6251; X64-NEXT: retq # encoding: [0xc3] 6252 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6253 ret <2 x i64> %res 6254} 6255 6256declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6257 6258define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 6259; X86-LABEL: test_int_x86_avx512_mask_pmins_q_256: 6260; X86: # %bb.0: 6261; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6262; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6263; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1] 6264; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6265; X86-NEXT: retl # encoding: [0xc3] 6266; 6267; X64-LABEL: test_int_x86_avx512_mask_pmins_q_256: 6268; X64: # %bb.0: 6269; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6270; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1] 6271; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6272; X64-NEXT: retq # encoding: [0xc3] 6273 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 6274 ret <4 x i64> %res 6275} 
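; For the zero-masking variants below, the intrinsic lowers straight to the {z}-masked instruction,
; so no extra move of a pass-through register shows up in the checks.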
6276 6277define <4 x i64>@test_int_x86_avx512_maskz_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) { 6278; X86-LABEL: test_int_x86_avx512_maskz_pmins_q_256: 6279; X86: # %bb.0: 6280; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6281; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6282; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1] 6283; X86-NEXT: retl # encoding: [0xc3] 6284; 6285; X64-LABEL: test_int_x86_avx512_maskz_pmins_q_256: 6286; X64: # %bb.0: 6287; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6288; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1] 6289; X64-NEXT: retq # encoding: [0xc3] 6290 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 6291 ret <4 x i64> %res 6292} 6293 6294declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6295 6296define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 6297; X86-LABEL: test_int_x86_avx512_mask_pminu_d_128: 6298; X86: # %bb.0: 6299; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6300; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6301; X86-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1] 6302; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6303; X86-NEXT: retl # encoding: [0xc3] 6304; 6305; X64-LABEL: test_int_x86_avx512_mask_pminu_d_128: 6306; X64: # %bb.0: 6307; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6308; X64-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1] 6309; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6310; X64-NEXT: retq # encoding: [0xc3] 6311 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 6312 ret <4 x i32> %res 6313} 6314 6315define <4 x i32>@test_int_x86_avx512_maskz_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { 6316; X86-LABEL: test_int_x86_avx512_maskz_pminu_d_128: 6317; X86: # %bb.0: 6318; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6319; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6320; X86-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1] 6321; X86-NEXT: retl # encoding: [0xc3] 6322; 6323; X64-LABEL: test_int_x86_avx512_maskz_pminu_d_128: 6324; X64: # %bb.0: 6325; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6326; X64-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1] 6327; X64-NEXT: retq # encoding: [0xc3] 6328 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 6329 ret <4 x i32> %res 6330} 6331 6332declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6333 6334define <8 x i32>@test_int_x86_avx512_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 6335; CHECK-LABEL: test_int_x86_avx512_pminu_d_256: 6336; CHECK: # %bb.0: 6337; CHECK-NEXT: vpminud %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1] 6338; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6339 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 
x i32> %x2, i8 -1) 6340 ret <8 x i32> %res 6341} 6342 6343define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6344; X86-LABEL: test_int_x86_avx512_mask_pminu_d_256: 6345; X86: # %bb.0: 6346; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6347; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6348; X86-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1] 6349; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6350; X86-NEXT: retl # encoding: [0xc3] 6351; 6352; X64-LABEL: test_int_x86_avx512_mask_pminu_d_256: 6353; X64: # %bb.0: 6354; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6355; X64-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1] 6356; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6357; X64-NEXT: retq # encoding: [0xc3] 6358 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6359 ret <8 x i32> %res 6360} 6361 6362declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6363 6364define <2 x i64>@test_int_x86_avx512_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 6365; CHECK-LABEL: test_int_x86_avx512_pminu_q_128: 6366; CHECK: # %bb.0: 6367; CHECK-NEXT: vpminuq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xc1] 6368; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6369 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6370 ret <2 x i64> %res 6371} 6372 6373define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6374; X86-LABEL: test_int_x86_avx512_mask_pminu_q_128: 6375; X86: # %bb.0: 6376; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6377; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6378; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1] 6379; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6380; X86-NEXT: retl # encoding: [0xc3] 6381; 6382; X64-LABEL: test_int_x86_avx512_mask_pminu_q_128: 6383; X64: # %bb.0: 6384; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6385; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1] 6386; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6387; X64-NEXT: retq # encoding: [0xc3] 6388 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6389 ret <2 x i64> %res 6390} 6391 6392declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6393 6394define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 6395; X86-LABEL: test_int_x86_avx512_mask_pminu_q_256: 6396; X86: # %bb.0: 6397; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6398; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6399; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1] 6400; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6401; X86-NEXT: retl # encoding: [0xc3] 6402; 6403; X64-LABEL: test_int_x86_avx512_mask_pminu_q_256: 6404; X64: # %bb.0: 6405; 
X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6406; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1] 6407; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6408; X64-NEXT: retq # encoding: [0xc3] 6409 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 6410 ret <4 x i64> %res 6411} 6412 6413define <4 x i64>@test_int_x86_avx512_maskz_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) { 6414; X86-LABEL: test_int_x86_avx512_maskz_pminu_q_256: 6415; X86: # %bb.0: 6416; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6417; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6418; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1] 6419; X86-NEXT: retl # encoding: [0xc3] 6420; 6421; X64-LABEL: test_int_x86_avx512_maskz_pminu_q_256: 6422; X64: # %bb.0: 6423; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6424; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1] 6425; X64-NEXT: retq # encoding: [0xc3] 6426 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 6427 ret <4 x i64> %res 6428} 6429 6430declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6431 6432define <2 x i64>@test_int_x86_avx512_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 6433; CHECK-LABEL: test_int_x86_avx512_psrl_q_128: 6434; CHECK: # %bb.0: 6435; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] 6436; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6437 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6438 ret <2 x i64> %res 6439} 6440 6441define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6442; X86-LABEL: test_int_x86_avx512_mask_psrl_q_128: 6443; X86: # %bb.0: 6444; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6445; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6446; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1] 6447; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6448; X86-NEXT: retl # encoding: [0xc3] 6449; 6450; X64-LABEL: test_int_x86_avx512_mask_psrl_q_128: 6451; X64: # %bb.0: 6452; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6453; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1] 6454; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6455; X64-NEXT: retq # encoding: [0xc3] 6456 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6457 ret <2 x i64> %res 6458} 6459 6460define <2 x i64>@test_int_x86_avx512_maskz_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 6461; X86-LABEL: test_int_x86_avx512_maskz_psrl_q_128: 6462; X86: # %bb.0: 6463; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6464; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6465; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1] 6466; X86-NEXT: retl # encoding: [0xc3] 6467; 6468; X64-LABEL: test_int_x86_avx512_maskz_psrl_q_128: 6469; X64: # %bb.0: 
6470; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6471; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1] 6472; X64-NEXT: retq # encoding: [0xc3] 6473 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 6474 ret <2 x i64> %res 6475} 6476 6477declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) 6478 6479define <4 x i64>@test_int_x86_avx512_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2) { 6480; CHECK-LABEL: test_int_x86_avx512_psrl_q_256: 6481; CHECK: # %bb.0: 6482; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1] 6483; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6484 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) 6485 ret <4 x i64> %res 6486} 6487 6488define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { 6489; X86-LABEL: test_int_x86_avx512_mask_psrl_q_256: 6490; X86: # %bb.0: 6491; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6492; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6493; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1] 6494; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6495; X86-NEXT: retl # encoding: [0xc3] 6496; 6497; X64-LABEL: test_int_x86_avx512_mask_psrl_q_256: 6498; X64: # %bb.0: 6499; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6500; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1] 6501; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6502; X64-NEXT: retq # encoding: [0xc3] 6503 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) 6504 ret <4 x i64> %res 6505} 6506 6507define <4 x i64>@test_int_x86_avx512_maskz_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x3) { 6508; X86-LABEL: test_int_x86_avx512_maskz_psrl_q_256: 6509; X86: # %bb.0: 6510; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6511; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6512; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1] 6513; X86-NEXT: retl # encoding: [0xc3] 6514; 6515; X64-LABEL: test_int_x86_avx512_maskz_psrl_q_256: 6516; X64: # %bb.0: 6517; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6518; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1] 6519; X64-NEXT: retq # encoding: [0xc3] 6520 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6521 ret <4 x i64> %res 6522} 6523 6524declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6525 6526define <4 x i32>@test_int_x86_avx512_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 6527; CHECK-LABEL: test_int_x86_avx512_psrl_d_128: 6528; CHECK: # %bb.0: 6529; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] 6530; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6531 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 6532 ret <4 x i32> %res 6533} 6534 6535define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x 
i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 6536; X86-LABEL: test_int_x86_avx512_mask_psrl_d_128: 6537; X86: # %bb.0: 6538; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6539; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6540; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1] 6541; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6542; X86-NEXT: retl # encoding: [0xc3] 6543; 6544; X64-LABEL: test_int_x86_avx512_mask_psrl_d_128: 6545; X64: # %bb.0: 6546; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6547; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1] 6548; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6549; X64-NEXT: retq # encoding: [0xc3] 6550 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 6551 ret <4 x i32> %res 6552} 6553 6554define <4 x i32>@test_int_x86_avx512_maskz_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 6555; X86-LABEL: test_int_x86_avx512_maskz_psrl_d_128: 6556; X86: # %bb.0: 6557; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6558; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6559; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1] 6560; X86-NEXT: retl # encoding: [0xc3] 6561; 6562; X64-LABEL: test_int_x86_avx512_maskz_psrl_d_128: 6563; X64: # %bb.0: 6564; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6565; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1] 6566; X64-NEXT: retq # encoding: [0xc3] 6567 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 6568 ret <4 x i32> %res 6569} 6570 6571declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) 6572 6573define <8 x i32>@test_int_x86_avx512_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2) { 6574; CHECK-LABEL: test_int_x86_avx512_psrl_d_256: 6575; CHECK: # %bb.0: 6576; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1] 6577; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6578 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) 6579 ret <8 x i32> %res 6580} 6581 6582define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6583; X86-LABEL: test_int_x86_avx512_mask_psrl_d_256: 6584; X86: # %bb.0: 6585; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6586; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6587; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1] 6588; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6589; X86-NEXT: retl # encoding: [0xc3] 6590; 6591; X64-LABEL: test_int_x86_avx512_mask_psrl_d_256: 6592; X64: # %bb.0: 6593; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6594; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1] 6595; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6596; X64-NEXT: retq # encoding: [0xc3] 6597 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) 
6598 ret <8 x i32> %res 6599} 6600 6601define <8 x i32>@test_int_x86_avx512_maskz_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x3) { 6602; X86-LABEL: test_int_x86_avx512_maskz_psrl_d_256: 6603; X86: # %bb.0: 6604; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6605; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6606; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1] 6607; X86-NEXT: retl # encoding: [0xc3] 6608; 6609; X64-LABEL: test_int_x86_avx512_maskz_psrl_d_256: 6610; X64: # %bb.0: 6611; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6612; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1] 6613; X64-NEXT: retq # encoding: [0xc3] 6614 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6615 ret <8 x i32> %res 6616} 6617 6618declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6619 6620define <4 x i32>@test_int_x86_avx512_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 6621; CHECK-LABEL: test_int_x86_avx512_psra_d_128: 6622; CHECK: # %bb.0: 6623; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1] 6624; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6625 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 6626 ret <4 x i32> %res 6627} 6628 6629define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 6630; X86-LABEL: test_int_x86_avx512_mask_psra_d_128: 6631; X86: # %bb.0: 6632; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6633; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6634; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1] 6635; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6636; X86-NEXT: retl # encoding: [0xc3] 6637; 6638; X64-LABEL: test_int_x86_avx512_mask_psra_d_128: 6639; X64: # %bb.0: 6640; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6641; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1] 6642; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6643; X64-NEXT: retq # encoding: [0xc3] 6644 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 6645 ret <4 x i32> %res 6646} 6647 6648define <4 x i32>@test_int_x86_avx512_maskz_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 6649; X86-LABEL: test_int_x86_avx512_maskz_psra_d_128: 6650; X86: # %bb.0: 6651; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6652; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6653; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1] 6654; X86-NEXT: retl # encoding: [0xc3] 6655; 6656; X64-LABEL: test_int_x86_avx512_maskz_psra_d_128: 6657; X64: # %bb.0: 6658; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6659; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1] 6660; X64-NEXT: retq # encoding: [0xc3] 6661 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 6662 ret <4 x i32> %res 6663} 6664 6665declare <8 x i32> 
@llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) 6666 6667define <8 x i32>@test_int_x86_avx512_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2) { 6668; CHECK-LABEL: test_int_x86_avx512_psra_d_256: 6669; CHECK: # %bb.0: 6670; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1] 6671; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6672 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) 6673 ret <8 x i32> %res 6674} 6675 6676define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6677; X86-LABEL: test_int_x86_avx512_mask_psra_d_256: 6678; X86: # %bb.0: 6679; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6680; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6681; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1] 6682; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6683; X86-NEXT: retl # encoding: [0xc3] 6684; 6685; X64-LABEL: test_int_x86_avx512_mask_psra_d_256: 6686; X64: # %bb.0: 6687; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6688; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1] 6689; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6690; X64-NEXT: retq # encoding: [0xc3] 6691 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) 6692 ret <8 x i32> %res 6693} 6694 6695define <8 x i32>@test_int_x86_avx512_maskz_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x3) { 6696; X86-LABEL: test_int_x86_avx512_maskz_psra_d_256: 6697; X86: # %bb.0: 6698; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6699; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6700; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1] 6701; X86-NEXT: retl # encoding: [0xc3] 6702; 6703; X64-LABEL: test_int_x86_avx512_maskz_psra_d_256: 6704; X64: # %bb.0: 6705; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6706; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1] 6707; X64-NEXT: retq # encoding: [0xc3] 6708 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6709 ret <8 x i32> %res 6710} 6711 6712declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6713 6714define <4 x i32>@test_int_x86_avx512_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 6715; CHECK-LABEL: test_int_x86_avx512_psll_d_128: 6716; CHECK: # %bb.0: 6717; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1] 6718; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6719 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 6720 ret <4 x i32> %res 6721} 6722 6723define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 6724; X86-LABEL: test_int_x86_avx512_mask_psll_d_128: 6725; X86: # %bb.0: 6726; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6727; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6728; X86-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1] 6729; X86-NEXT: 
vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6730; X86-NEXT: retl # encoding: [0xc3] 6731; 6732; X64-LABEL: test_int_x86_avx512_mask_psll_d_128: 6733; X64: # %bb.0: 6734; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6735; X64-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1] 6736; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6737; X64-NEXT: retq # encoding: [0xc3] 6738 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 6739 ret <4 x i32> %res 6740} 6741 6742define <4 x i32>@test_int_x86_avx512_maskz_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 6743; X86-LABEL: test_int_x86_avx512_maskz_psll_d_128: 6744; X86: # %bb.0: 6745; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6746; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6747; X86-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1] 6748; X86-NEXT: retl # encoding: [0xc3] 6749; 6750; X64-LABEL: test_int_x86_avx512_maskz_psll_d_128: 6751; X64: # %bb.0: 6752; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6753; X64-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1] 6754; X64-NEXT: retq # encoding: [0xc3] 6755 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 6756 ret <4 x i32> %res 6757} 6758 6759declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) 6760 6761define <8 x i32>@test_int_x86_avx512_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2) { 6762; CHECK-LABEL: test_int_x86_avx512_psll_d_256: 6763; CHECK: # %bb.0: 6764; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1] 6765; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6766 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) 6767 ret <8 x i32> %res 6768} 6769 6770define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6771; X86-LABEL: test_int_x86_avx512_mask_psll_d_256: 6772; X86: # %bb.0: 6773; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6774; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6775; X86-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1] 6776; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6777; X86-NEXT: retl # encoding: [0xc3] 6778; 6779; X64-LABEL: test_int_x86_avx512_mask_psll_d_256: 6780; X64: # %bb.0: 6781; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6782; X64-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1] 6783; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6784; X64-NEXT: retq # encoding: [0xc3] 6785 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) 6786 ret <8 x i32> %res 6787} 6788 6789define <8 x i32>@test_int_x86_avx512_maskz_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x3) { 6790; X86-LABEL: test_int_x86_avx512_maskz_psll_d_256: 6791; X86: # %bb.0: 6792; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6793; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6794; X86-NEXT: vpslld 
%xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1] 6795; X86-NEXT: retl # encoding: [0xc3] 6796; 6797; X64-LABEL: test_int_x86_avx512_maskz_psll_d_256: 6798; X64: # %bb.0: 6799; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6800; X64-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1] 6801; X64-NEXT: retq # encoding: [0xc3] 6802 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6803 ret <8 x i32> %res 6804} 6805 6806declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) 6807 6808define <4 x i64>@test_int_x86_avx512_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2) { 6809; CHECK-LABEL: test_int_x86_avx512_psll_q_256: 6810; CHECK: # %bb.0: 6811; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1] 6812; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6813 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) 6814 ret <4 x i64> %res 6815} 6816 6817define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { 6818; X86-LABEL: test_int_x86_avx512_mask_psll_q_256: 6819; X86: # %bb.0: 6820; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6821; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6822; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1] 6823; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6824; X86-NEXT: retl # encoding: [0xc3] 6825; 6826; X64-LABEL: test_int_x86_avx512_mask_psll_q_256: 6827; X64: # %bb.0: 6828; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6829; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1] 6830; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6831; X64-NEXT: retq # encoding: [0xc3] 6832 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) 6833 ret <4 x i64> %res 6834} 6835 6836define <4 x i64>@test_int_x86_avx512_maskz_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x3) { 6837; X86-LABEL: test_int_x86_avx512_maskz_psll_q_256: 6838; X86: # %bb.0: 6839; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6840; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6841; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1] 6842; X86-NEXT: retl # encoding: [0xc3] 6843; 6844; X64-LABEL: test_int_x86_avx512_maskz_psll_q_256: 6845; X64: # %bb.0: 6846; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 6847; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1] 6848; X64-NEXT: retq # encoding: [0xc3] 6849 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6850 ret <4 x i64> %res 6851} 6852 6853declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8) 6854 6855define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 6856; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_128: 6857; X86: # %bb.0: 6858; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 6859; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6860; 
X86-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03] 6861; X86-NEXT: vpsrlq $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x04] 6862; X86-NEXT: vpsrlq $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x05] 6863; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 6864; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 6865; X86-NEXT: retl # encoding: [0xc3] 6866; 6867; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128: 6868; X64: # %bb.0: 6869; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 6870; X64-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03] 6871; X64-NEXT: vpsrlq $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x04] 6872; X64-NEXT: vpsrlq $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x05] 6873; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 6874; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 6875; X64-NEXT: retq # encoding: [0xc3] 6876 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 6877 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 4, <2 x i64> %x2, i8 -1) 6878 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 5, <2 x i64> zeroinitializer, i8 %x3) 6879 %res3 = add <2 x i64> %res, %res1 6880 %res4 = add <2 x i64> %res2, %res3 6881 ret <2 x i64> %res4 6882} 6883 6884declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8) 6885 6886define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 6887; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_256: 6888; X86: # %bb.0: 6889; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 6890; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6891; X86-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03] 6892; X86-NEXT: vpsrlq $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x04] 6893; X86-NEXT: vpsrlq $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x05] 6894; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 6895; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 6896; X86-NEXT: retl # encoding: [0xc3] 6897; 6898; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256: 6899; X64: # %bb.0: 6900; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 6901; X64-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03] 6902; X64-NEXT: vpsrlq $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x04] 6903; X64-NEXT: vpsrlq $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x05] 6904; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 6905; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 6906; X64-NEXT: retq # encoding: [0xc3] 6907 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 6908 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 4, <4 x i64> %x2, i8 -1) 6909 %res2 = call <4 x i64> 
@llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 5, <4 x i64> zeroinitializer, i8 %x3) 6910 %res3 = add <4 x i64> %res, %res1 6911 %res4 = add <4 x i64> %res2, %res3 6912 ret <4 x i64> %res4 6913} 6914 6915declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8) 6916 6917define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 6918; X86-LABEL: test_int_x86_avx512_mask_psrl_di_128: 6919; X86: # %bb.0: 6920; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 6921; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6922; X86-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03] 6923; X86-NEXT: vpsrld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x04] 6924; X86-NEXT: vpsrld $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x05] 6925; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 6926; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 6927; X86-NEXT: retl # encoding: [0xc3] 6928; 6929; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128: 6930; X64: # %bb.0: 6931; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 6932; X64-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03] 6933; X64-NEXT: vpsrld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x04] 6934; X64-NEXT: vpsrld $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x05] 6935; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 6936; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 6937; X64-NEXT: retq # encoding: [0xc3] 6938 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 6939 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 4, <4 x i32> %x2, i8 -1) 6940 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 5, <4 x i32> zeroinitializer, i8 %x3) 6941 %res3 = add <4 x i32> %res, %res1 6942 %res4 = add <4 x i32> %res2, %res3 6943 ret <4 x i32> %res4 6944} 6945 6946declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8) 6947 6948define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 6949; X86-LABEL: test_int_x86_avx512_mask_psrl_di_256: 6950; X86: # %bb.0: 6951; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 6952; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6953; X86-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03] 6954; X86-NEXT: vpsrld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x04] 6955; X86-NEXT: vpsrld $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x05] 6956; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 6957; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 6958; X86-NEXT: retl # encoding: [0xc3] 6959; 6960; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256: 6961; X64: # %bb.0: 6962; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 6963; X64-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03] 6964; X64-NEXT: vpsrld $4, %ymm0, %ymm2 # EVEX 
TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x04] 6965; X64-NEXT: vpsrld $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x05] 6966; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 6967; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 6968; X64-NEXT: retq # encoding: [0xc3] 6969 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 6970 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 4, <8 x i32> %x2, i8 -1) 6971 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 5, <8 x i32> zeroinitializer, i8 %x3) 6972 %res3 = add <8 x i32> %res, %res1 6973 %res4 = add <8 x i32> %res2, %res3 6974 ret <8 x i32> %res4 6975} 6976 6977declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8) 6978 6979define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 6980; X86-LABEL: test_int_x86_avx512_mask_psll_di_128: 6981; X86: # %bb.0: 6982; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 6983; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 6984; X86-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03] 6985; X86-NEXT: vpslld $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x04] 6986; X86-NEXT: vpslld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x05] 6987; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 6988; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 6989; X86-NEXT: retl # encoding: [0xc3] 6990; 6991; X64-LABEL: test_int_x86_avx512_mask_psll_di_128: 6992; X64: # %bb.0: 6993; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 6994; X64-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03] 6995; X64-NEXT: vpslld $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x04] 6996; X64-NEXT: vpslld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x05] 6997; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 6998; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 6999; X64-NEXT: retq # encoding: [0xc3] 7000 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 7001 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 4, <4 x i32> zeroinitializer, i8 %x3) 7002 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 5, <4 x i32> %x2, i8 -1) 7003 %res3 = add <4 x i32> %res, %res1 7004 %res4 = add <4 x i32> %res3, %res2 7005 ret <4 x i32> %res4 7006} 7007 7008declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8) 7009 7010define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 7011; X86-LABEL: test_int_x86_avx512_mask_psll_di_256: 7012; X86: # %bb.0: 7013; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 7014; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7015; X86-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03] 7016; X86-NEXT: vpslld $4, %ymm0, %ymm2 {%k1} {z} # encoding: 
[0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x04] 7017; X86-NEXT: vpslld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x05] 7018; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 7019; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 7020; X86-NEXT: retl # encoding: [0xc3] 7021; 7022; X64-LABEL: test_int_x86_avx512_mask_psll_di_256: 7023; X64: # %bb.0: 7024; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 7025; X64-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03] 7026; X64-NEXT: vpslld $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x04] 7027; X64-NEXT: vpslld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x05] 7028; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 7029; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 7030; X64-NEXT: retq # encoding: [0xc3] 7031 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 7032 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 4, <8 x i32> zeroinitializer, i8 %x3) 7033 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 5, <8 x i32> %x2, i8 -1) 7034 %res3 = add <8 x i32> %res, %res1 7035 %res4 = add <8 x i32> %res3, %res2 7036 ret <8 x i32> %res4 7037} 7038 7039declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8) 7040 7041define <2 x i64>@test_int_x86_avx512_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 7042; CHECK-LABEL: test_int_x86_avx512_psrlv2_di: 7043; CHECK: # %bb.0: 7044; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1] 7045; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7046 %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 7047 ret <2 x i64> %res 7048} 7049 7050define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 7051; X86-LABEL: test_int_x86_avx512_mask_psrlv2_di: 7052; X86: # %bb.0: 7053; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7054; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7055; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1] 7056; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7057; X86-NEXT: retl # encoding: [0xc3] 7058; 7059; X64-LABEL: test_int_x86_avx512_mask_psrlv2_di: 7060; X64: # %bb.0: 7061; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7062; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1] 7063; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7064; X64-NEXT: retq # encoding: [0xc3] 7065 %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 7066 ret <2 x i64> %res 7067} 7068 7069define <2 x i64>@test_int_x86_avx512_maskz_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 7070; X86-LABEL: test_int_x86_avx512_maskz_psrlv2_di: 7071; X86: # %bb.0: 7072; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7073; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7074; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # 
encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1] 7075; X86-NEXT: retl # encoding: [0xc3] 7076; 7077; X64-LABEL: test_int_x86_avx512_maskz_psrlv2_di: 7078; X64: # %bb.0: 7079; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7080; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1] 7081; X64-NEXT: retq # encoding: [0xc3] 7082 %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 7083 ret <2 x i64> %res 7084} 7085 7086declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8) 7087 7088define <4 x i64>@test_int_x86_avx512_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 7089; CHECK-LABEL: test_int_x86_avx512_psrlv4_di: 7090; CHECK: # %bb.0: 7091; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1] 7092; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7093 %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 7094 ret <4 x i64> %res 7095} 7096 7097define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 7098; X86-LABEL: test_int_x86_avx512_mask_psrlv4_di: 7099; X86: # %bb.0: 7100; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7101; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7102; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1] 7103; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7104; X86-NEXT: retl # encoding: [0xc3] 7105; 7106; X64-LABEL: test_int_x86_avx512_mask_psrlv4_di: 7107; X64: # %bb.0: 7108; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7109; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1] 7110; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7111; X64-NEXT: retq # encoding: [0xc3] 7112 %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 7113 ret <4 x i64> %res 7114} 7115 7116define <4 x i64>@test_int_x86_avx512_maskz_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) { 7117; X86-LABEL: test_int_x86_avx512_maskz_psrlv4_di: 7118; X86: # %bb.0: 7119; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7120; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7121; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1] 7122; X86-NEXT: retl # encoding: [0xc3] 7123; 7124; X64-LABEL: test_int_x86_avx512_maskz_psrlv4_di: 7125; X64: # %bb.0: 7126; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7127; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1] 7128; X64-NEXT: retq # encoding: [0xc3] 7129 %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 7130 ret <4 x i64> %res 7131} 7132 7133declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8) 7134 7135define <4 x i32>@test_int_x86_avx512_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 7136; CHECK-LABEL: test_int_x86_avx512_psrlv4_si: 7137; CHECK: # %bb.0: 7138; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1] 7139; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7140 %res = call <4 x i32> 
@llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 7141 ret <4 x i32> %res 7142} 7143 7144define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 7145; X86-LABEL: test_int_x86_avx512_mask_psrlv4_si: 7146; X86: # %bb.0: 7147; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7148; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7149; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1] 7150; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7151; X86-NEXT: retl # encoding: [0xc3] 7152; 7153; X64-LABEL: test_int_x86_avx512_mask_psrlv4_si: 7154; X64: # %bb.0: 7155; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7156; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1] 7157; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7158; X64-NEXT: retq # encoding: [0xc3] 7159 %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 7160 ret <4 x i32> %res 7161} 7162 7163define <4 x i32>@test_int_x86_avx512_maskz_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 7164; X86-LABEL: test_int_x86_avx512_maskz_psrlv4_si: 7165; X86: # %bb.0: 7166; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7167; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7168; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1] 7169; X86-NEXT: retl # encoding: [0xc3] 7170; 7171; X64-LABEL: test_int_x86_avx512_maskz_psrlv4_si: 7172; X64: # %bb.0: 7173; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7174; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1] 7175; X64-NEXT: retq # encoding: [0xc3] 7176 %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 7177 ret <4 x i32> %res 7178} 7179 7180declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8) 7181 7182define <8 x i32>@test_int_x86_avx512_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 7183; CHECK-LABEL: test_int_x86_avx512_psrlv8_si: 7184; CHECK: # %bb.0: 7185; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1] 7186; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7187 %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 7188 ret <8 x i32> %res 7189} 7190 7191define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 7192; X86-LABEL: test_int_x86_avx512_mask_psrlv8_si: 7193; X86: # %bb.0: 7194; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7195; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7196; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1] 7197; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7198; X86-NEXT: retl # encoding: [0xc3] 7199; 7200; X64-LABEL: test_int_x86_avx512_mask_psrlv8_si: 7201; X64: # %bb.0: 7202; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7203; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1] 7204; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xc2] 7205; X64-NEXT: retq # encoding: [0xc3] 7206 %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 7207 ret <8 x i32> %res 7208} 7209 7210define <8 x i32>@test_int_x86_avx512_maskz_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 7211; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_si: 7212; X86: # %bb.0: 7213; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7214; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7215; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1] 7216; X86-NEXT: retl # encoding: [0xc3] 7217; 7218; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_si: 7219; X64: # %bb.0: 7220; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7221; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1] 7222; X64-NEXT: retq # encoding: [0xc3] 7223 %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 7224 ret <8 x i32> %res 7225} 7226 7227declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8) 7228 7229define <4 x i32>@test_int_x86_avx512_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 7230; CHECK-LABEL: test_int_x86_avx512_psrav4_si: 7231; CHECK: # %bb.0: 7232; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1] 7233; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7234 %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 7235 ret <4 x i32> %res 7236} 7237 7238define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 7239; X86-LABEL: test_int_x86_avx512_mask_psrav4_si: 7240; X86: # %bb.0: 7241; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7242; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7243; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1] 7244; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7245; X86-NEXT: retl # encoding: [0xc3] 7246; 7247; X64-LABEL: test_int_x86_avx512_mask_psrav4_si: 7248; X64: # %bb.0: 7249; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7250; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1] 7251; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7252; X64-NEXT: retq # encoding: [0xc3] 7253 %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 7254 ret <4 x i32> %res 7255} 7256 7257define <4 x i32>@test_int_x86_avx512_maskz_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 7258; X86-LABEL: test_int_x86_avx512_maskz_psrav4_si: 7259; X86: # %bb.0: 7260; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7261; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7262; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1] 7263; X86-NEXT: retl # encoding: [0xc3] 7264; 7265; X64-LABEL: test_int_x86_avx512_maskz_psrav4_si: 7266; X64: # %bb.0: 7267; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7268; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1] 7269; X64-NEXT: retq # encoding: [0xc3] 7270 %res = call <4 x i32> 
@llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 7271 ret <4 x i32> %res 7272} 7273 7274declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8) 7275 7276define <8 x i32>@test_int_x86_avx512_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 7277; CHECK-LABEL: test_int_x86_avx512_psrav8_si: 7278; CHECK: # %bb.0: 7279; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1] 7280; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7281 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 7282 ret <8 x i32> %res 7283} 7284 7285define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 7286; X86-LABEL: test_int_x86_avx512_mask_psrav8_si: 7287; X86: # %bb.0: 7288; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7289; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7290; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1] 7291; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7292; X86-NEXT: retl # encoding: [0xc3] 7293; 7294; X64-LABEL: test_int_x86_avx512_mask_psrav8_si: 7295; X64: # %bb.0: 7296; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7297; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1] 7298; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7299; X64-NEXT: retq # encoding: [0xc3] 7300 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 7301 ret <8 x i32> %res 7302} 7303 7304define <8 x i32>@test_int_x86_avx512_maskz_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 7305; X86-LABEL: test_int_x86_avx512_maskz_psrav8_si: 7306; X86: # %bb.0: 7307; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7308; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7309; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1] 7310; X86-NEXT: retl # encoding: [0xc3] 7311; 7312; X64-LABEL: test_int_x86_avx512_maskz_psrav8_si: 7313; X64: # %bb.0: 7314; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7315; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1] 7316; X64-NEXT: retq # encoding: [0xc3] 7317 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 7318 ret <8 x i32> %res 7319} 7320 7321define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() { 7322; X86-LABEL: test_int_x86_avx512_mask_psrav8_si_const: 7323; X86: # %bb.0: 7324; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] 7325; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] 7326; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 7327; X86-NEXT: vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] 7328; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 7329; X86-NEXT: retl # encoding: [0xc3] 7330; 7331; X64-LABEL: test_int_x86_avx512_mask_psrav8_si_const: 7332; X64: # %bb.0: 7333; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] 7334; 
X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] 7335; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte 7336; X64-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] 7337; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte 7338; X64-NEXT: retq # encoding: [0xc3] 7339 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1) 7340 ret <8 x i32> %res 7341} 7342 7343declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8) 7344 7345define <2 x i64>@test_int_x86_avx512_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 7346; CHECK-LABEL: test_int_x86_avx512_psllv2_di: 7347; CHECK: # %bb.0: 7348; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1] 7349; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7350 %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 7351 ret <2 x i64> %res 7352} 7353 7354define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 7355; X86-LABEL: test_int_x86_avx512_mask_psllv2_di: 7356; X86: # %bb.0: 7357; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7358; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7359; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1] 7360; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7361; X86-NEXT: retl # encoding: [0xc3] 7362; 7363; X64-LABEL: test_int_x86_avx512_mask_psllv2_di: 7364; X64: # %bb.0: 7365; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7366; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1] 7367; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7368; X64-NEXT: retq # encoding: [0xc3] 7369 %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 7370 ret <2 x i64> %res 7371} 7372 7373define <2 x i64>@test_int_x86_avx512_maskz_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 7374; X86-LABEL: test_int_x86_avx512_maskz_psllv2_di: 7375; X86: # %bb.0: 7376; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7377; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7378; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1] 7379; X86-NEXT: retl # encoding: [0xc3] 7380; 7381; X64-LABEL: test_int_x86_avx512_maskz_psllv2_di: 7382; X64: # %bb.0: 7383; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7384; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1] 7385; X64-NEXT: retq # encoding: [0xc3] 7386 %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 7387 ret <2 x i64> %res 7388} 7389 7390declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8) 7391 7392define <4 x i64>@test_int_x86_avx512_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 7393; CHECK-LABEL: test_int_x86_avx512_psllv4_di: 7394; CHECK: # %bb.0: 7395; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX 
Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1] 7396; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7397 %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 7398 ret <4 x i64> %res 7399} 7400 7401define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 7402; X86-LABEL: test_int_x86_avx512_mask_psllv4_di: 7403; X86: # %bb.0: 7404; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7405; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7406; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1] 7407; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7408; X86-NEXT: retl # encoding: [0xc3] 7409; 7410; X64-LABEL: test_int_x86_avx512_mask_psllv4_di: 7411; X64: # %bb.0: 7412; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7413; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1] 7414; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7415; X64-NEXT: retq # encoding: [0xc3] 7416 %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 7417 ret <4 x i64> %res 7418} 7419 7420define <4 x i64>@test_int_x86_avx512_maskz_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) { 7421; X86-LABEL: test_int_x86_avx512_maskz_psllv4_di: 7422; X86: # %bb.0: 7423; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7424; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7425; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1] 7426; X86-NEXT: retl # encoding: [0xc3] 7427; 7428; X64-LABEL: test_int_x86_avx512_maskz_psllv4_di: 7429; X64: # %bb.0: 7430; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7431; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1] 7432; X64-NEXT: retq # encoding: [0xc3] 7433 %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 7434 ret <4 x i64> %res 7435} 7436 7437declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8) 7438 7439define <4 x i32>@test_int_x86_avx512_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 7440; CHECK-LABEL: test_int_x86_avx512_psllv4_si: 7441; CHECK: # %bb.0: 7442; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1] 7443; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7444 %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 7445 ret <4 x i32> %res 7446} 7447 7448define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 7449; X86-LABEL: test_int_x86_avx512_mask_psllv4_si: 7450; X86: # %bb.0: 7451; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7452; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7453; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1] 7454; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7455; X86-NEXT: retl # encoding: [0xc3] 7456; 7457; X64-LABEL: test_int_x86_avx512_mask_psllv4_si: 7458; X64: # %bb.0: 7459; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7460; X64-NEXT: vpsllvd %xmm1, %xmm0, 
%xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1] 7461; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7462; X64-NEXT: retq # encoding: [0xc3] 7463 %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 7464 ret <4 x i32> %res 7465} 7466 7467define <4 x i32>@test_int_x86_avx512_maskz_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 7468; X86-LABEL: test_int_x86_avx512_maskz_psllv4_si: 7469; X86: # %bb.0: 7470; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7471; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7472; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1] 7473; X86-NEXT: retl # encoding: [0xc3] 7474; 7475; X64-LABEL: test_int_x86_avx512_maskz_psllv4_si: 7476; X64: # %bb.0: 7477; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7478; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1] 7479; X64-NEXT: retq # encoding: [0xc3] 7480 %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 7481 ret <4 x i32> %res 7482} 7483 7484declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8) 7485 7486define <8 x i32>@test_int_x86_avx512_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 7487; CHECK-LABEL: test_int_x86_avx512_psllv8_si: 7488; CHECK: # %bb.0: 7489; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1] 7490; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7491 %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 7492 ret <8 x i32> %res 7493} 7494 7495define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 7496; X86-LABEL: test_int_x86_avx512_mask_psllv8_si: 7497; X86: # %bb.0: 7498; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7499; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7500; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1] 7501; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7502; X86-NEXT: retl # encoding: [0xc3] 7503; 7504; X64-LABEL: test_int_x86_avx512_mask_psllv8_si: 7505; X64: # %bb.0: 7506; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7507; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1] 7508; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7509; X64-NEXT: retq # encoding: [0xc3] 7510 %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 7511 ret <8 x i32> %res 7512} 7513 7514define <8 x i32>@test_int_x86_avx512_maskz_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 7515; X86-LABEL: test_int_x86_avx512_maskz_psllv8_si: 7516; X86: # %bb.0: 7517; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7518; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7519; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1] 7520; X86-NEXT: retl # encoding: [0xc3] 7521; 7522; X64-LABEL: test_int_x86_avx512_maskz_psllv8_si: 7523; X64: # %bb.0: 7524; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7525; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} 
{z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1] 7526; X64-NEXT: retq # encoding: [0xc3] 7527 %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 7528 ret <8 x i32> %res 7529} 7530 7531declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8) 7532 7533define <4 x i32>@test_int_x86_avx512_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1) { 7534; CHECK-LABEL: test_int_x86_avx512_pmovzxb_d_128: 7535; CHECK: # %bb.0: 7536; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0] 7537; CHECK-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 7538; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7539 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1) 7540 ret <4 x i32> %res 7541} 7542 7543define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) { 7544; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128: 7545; X86: # %bb.0: 7546; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7547; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7548; X86-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8] 7549; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 7550; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7551; X86-NEXT: retl # encoding: [0xc3] 7552; 7553; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128: 7554; X64: # %bb.0: 7555; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7556; X64-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8] 7557; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 7558; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7559; X64-NEXT: retq # encoding: [0xc3] 7560 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) 7561 ret <4 x i32> %res 7562} 7563 7564define <4 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_128(<16 x i8> %x0, i8 %x2) { 7565; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_128: 7566; X86: # %bb.0: 7567; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7568; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7569; X86-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0] 7570; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 7571; X86-NEXT: retl # encoding: [0xc3] 7572; 7573; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_128: 7574; X64: # %bb.0: 7575; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7576; X64-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0] 7577; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 7578; X64-NEXT: retq # encoding: [0xc3] 7579 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2) 7580 ret <4 x i32> %res 7581} 7582 7583declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8) 7584 7585define <8 x i32>@test_int_x86_avx512_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1) { 7586; CHECK-LABEL: 
test_int_x86_avx512_pmovzxb_d_256: 7587; CHECK: # %bb.0: 7588; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xc0] 7589; CHECK-NEXT: # ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 7590; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7591 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1) 7592 ret <8 x i32> %res 7593} 7594 7595define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) { 7596; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256: 7597; X86: # %bb.0: 7598; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7599; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7600; X86-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8] 7601; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 7602; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7603; X86-NEXT: retl # encoding: [0xc3] 7604; 7605; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256: 7606; X64: # %bb.0: 7607; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7608; X64-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8] 7609; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 7610; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7611; X64-NEXT: retq # encoding: [0xc3] 7612 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) 7613 ret <8 x i32> %res 7614} 7615 7616define <8 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_256(<16 x i8> %x0, i8 %x2) { 7617; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_256: 7618; X86: # %bb.0: 7619; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7620; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7621; X86-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0] 7622; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 7623; X86-NEXT: retl # encoding: [0xc3] 7624; 7625; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_256: 7626; X64: # %bb.0: 7627; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7628; X64-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0] 7629; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 7630; X64-NEXT: retq # encoding: [0xc3] 7631 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2) 7632 ret <8 x i32> %res 7633} 7634 7635declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8) 7636 7637define <2 x i64>@test_int_x86_avx512_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1) { 7638; 
CHECK-LABEL: test_int_x86_avx512_pmovzxb_q_128: 7639; CHECK: # %bb.0: 7640; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0] 7641; CHECK-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 7642; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7643 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1) 7644 ret <2 x i64> %res 7645} 7646 7647define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) { 7648; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128: 7649; X86: # %bb.0: 7650; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7651; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7652; X86-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8] 7653; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 7654; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7655; X86-NEXT: retl # encoding: [0xc3] 7656; 7657; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128: 7658; X64: # %bb.0: 7659; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7660; X64-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8] 7661; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 7662; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7663; X64-NEXT: retq # encoding: [0xc3] 7664 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) 7665 ret <2 x i64> %res 7666} 7667 7668define <2 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_128(<16 x i8> %x0, i8 %x2) { 7669; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_128: 7670; X86: # %bb.0: 7671; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7672; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7673; X86-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0] 7674; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 7675; X86-NEXT: retl # encoding: [0xc3] 7676; 7677; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_128: 7678; X64: # %bb.0: 7679; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7680; X64-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0] 7681; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 7682; X64-NEXT: retq # encoding: [0xc3] 7683 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2) 7684 ret <2 x i64> %res 7685} 7686 7687declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8) 7688 7689define <4 x i64>@test_int_x86_avx512_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1) { 7690; CHECK-LABEL: test_int_x86_avx512_pmovzxb_q_256: 7691; CHECK: # %bb.0: 7692; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xc0] 7693; CHECK-NEXT: # ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 7694; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7695 %res = call <4 x i64> 
@llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1) 7696 ret <4 x i64> %res 7697} 7698 7699define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) { 7700; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256: 7701; X86: # %bb.0: 7702; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7703; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7704; X86-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8] 7705; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 7706; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7707; X86-NEXT: retl # encoding: [0xc3] 7708; 7709; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256: 7710; X64: # %bb.0: 7711; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7712; X64-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8] 7713; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 7714; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7715; X64-NEXT: retq # encoding: [0xc3] 7716 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) 7717 ret <4 x i64> %res 7718} 7719 7720define <4 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_256(<16 x i8> %x0, i8 %x2) { 7721; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_256: 7722; X86: # %bb.0: 7723; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7724; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7725; X86-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0] 7726; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 7727; X86-NEXT: retl # encoding: [0xc3] 7728; 7729; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_256: 7730; X64: # %bb.0: 7731; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7732; X64-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0] 7733; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 7734; X64-NEXT: retq # encoding: [0xc3] 7735 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2) 7736 ret <4 x i64> %res 7737} 7738 7739declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8) 7740 7741define <2 x i64>@test_int_x86_avx512_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1) { 7742; CHECK-LABEL: test_int_x86_avx512_pmovzxd_q_128: 7743; CHECK: # %bb.0: 7744; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0] 7745; CHECK-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero 7746; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7747 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1) 7748 ret <2 x i64> %res 7749} 7750 7751define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 
%x2) { 7752; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128: 7753; X86: # %bb.0: 7754; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7755; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7756; X86-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8] 7757; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero 7758; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7759; X86-NEXT: retl # encoding: [0xc3] 7760; 7761; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128: 7762; X64: # %bb.0: 7763; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7764; X64-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8] 7765; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero 7766; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7767; X64-NEXT: retq # encoding: [0xc3] 7768 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) 7769 ret <2 x i64> %res 7770} 7771 7772define <2 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_128(<4 x i32> %x0, i8 %x2) { 7773; X86-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_128: 7774; X86: # %bb.0: 7775; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7776; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7777; X86-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0] 7778; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero 7779; X86-NEXT: retl # encoding: [0xc3] 7780; 7781; X64-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_128: 7782; X64: # %bb.0: 7783; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7784; X64-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0] 7785; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero 7786; X64-NEXT: retq # encoding: [0xc3] 7787 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) 7788 ret <2 x i64> %res 7789} 7790 7791declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8) 7792 7793define <4 x i64>@test_int_x86_avx512_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1) { 7794; CHECK-LABEL: test_int_x86_avx512_pmovzxd_q_256: 7795; CHECK: # %bb.0: 7796; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xc0] 7797; CHECK-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7798; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7799 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1) 7800 ret <4 x i64> %res 7801} 7802 7803define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) { 7804; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256: 7805; X86: # %bb.0: 7806; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7807; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7808; X86-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8] 7809; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7810; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7811; X86-NEXT: retl # encoding: [0xc3] 7812; 7813; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256: 7814; X64: # %bb.0: 7815; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7816; X64-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: 
[0x62,0xf2,0x7d,0x29,0x35,0xc8] 7817; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7818; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7819; X64-NEXT: retq # encoding: [0xc3] 7820 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) 7821 ret <4 x i64> %res 7822} 7823 7824define <4 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_256(<4 x i32> %x0, i8 %x2) { 7825; X86-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_256: 7826; X86: # %bb.0: 7827; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7828; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7829; X86-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0] 7830; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7831; X86-NEXT: retl # encoding: [0xc3] 7832; 7833; X64-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_256: 7834; X64: # %bb.0: 7835; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7836; X64-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0] 7837; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7838; X64-NEXT: retq # encoding: [0xc3] 7839 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) 7840 ret <4 x i64> %res 7841} 7842 7843declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8) 7844 7845define <4 x i32>@test_int_x86_avx512_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1) { 7846; CHECK-LABEL: test_int_x86_avx512_pmovzxw_d_128: 7847; CHECK: # %bb.0: 7848; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0] 7849; CHECK-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7850; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7851 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1) 7852 ret <4 x i32> %res 7853} 7854 7855 7856define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) { 7857; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128: 7858; X86: # %bb.0: 7859; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7860; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7861; X86-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8] 7862; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7863; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7864; X86-NEXT: retl # encoding: [0xc3] 7865; 7866; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128: 7867; X64: # %bb.0: 7868; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7869; X64-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8] 7870; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7871; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7872; X64-NEXT: retq # encoding: [0xc3] 7873 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) 7874 ret <4 x i32> %res 7875} 7876 7877 7878define <4 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_128(<8 x i16> %x0, i8 %x2) { 7879; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_128: 7880; X86: # %bb.0: 7881; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7882; 
X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7883; X86-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0] 7884; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7885; X86-NEXT: retl # encoding: [0xc3] 7886; 7887; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_128: 7888; X64: # %bb.0: 7889; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7890; X64-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0] 7891; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 7892; X64-NEXT: retq # encoding: [0xc3] 7893 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2) 7894 ret <4 x i32> %res 7895} 7896 7897declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8) 7898 7899define <8 x i32>@test_int_x86_avx512_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1) { 7900; CHECK-LABEL: test_int_x86_avx512_pmovzxw_d_256: 7901; CHECK: # %bb.0: 7902; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0] 7903; CHECK-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 7904; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7905 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1) 7906 ret <8 x i32> %res 7907} 7908 7909define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) { 7910; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256: 7911; X86: # %bb.0: 7912; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7913; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7914; X86-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8] 7915; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 7916; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7917; X86-NEXT: retl # encoding: [0xc3] 7918; 7919; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256: 7920; X64: # %bb.0: 7921; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7922; X64-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8] 7923; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 7924; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7925; X64-NEXT: retq # encoding: [0xc3] 7926 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) 7927 ret <8 x i32> %res 7928} 7929 7930define <8 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_256(<8 x i16> %x0, i8 %x2) { 7931; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_256: 7932; X86: # %bb.0: 7933; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7934; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7935; X86-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0] 7936; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 7937; X86-NEXT: retl # encoding: [0xc3] 7938; 7939; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_256: 7940; X64: # %bb.0: 7941; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7942; X64-NEXT: 
vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0] 7943; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 7944; X64-NEXT: retq # encoding: [0xc3] 7945 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2) 7946 ret <8 x i32> %res 7947} 7948 7949declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8) 7950 7951define <2 x i64>@test_int_x86_avx512_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1) { 7952; CHECK-LABEL: test_int_x86_avx512_pmovzxw_q_128: 7953; CHECK: # %bb.0: 7954; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0] 7955; CHECK-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 7956; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7957 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1) 7958 ret <2 x i64> %res 7959} 7960 7961define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) { 7962; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128: 7963; X86: # %bb.0: 7964; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7965; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7966; X86-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8] 7967; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 7968; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7969; X86-NEXT: retl # encoding: [0xc3] 7970; 7971; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128: 7972; X64: # %bb.0: 7973; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7974; X64-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8] 7975; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 7976; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7977; X64-NEXT: retq # encoding: [0xc3] 7978 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) 7979 ret <2 x i64> %res 7980} 7981 7982define <2 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_128(<8 x i16> %x0, i8 %x2) { 7983; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_128: 7984; X86: # %bb.0: 7985; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7986; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 7987; X86-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0] 7988; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 7989; X86-NEXT: retl # encoding: [0xc3] 7990; 7991; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_128: 7992; X64: # %bb.0: 7993; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 7994; X64-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0] 7995; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 7996; X64-NEXT: retq # encoding: [0xc3] 7997 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2) 7998 ret <2 x i64> %res 7999} 8000 8001declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8) 8002 8003define <4 x i64>@test_int_x86_avx512_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1) { 8004; CHECK-LABEL: test_int_x86_avx512_pmovzxw_q_256: 8005; CHECK: # %bb.0: 8006; CHECK-NEXT: vpmovzxwq 
%xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xc0] 8007; CHECK-NEXT: # ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 8008; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8009 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1) 8010 ret <4 x i64> %res 8011} 8012 8013define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) { 8014; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256: 8015; X86: # %bb.0: 8016; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8017; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8018; X86-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8] 8019; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 8020; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8021; X86-NEXT: retl # encoding: [0xc3] 8022; 8023; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256: 8024; X64: # %bb.0: 8025; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8026; X64-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8] 8027; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 8028; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8029; X64-NEXT: retq # encoding: [0xc3] 8030 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) 8031 ret <4 x i64> %res 8032} 8033 8034define <4 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_256(<8 x i16> %x0, i8 %x2) { 8035; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_256: 8036; X86: # %bb.0: 8037; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8038; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8039; X86-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0] 8040; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 8041; X86-NEXT: retl # encoding: [0xc3] 8042; 8043; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_256: 8044; X64: # %bb.0: 8045; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8046; X64-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0] 8047; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 8048; X64-NEXT: retq # encoding: [0xc3] 8049 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2) 8050 ret <4 x i64> %res 8051} 8052 8053declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8) 8054 8055define <4 x i32>@test_int_x86_avx512_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1) { 8056; CHECK-LABEL: test_int_x86_avx512_pmovsxb_d_128: 8057; CHECK: # %bb.0: 8058; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0] 8059; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8060 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1) 8061 ret <4 x i32> %res 8062} 8063 8064define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) { 8065; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128: 8066; X86: # %bb.0: 8067; X86-NEXT: 
movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8068; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8069; X86-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8] 8070; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8071; X86-NEXT: retl # encoding: [0xc3] 8072; 8073; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128: 8074; X64: # %bb.0: 8075; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8076; X64-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8] 8077; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8078; X64-NEXT: retq # encoding: [0xc3] 8079 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) 8080 ret <4 x i32> %res 8081} 8082 8083define <4 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_128(<16 x i8> %x0, i8 %x2) { 8084; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_128: 8085; X86: # %bb.0: 8086; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8087; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8088; X86-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0] 8089; X86-NEXT: retl # encoding: [0xc3] 8090; 8091; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_128: 8092; X64: # %bb.0: 8093; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8094; X64-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0] 8095; X64-NEXT: retq # encoding: [0xc3] 8096 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2) 8097 ret <4 x i32> %res 8098} 8099 8100declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8) 8101 8102define <8 x i32>@test_int_x86_avx512_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1) { 8103; CHECK-LABEL: test_int_x86_avx512_pmovsxb_d_256: 8104; CHECK: # %bb.0: 8105; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xc0] 8106; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8107 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1) 8108 ret <8 x i32> %res 8109} 8110 8111define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) { 8112; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256: 8113; X86: # %bb.0: 8114; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8115; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8116; X86-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8] 8117; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8118; X86-NEXT: retl # encoding: [0xc3] 8119; 8120; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256: 8121; X64: # %bb.0: 8122; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8123; X64-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8] 8124; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8125; X64-NEXT: retq # encoding: [0xc3] 8126 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) 8127 ret <8 x i32> %res 8128} 8129 8130define <8 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_256(<16 x i8> %x0, i8 %x2) { 8131; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_256: 8132; X86: # %bb.0: 8133; X86-NEXT: movzbl {{[0-9]+}}(%esp), 
%eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8134; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8135; X86-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0] 8136; X86-NEXT: retl # encoding: [0xc3] 8137; 8138; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_256: 8139; X64: # %bb.0: 8140; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8141; X64-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0] 8142; X64-NEXT: retq # encoding: [0xc3] 8143 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2) 8144 ret <8 x i32> %res 8145} 8146 8147declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8) 8148 8149define <2 x i64>@test_int_x86_avx512_ask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1) { 8150; CHECK-LABEL: test_int_x86_avx512_ask_pmovsxb_q_128: 8151; CHECK: # %bb.0: 8152; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0] 8153; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8154 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1) 8155 ret <2 x i64> %res 8156} 8157 8158define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) { 8159; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128: 8160; X86: # %bb.0: 8161; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8162; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8163; X86-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8] 8164; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8165; X86-NEXT: retl # encoding: [0xc3] 8166; 8167; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128: 8168; X64: # %bb.0: 8169; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8170; X64-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8] 8171; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8172; X64-NEXT: retq # encoding: [0xc3] 8173 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) 8174 ret <2 x i64> %res 8175} 8176 8177define <2 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_128(<16 x i8> %x0, i8 %x2) { 8178; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_128: 8179; X86: # %bb.0: 8180; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8181; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8182; X86-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0] 8183; X86-NEXT: retl # encoding: [0xc3] 8184; 8185; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_128: 8186; X64: # %bb.0: 8187; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8188; X64-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0] 8189; X64-NEXT: retq # encoding: [0xc3] 8190 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2) 8191 ret <2 x i64> %res 8192} 8193 8194declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8) 8195 8196define <4 x i64>@test_int_x86_avx512_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1) { 8197; CHECK-LABEL: test_int_x86_avx512_pmovsxb_q_256: 8198; CHECK: # %bb.0: 8199; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xc0] 8200; CHECK-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] 8201 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1) 8202 ret <4 x i64> %res 8203} 8204 8205define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) { 8206; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256: 8207; X86: # %bb.0: 8208; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8209; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8210; X86-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8] 8211; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8212; X86-NEXT: retl # encoding: [0xc3] 8213; 8214; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256: 8215; X64: # %bb.0: 8216; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8217; X64-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8] 8218; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8219; X64-NEXT: retq # encoding: [0xc3] 8220 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) 8221 ret <4 x i64> %res 8222} 8223 8224define <4 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_256(<16 x i8> %x0, i8 %x2) { 8225; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_256: 8226; X86: # %bb.0: 8227; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8228; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8229; X86-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0] 8230; X86-NEXT: retl # encoding: [0xc3] 8231; 8232; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_256: 8233; X64: # %bb.0: 8234; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8235; X64-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0] 8236; X64-NEXT: retq # encoding: [0xc3] 8237 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2) 8238 ret <4 x i64> %res 8239} 8240 8241declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8) 8242 8243define <4 x i32>@test_int_x86_avx512_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1) { 8244; CHECK-LABEL: test_int_x86_avx512_pmovsxw_d_128: 8245; CHECK: # %bb.0: 8246; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0] 8247; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8248 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1) 8249 ret <4 x i32> %res 8250} 8251 8252define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) { 8253; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128: 8254; X86: # %bb.0: 8255; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8256; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8257; X86-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8] 8258; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8259; X86-NEXT: retl # encoding: [0xc3] 8260; 8261; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128: 8262; X64: # %bb.0: 8263; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8264; X64-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8] 8265; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8266; X64-NEXT: retq # encoding: 
[0xc3] 8267 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) 8268 ret <4 x i32> %res 8269} 8270 8271define <4 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_128(<8 x i16> %x0, i8 %x2) { 8272; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_128: 8273; X86: # %bb.0: 8274; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8275; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8276; X86-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0] 8277; X86-NEXT: retl # encoding: [0xc3] 8278; 8279; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_128: 8280; X64: # %bb.0: 8281; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8282; X64-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0] 8283; X64-NEXT: retq # encoding: [0xc3] 8284 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2) 8285 ret <4 x i32> %res 8286} 8287 8288declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8) 8289 8290define <8 x i32>@test_int_x86_avx512_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1) { 8291; CHECK-LABEL: test_int_x86_avx512_pmovsxw_d_256: 8292; CHECK: # %bb.0: 8293; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xc0] 8294; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8295 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1) 8296 ret <8 x i32> %res 8297} 8298 8299define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) { 8300; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256: 8301; X86: # %bb.0: 8302; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8303; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8304; X86-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8] 8305; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8306; X86-NEXT: retl # encoding: [0xc3] 8307; 8308; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256: 8309; X64: # %bb.0: 8310; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8311; X64-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8] 8312; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8313; X64-NEXT: retq # encoding: [0xc3] 8314 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) 8315 ret <8 x i32> %res 8316} 8317 8318define <8 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_256(<8 x i16> %x0, i8 %x2) { 8319; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_256: 8320; X86: # %bb.0: 8321; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8322; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8323; X86-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0] 8324; X86-NEXT: retl # encoding: [0xc3] 8325; 8326; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_256: 8327; X64: # %bb.0: 8328; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8329; X64-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0] 8330; X64-NEXT: retq # encoding: [0xc3] 8331 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2) 8332 ret <8 x i32> %res 8333} 8334 8335declare <2 x i64> 
@llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8) 8336 8337define <2 x i64>@test_int_x86_avx512_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1) { 8338; CHECK-LABEL: test_int_x86_avx512_pmovsxw_q_128: 8339; CHECK: # %bb.0: 8340; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0] 8341; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8342 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1) 8343 ret <2 x i64> %res 8344} 8345 8346define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) { 8347; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128: 8348; X86: # %bb.0: 8349; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8350; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8351; X86-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8] 8352; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8353; X86-NEXT: retl # encoding: [0xc3] 8354; 8355; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128: 8356; X64: # %bb.0: 8357; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8358; X64-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8] 8359; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8360; X64-NEXT: retq # encoding: [0xc3] 8361 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) 8362 ret <2 x i64> %res 8363} 8364 8365define <2 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_128(<8 x i16> %x0, i8 %x2) { 8366; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_128: 8367; X86: # %bb.0: 8368; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8369; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8370; X86-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0] 8371; X86-NEXT: retl # encoding: [0xc3] 8372; 8373; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_128: 8374; X64: # %bb.0: 8375; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8376; X64-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0] 8377; X64-NEXT: retq # encoding: [0xc3] 8378 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2) 8379 ret <2 x i64> %res 8380} 8381 8382declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8) 8383 8384define <4 x i64>@test_int_x86_avx512_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1) { 8385; CHECK-LABEL: test_int_x86_avx512_pmovsxw_q_256: 8386; CHECK: # %bb.0: 8387; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xc0] 8388; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8389 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1) 8390 ret <4 x i64> %res 8391} 8392 8393define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) { 8394; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256: 8395; X86: # %bb.0: 8396; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8397; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8398; X86-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8] 8399; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8400; X86-NEXT: retl # encoding: [0xc3] 
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_256(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psra_q_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psra_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psra_q_128:
8468; X64: # %bb.0: 8469; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8470; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1] 8471; X64-NEXT: retq # encoding: [0xc3] 8472 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 8473 ret <2 x i64> %res 8474} 8475 8476declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) 8477 8478define <4 x i64>@test_int_x86_avx512_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2) { 8479; CHECK-LABEL: test_int_x86_avx512_psra_q_256: 8480; CHECK: # %bb.0: 8481; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1] 8482; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8483 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) 8484 ret <4 x i64> %res 8485} 8486 8487define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { 8488; X86-LABEL: test_int_x86_avx512_mask_psra_q_256: 8489; X86: # %bb.0: 8490; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8491; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8492; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1] 8493; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8494; X86-NEXT: retl # encoding: [0xc3] 8495; 8496; X64-LABEL: test_int_x86_avx512_mask_psra_q_256: 8497; X64: # %bb.0: 8498; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8499; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1] 8500; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8501; X64-NEXT: retq # encoding: [0xc3] 8502 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) 8503 ret <4 x i64> %res 8504} 8505 8506define <4 x i64>@test_int_x86_avx512_maskz_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x3) { 8507; X86-LABEL: test_int_x86_avx512_maskz_psra_q_256: 8508; X86: # %bb.0: 8509; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8510; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8511; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1] 8512; X86-NEXT: retl # encoding: [0xc3] 8513; 8514; X64-LABEL: test_int_x86_avx512_maskz_psra_q_256: 8515; X64: # %bb.0: 8516; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8517; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1] 8518; X64-NEXT: retq # encoding: [0xc3] 8519 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 8520 ret <4 x i64> %res 8521} 8522 8523declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8) 8524 8525define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 8526; X86-LABEL: test_int_x86_avx512_mask_psra_qi_128: 8527; X86: # %bb.0: 8528; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 8529; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8530; X86-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03] 8531; X86-NEXT: vpsraq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x04] 8532; 
X86-NEXT: vpsraq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x05] 8533; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 8534; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 8535; X86-NEXT: retl # encoding: [0xc3] 8536; 8537; X64-LABEL: test_int_x86_avx512_mask_psra_qi_128: 8538; X64: # %bb.0: 8539; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 8540; X64-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03] 8541; X64-NEXT: vpsraq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x04] 8542; X64-NEXT: vpsraq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x05] 8543; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 8544; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 8545; X64-NEXT: retq # encoding: [0xc3] 8546 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 8547 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 4, <2 x i64> zeroinitializer, i8 %x3) 8548 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 5, <2 x i64> %x2, i8 -1) 8549 %res3 = add <2 x i64> %res, %res1 8550 %res4 = add <2 x i64> %res3, %res2 8551 ret <2 x i64> %res4 8552} 8553 8554declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8) 8555 8556define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 8557; X86-LABEL: test_int_x86_avx512_mask_psra_qi_256: 8558; X86: # %bb.0: 8559; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 8560; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8561; X86-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03] 8562; X86-NEXT: vpsraq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x04] 8563; X86-NEXT: vpsraq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x05] 8564; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 8565; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 8566; X86-NEXT: retl # encoding: [0xc3] 8567; 8568; X64-LABEL: test_int_x86_avx512_mask_psra_qi_256: 8569; X64: # %bb.0: 8570; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 8571; X64-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03] 8572; X64-NEXT: vpsraq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x04] 8573; X64-NEXT: vpsraq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x05] 8574; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 8575; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 8576; X64-NEXT: retq # encoding: [0xc3] 8577 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 8578 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 4, <4 x i64> zeroinitializer, i8 %x3) 8579 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 5, <4 x i64> %x2, i8 -1) 8580 %res3 = add <4 x i64> %res, %res1 8581 %res4 = add <4 x i64> %res3, %res2 8582 ret <4 x i64> %res4 8583} 8584 8585declare <2 x i64> 
@llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 8586 8587define <2 x i64>@test_int_x86_avx512_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 8588; CHECK-LABEL: test_int_x86_avx512_psrav_q_128: 8589; CHECK: # %bb.0: 8590; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1] 8591; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8592 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 8593 ret <2 x i64> %res 8594} 8595 8596define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 8597; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128: 8598; X86: # %bb.0: 8599; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8600; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8601; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1] 8602; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8603; X86-NEXT: retl # encoding: [0xc3] 8604; 8605; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128: 8606; X64: # %bb.0: 8607; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8608; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1] 8609; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8610; X64-NEXT: retq # encoding: [0xc3] 8611 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 8612 ret <2 x i64> %res 8613} 8614 8615define <2 x i64>@test_int_x86_avx512_maskz_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 8616; X86-LABEL: test_int_x86_avx512_maskz_psrav_q_128: 8617; X86: # %bb.0: 8618; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8619; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8620; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1] 8621; X86-NEXT: retl # encoding: [0xc3] 8622; 8623; X64-LABEL: test_int_x86_avx512_maskz_psrav_q_128: 8624; X64: # %bb.0: 8625; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8626; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1] 8627; X64-NEXT: retq # encoding: [0xc3] 8628 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 8629 ret <2 x i64> %res 8630} 8631 8632define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) { 8633; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128_const: 8634; X86: # %bb.0: 8635; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,0,4294967287,4294967295] 8636; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] 8637; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 8638; X86-NEXT: vpsravq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] 8639; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 8640; X86-NEXT: retl # encoding: [0xc3] 8641; 8642; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128_const: 8643; X64: # %bb.0: 8644; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,18446744073709551607] 8645; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] 8646; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte 8647; X64-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 # encoding: 
[0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] 8648; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte 8649; X64-NEXT: retq # encoding: [0xc3] 8650 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1) 8651 ret <2 x i64> %res 8652} 8653 8654declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 8655 8656define <4 x i64>@test_int_x86_avx512_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 8657; CHECK-LABEL: test_int_x86_avx512_psrav_q_256: 8658; CHECK: # %bb.0: 8659; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1] 8660; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8661 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 8662 ret <4 x i64> %res 8663} 8664 8665define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 8666; X86-LABEL: test_int_x86_avx512_mask_psrav_q_256: 8667; X86: # %bb.0: 8668; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8669; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8670; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1] 8671; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8672; X86-NEXT: retl # encoding: [0xc3] 8673; 8674; X64-LABEL: test_int_x86_avx512_mask_psrav_q_256: 8675; X64: # %bb.0: 8676; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8677; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1] 8678; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8679; X64-NEXT: retq # encoding: [0xc3] 8680 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 8681 ret <4 x i64> %res 8682} 8683 8684define <4 x i64>@test_int_x86_avx512_maskz_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) { 8685; X86-LABEL: test_int_x86_avx512_maskz_psrav_q_256: 8686; X86: # %bb.0: 8687; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8688; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8689; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1] 8690; X86-NEXT: retl # encoding: [0xc3] 8691; 8692; X64-LABEL: test_int_x86_avx512_maskz_psrav_q_256: 8693; X64: # %bb.0: 8694; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8695; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1] 8696; X64-NEXT: retq # encoding: [0xc3] 8697 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 8698 ret <4 x i64> %res 8699} 8700 8701declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) 8702 8703define <2 x double>@test_int_x86_avx512_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1) { 8704; CHECK-LABEL: test_int_x86_avx512_cvt_dq2pd_128: 8705; CHECK: # %bb.0: 8706; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0] 8707; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8708 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) 8709 ret <2 x double> %res 8710} 8711 8712define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x 
double> %x1, i8 %x2) { 8713; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: 8714; X86: # %bb.0: 8715; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8716; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8717; X86-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8] 8718; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 8719; X86-NEXT: retl # encoding: [0xc3] 8720; 8721; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: 8722; X64: # %bb.0: 8723; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8724; X64-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8] 8725; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 8726; X64-NEXT: retq # encoding: [0xc3] 8727 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) 8728 ret <2 x double> %res 8729} 8730 8731declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8) 8732 8733define <4 x double>@test_int_x86_avx512_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1) { 8734; CHECK-LABEL: test_int_x86_avx512_cvt_dq2pd_256: 8735; CHECK: # %bb.0: 8736; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0] 8737; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8738 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) 8739 ret <4 x double> %res 8740} 8741 8742define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { 8743; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: 8744; X86: # %bb.0: 8745; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8746; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8747; X86-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8] 8748; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 8749; X86-NEXT: retl # encoding: [0xc3] 8750; 8751; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: 8752; X64: # %bb.0: 8753; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8754; X64-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8] 8755; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 8756; X64-NEXT: retq # encoding: [0xc3] 8757 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) 8758 ret <4 x double> %res 8759} 8760 8761declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8) 8762 8763define <2 x double>@test_int_x86_avx512_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1) { 8764; CHECK-LABEL: test_int_x86_avx512_cvt_udq2pd_128: 8765; CHECK: # %bb.0: 8766; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0] 8767; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8768 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) 8769 ret <2 x double> %res 8770} 8771 8772define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { 8773; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: 8774; X86: # %bb.0: 8775; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8776; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8777; X86-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # 
encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8] 8778; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 8779; X86-NEXT: retl # encoding: [0xc3] 8780; 8781; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: 8782; X64: # %bb.0: 8783; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8784; X64-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8] 8785; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 8786; X64-NEXT: retq # encoding: [0xc3] 8787 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) 8788 ret <2 x double> %res 8789} 8790 8791declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8) 8792 8793define <4 x double>@test_int_x86_avx512_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1) { 8794; CHECK-LABEL: test_int_x86_avx512_cvt_udq2pd_256: 8795; CHECK: # %bb.0: 8796; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0] 8797; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8798 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) 8799 ret <4 x double> %res 8800} 8801 8802define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { 8803; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: 8804; X86: # %bb.0: 8805; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8806; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8807; X86-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8] 8808; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 8809; X86-NEXT: retl # encoding: [0xc3] 8810; 8811; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: 8812; X64: # %bb.0: 8813; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8814; X64-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8] 8815; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 8816; X64-NEXT: retq # encoding: [0xc3] 8817 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) 8818 ret <4 x double> %res 8819} 8820 8821declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8) 8822 8823define <4 x i32>@test_int_x86_avx512_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3) { 8824; CHECK-LABEL: test_int_x86_avx512_valign_d_128: 8825; CHECK: # %bb.0: 8826; CHECK-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x08] 8827; CHECK-NEXT: # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 8828; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8829 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1) 8830 ret <4 x i32> %res 8831} 8832 8833define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 8834; X86-LABEL: test_int_x86_avx512_mask_valign_d_128: 8835; X86: # %bb.0: 8836; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8837; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8838; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02] 8839; X86-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1] 8840; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8841; X86-NEXT: retl # encoding: [0xc3] 8842; 8843; X64-LABEL: test_int_x86_avx512_mask_valign_d_128: 8844; X64: # %bb.0: 8845; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8846; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02] 8847; X64-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1] 8848; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8849; X64-NEXT: retq # encoding: [0xc3] 8850 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4) 8851 ret <4 x i32> %res 8852} 8853 8854define <4 x i32>@test_int_x86_avx512_maskz_valign_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x4) { 8855; X86-LABEL: test_int_x86_avx512_maskz_valign_d_128: 8856; X86: # %bb.0: 8857; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8858; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8859; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02] 8860; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1] 8861; X86-NEXT: retl # encoding: [0xc3] 8862; 8863; X64-LABEL: test_int_x86_avx512_maskz_valign_d_128: 8864; X64: # %bb.0: 8865; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8866; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02] 8867; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1] 8868; X64-NEXT: retq # encoding: [0xc3] 8869 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> zeroinitializer,i8 %x4) 8870 ret <4 x i32> %res 8871} 8872 8873declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8) 8874 8875define <8 x i32>@test_int_x86_avx512_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3) { 8876; CHECK-LABEL: test_int_x86_avx512_valign_d_256: 8877; CHECK: # %bb.0: 8878; CHECK-NEXT: valignq $3, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xc1,0x03] 8879; CHECK-NEXT: # ymm0 = ymm1[3],ymm0[0,1,2] 8880; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8881 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1) 8882 ret <8 x i32> %res 8883} 8884 8885define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 8886; X86-LABEL: test_int_x86_avx512_mask_valign_d_256: 8887; X86: # %bb.0: 8888; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8889; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8890; X86-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06] 8891; X86-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5] 8892; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8893; X86-NEXT: retl # encoding: [0xc3] 8894; 8895; X64-LABEL: test_int_x86_avx512_mask_valign_d_256: 8896; X64: # %bb.0: 8897; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8898; X64-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06] 8899; X64-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5] 8900; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8901; X64-NEXT: retq # encoding: [0xc3] 8902 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> 
%x3, i8 %x4) 8903 ret <8 x i32> %res 8904} 8905 8906declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8) 8907 8908define <2 x i64>@test_int_x86_avx512_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3) { 8909; CHECK-LABEL: test_int_x86_avx512_valign_q_128: 8910; CHECK: # %bb.0: 8911; CHECK-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x08] 8912; CHECK-NEXT: # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 8913; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8914 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1) 8915 ret <2 x i64> %res 8916} 8917 8918define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 8919; X86-LABEL: test_int_x86_avx512_mask_valign_q_128: 8920; X86: # %bb.0: 8921; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8922; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8923; X86-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01] 8924; X86-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0] 8925; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8926; X86-NEXT: retl # encoding: [0xc3] 8927; 8928; X64-LABEL: test_int_x86_avx512_mask_valign_q_128: 8929; X64: # %bb.0: 8930; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8931; X64-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01] 8932; X64-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0] 8933; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8934; X64-NEXT: retq # encoding: [0xc3] 8935 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4) 8936 ret <2 x i64> %res 8937} 8938 8939declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8) 8940 8941define <4 x i64>@test_int_x86_avx512_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3) { 8942; CHECK-LABEL: test_int_x86_avx512_valign_q_256: 8943; CHECK: # %bb.0: 8944; CHECK-NEXT: valignq $3, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xc1,0x03] 8945; CHECK-NEXT: # ymm0 = ymm1[3],ymm0[0,1,2] 8946; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8947 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1) 8948 ret <4 x i64> %res 8949} 8950 8951define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 8952; X86-LABEL: test_int_x86_avx512_mask_valign_q_256: 8953; X86: # %bb.0: 8954; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8955; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8956; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03] 8957; X86-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2] 8958; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8959; X86-NEXT: retl # encoding: [0xc3] 8960; 8961; X64-LABEL: test_int_x86_avx512_mask_valign_q_256: 8962; X64: # %bb.0: 8963; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8964; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03] 8965; X64-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2] 8966; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8967; X64-NEXT: retq # encoding: [0xc3] 8968 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4) 8969 ret <4 x i64> %res 8970} 8971 8972declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) 8973 8974define <4 x double>@test_int_x86_avx512_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) { 8975; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_256: 8976; CHECK: # %bb.0: 8977; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1] 8978; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8979 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 8980 ret <4 x double> %res 8981} 8982 8983define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 8984; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: 8985; X86: # %bb.0: 8986; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8987; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 8988; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1] 8989; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 8990; X86-NEXT: retl # encoding: [0xc3] 8991; 8992; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: 8993; X64: # %bb.0: 8994; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 8995; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1] 8996; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 8997; X64-NEXT: retq # encoding: [0xc3] 8998 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 8999 ret <4 x double> %res 9000} 9001 9002define <4 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, i8 %x3) { 9003; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_256: 9004; X86: # %bb.0: 9005; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9006; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9007; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1] 9008; X86-NEXT: retl # encoding: [0xc3] 9009; 9010; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_256: 9011; X64: # %bb.0: 9012; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9013; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1] 9014; X64-NEXT: retq # encoding: [0xc3] 9015 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) 9016 ret <4 x double> %res 9017} 9018 9019declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) 9020 9021define <2 x double>@test_int_x86_avx512_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) { 9022; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_128: 9023; CHECK: # %bb.0: 9024; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1] 9025; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9026 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, 
<2 x double> %x2, i8 -1) 9027 ret <2 x double> %res 9028} 9029 9030define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 9031; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: 9032; X86: # %bb.0: 9033; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9034; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9035; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1] 9036; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 9037; X86-NEXT: retl # encoding: [0xc3] 9038; 9039; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: 9040; X64: # %bb.0: 9041; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9042; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1] 9043; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 9044; X64-NEXT: retq # encoding: [0xc3] 9045 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) 9046 ret <2 x double> %res 9047} 9048 9049define <2 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 9050; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_128: 9051; X86: # %bb.0: 9052; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9053; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9054; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1] 9055; X86-NEXT: retl # encoding: [0xc3] 9056; 9057; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_128: 9058; X64: # %bb.0: 9059; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9060; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1] 9061; X64-NEXT: retq # encoding: [0xc3] 9062 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) 9063 ret <2 x double> %res 9064} 9065 9066declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) 9067 9068define <8 x float>@test_int_x86_avx512_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) { 9069; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_256: 9070; CHECK: # %bb.0: 9071; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1] 9072; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9073 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 9074 ret <8 x float> %res 9075} 9076 9077define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 9078; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: 9079; X86: # %bb.0: 9080; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9081; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9082; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1] 9083; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9084; X86-NEXT: retl # encoding: [0xc3] 9085; 9086; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: 9087; X64: # %bb.0: 9088; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9089; 
X64-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1] 9090; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9091; X64-NEXT: retq # encoding: [0xc3] 9092 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 9093 ret <8 x float> %res 9094} 9095 9096define <8 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, i8 %x3) { 9097; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_256: 9098; X86: # %bb.0: 9099; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9100; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9101; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1] 9102; X86-NEXT: retl # encoding: [0xc3] 9103; 9104; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_256: 9105; X64: # %bb.0: 9106; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9107; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1] 9108; X64-NEXT: retq # encoding: [0xc3] 9109 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) 9110 ret <8 x float> %res 9111} 9112 9113declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) 9114 9115define <4 x float>@test_int_x86_avx512_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) { 9116; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_128: 9117; CHECK: # %bb.0: 9118; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1] 9119; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9120 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) 9121 ret <4 x float> %res 9122} 9123 9124define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { 9125; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: 9126; X86: # %bb.0: 9127; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9128; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9129; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1] 9130; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 9131; X86-NEXT: retl # encoding: [0xc3] 9132; 9133; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: 9134; X64: # %bb.0: 9135; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9136; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1] 9137; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 9138; X64-NEXT: retq # encoding: [0xc3] 9139 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) 9140 ret <4 x float> %res 9141} 9142 9143define <4 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, i8 %x3) { 9144; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_128: 9145; X86: # %bb.0: 9146; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9147; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9148; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1] 9149; 
X86-NEXT: retl # encoding: [0xc3] 9150; 9151; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_128: 9152; X64: # %bb.0: 9153; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9154; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1] 9155; X64-NEXT: retq # encoding: [0xc3] 9156 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) 9157 ret <4 x float> %res 9158} 9159 9160declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8) 9161 9162define <4 x float>@test_int_x86_avx512_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2) { 9163; CHECK-LABEL: test_int_x86_avx512_vextractf32x4_256: 9164; CHECK: # %bb.0: 9165; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 9166; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9167; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9168 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1) 9169 ret <4 x float> %res 9170} 9171 9172define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) { 9173; X86-LABEL: test_int_x86_avx512_mask_vextractf32x4_256: 9174; X86: # %bb.0: 9175; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9176; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9177; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01] 9178; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 9179; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9180; X86-NEXT: retl # encoding: [0xc3] 9181; 9182; X64-LABEL: test_int_x86_avx512_mask_vextractf32x4_256: 9183; X64: # %bb.0: 9184; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9185; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01] 9186; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 9187; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9188; X64-NEXT: retq # encoding: [0xc3] 9189 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3) 9190 ret <4 x float> %res 9191} 9192 9193define <4 x float>@test_int_x86_avx512_maskz_vextractf32x4_256(<8 x float> %x0, i8 %x3) { 9194; X86-LABEL: test_int_x86_avx512_maskz_vextractf32x4_256: 9195; X86: # %bb.0: 9196; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9197; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9198; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01] 9199; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9200; X86-NEXT: retl # encoding: [0xc3] 9201; 9202; X64-LABEL: test_int_x86_avx512_maskz_vextractf32x4_256: 9203; X64: # %bb.0: 9204; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9205; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01] 9206; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9207; X64-NEXT: retq # encoding: [0xc3] 9208 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3) 9209 ret <4 x float> %res 9210} 9211 9212declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, 
<8 x float>, i8) 9213 9214define <8 x float>@test_int_x86_avx512_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3) { 9215; CHECK-LABEL: test_int_x86_avx512_insertf32x4_256: 9216; CHECK: # %bb.0: 9217; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 9218; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9219 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1) 9220 ret <8 x float> %res 9221} 9222 9223define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) { 9224; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_256: 9225; X86: # %bb.0: 9226; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9227; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9228; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01] 9229; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9230; X86-NEXT: retl # encoding: [0xc3] 9231; 9232; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_256: 9233; X64: # %bb.0: 9234; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9235; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01] 9236; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9237; X64-NEXT: retq # encoding: [0xc3] 9238 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4) 9239 ret <8 x float> %res 9240} 9241 9242define <8 x float>@test_int_x86_avx512_maskz_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, i8 %x4) { 9243; X86-LABEL: test_int_x86_avx512_maskz_insertf32x4_256: 9244; X86: # %bb.0: 9245; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9246; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9247; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01] 9248; X86-NEXT: retl # encoding: [0xc3] 9249; 9250; X64-LABEL: test_int_x86_avx512_maskz_insertf32x4_256: 9251; X64: # %bb.0: 9252; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9253; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01] 9254; X64-NEXT: retq # encoding: [0xc3] 9255 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4) 9256 ret <8 x float> %res 9257} 9258 9259declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8) 9260 9261define <8 x i32>@test_int_x86_avx512_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3) { 9262; CHECK-LABEL: test_int_x86_avx512_inserti32x4_256: 9263; CHECK: # %bb.0: 9264; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 9265; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9266 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1) 9267 ret <8 x i32> %res 9268} 9269 9270define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) { 9271; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_256: 9272; X86: # %bb.0: 9273; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 
# encoding: [0x0f,0xb6,0x44,0x24,0x04] 9274; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9275; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01] 9276; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9277; X86-NEXT: retl # encoding: [0xc3] 9278; 9279; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_256: 9280; X64: # %bb.0: 9281; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9282; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01] 9283; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9284; X64-NEXT: retq # encoding: [0xc3] 9285 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4) 9286 ret <8 x i32> %res 9287} 9288 9289define <8 x i32>@test_int_x86_avx512_maskz_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x4) { 9290; X86-LABEL: test_int_x86_avx512_maskz_inserti32x4_256: 9291; X86: # %bb.0: 9292; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9293; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9294; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01] 9295; X86-NEXT: retl # encoding: [0xc3] 9296; 9297; X64-LABEL: test_int_x86_avx512_maskz_inserti32x4_256: 9298; X64: # %bb.0: 9299; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9300; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01] 9301; X64-NEXT: retq # encoding: [0xc3] 9302 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4) 9303 ret <8 x i32> %res 9304} 9305 9306define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 9307; X86-LABEL: test_mm512_maskz_max_ps_256: 9308; X86: # %bb.0: 9309; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9310; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9311; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1] 9312; X86-NEXT: retl # encoding: [0xc3] 9313; 9314; X64-LABEL: test_mm512_maskz_max_ps_256: 9315; X64: # %bb.0: 9316; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9317; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1] 9318; X64-NEXT: retq # encoding: [0xc3] 9319 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 9320 ret <8 x float> %res 9321} 9322 9323define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 9324; X86-LABEL: test_mm512_mask_max_ps_256: 9325; X86: # %bb.0: 9326; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9327; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9328; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1] 9329; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9330; X86-NEXT: retl # encoding: [0xc3] 9331; 9332; X64-LABEL: test_mm512_mask_max_ps_256: 9333; X64: # %bb.0: 9334; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9335; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1] 
9336; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9337; X64-NEXT: retq # encoding: [0xc3] 9338 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 9339 ret <8 x float> %res 9340} 9341 9342define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 9343; CHECK-LABEL: test_mm512_max_ps_256: 9344; CHECK: # %bb.0: 9345; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1] 9346; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9347 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 9348 ret <8 x float> %res 9349} 9350declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 9351 9352define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 9353; X86-LABEL: test_mm512_maskz_max_ps_128: 9354; X86: # %bb.0: 9355; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9356; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9357; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1] 9358; X86-NEXT: retl # encoding: [0xc3] 9359; 9360; X64-LABEL: test_mm512_maskz_max_ps_128: 9361; X64: # %bb.0: 9362; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9363; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1] 9364; X64-NEXT: retq # encoding: [0xc3] 9365 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 9366 ret <4 x float> %res 9367} 9368 9369define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 9370; X86-LABEL: test_mm512_mask_max_ps_128: 9371; X86: # %bb.0: 9372; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9373; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9374; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1] 9375; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 9376; X86-NEXT: retl # encoding: [0xc3] 9377; 9378; X64-LABEL: test_mm512_mask_max_ps_128: 9379; X64: # %bb.0: 9380; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9381; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1] 9382; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 9383; X64-NEXT: retq # encoding: [0xc3] 9384 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 9385 ret <4 x float> %res 9386} 9387 9388define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 9389; CHECK-LABEL: test_mm512_max_ps_128: 9390; CHECK: # %bb.0: 9391; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] 9392; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9393 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 9394 ret <4 x float> %res 9395} 9396declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 9397 9398define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 9399; X86-LABEL: 
test_mm512_maskz_min_ps_256: 9400; X86: # %bb.0: 9401; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9402; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9403; X86-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1] 9404; X86-NEXT: retl # encoding: [0xc3] 9405; 9406; X64-LABEL: test_mm512_maskz_min_ps_256: 9407; X64: # %bb.0: 9408; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9409; X64-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1] 9410; X64-NEXT: retq # encoding: [0xc3] 9411 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 9412 ret <8 x float> %res 9413} 9414 9415define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 9416; X86-LABEL: test_mm512_mask_min_ps_256: 9417; X86: # %bb.0: 9418; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9419; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9420; X86-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1] 9421; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9422; X86-NEXT: retl # encoding: [0xc3] 9423; 9424; X64-LABEL: test_mm512_mask_min_ps_256: 9425; X64: # %bb.0: 9426; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9427; X64-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1] 9428; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 9429; X64-NEXT: retq # encoding: [0xc3] 9430 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 9431 ret <8 x float> %res 9432} 9433 9434define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 9435; CHECK-LABEL: test_mm512_min_ps_256: 9436; CHECK: # %bb.0: 9437; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1] 9438; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9439 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 9440 ret <8 x float> %res 9441} 9442declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 9443 9444define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 9445; X86-LABEL: test_mm512_maskz_min_ps_128: 9446; X86: # %bb.0: 9447; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9448; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9449; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1] 9450; X86-NEXT: retl # encoding: [0xc3] 9451; 9452; X64-LABEL: test_mm512_maskz_min_ps_128: 9453; X64: # %bb.0: 9454; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9455; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1] 9456; X64-NEXT: retq # encoding: [0xc3] 9457 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 9458 ret <4 x float> %res 9459} 9460 9461define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 9462; X86-LABEL: test_mm512_mask_min_ps_128: 9463; X86: # %bb.0: 9464; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9465; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9466; X86-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1] 9467; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 9468; X86-NEXT: retl # encoding: [0xc3] 9469; 9470; X64-LABEL: test_mm512_mask_min_ps_128: 9471; X64: # %bb.0: 9472; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9473; X64-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1] 9474; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 9475; X64-NEXT: retq # encoding: [0xc3] 9476 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 9477 ret <4 x float> %res 9478} 9479 9480define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 9481; CHECK-LABEL: test_mm512_min_ps_128: 9482; CHECK: # %bb.0: 9483; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] 9484; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9485 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 9486 ret <4 x float> %res 9487} 9488declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 9489 9490define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { 9491; CHECK-LABEL: test_cmp_d_256: 9492; CHECK: # %bb.0: 9493; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] 9494; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9495; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc0] 9496; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x02] 9497; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04] 9498; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x05] 9499; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe1] 9500; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9501; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 9502; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 9503; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 9504; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9505; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9506; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9507; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9508; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9509; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9510; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9511; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9512; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] 9513; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9514; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9515; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9516 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 9517 %vec0 = 
insertelement <8 x i8> undef, i8 %res0, i32 0 9518 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 9519 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9520 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 9521 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9522 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 9523 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9524 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 9525 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9526 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 9527 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9528 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 9529 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9530 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 9531 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9532 ret <8 x i8> %vec7 9533} 9534 9535define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { 9536; X86-LABEL: test_mask_cmp_d_256: 9537; X86: # %bb.0: 9538; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9539; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9540; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] 9541; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9542; X86-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0] 9543; X86-NEXT: vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02] 9544; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04] 9545; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05] 9546; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9] 9547; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] 9548; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] 9549; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9550; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01] 9551; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca] 9552; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] 9553; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb] 9554; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] 9555; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc] 9556; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] 9557; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 9558; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] 9559; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9560; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9561; X86-NEXT: retl # encoding: [0xc3] 9562; 9563; X64-LABEL: test_mask_cmp_d_256: 9564; X64: # %bb.0: 9565; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9566; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] 9567; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9568; X64-NEXT: vpcmpgtd %ymm0, %ymm1, 
%k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0] 9569; X64-NEXT: vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02] 9570; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04] 9571; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05] 9572; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9] 9573; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9574; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 9575; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 9576; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 9577; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9578; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9579; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9580; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9581; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9582; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9583; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9584; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9585; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] 9586; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9587; X64-NEXT: retq # encoding: [0xc3] 9588 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 9589 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9590 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) 9591 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9592 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask) 9593 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9594 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask) 9595 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9596 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask) 9597 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9598 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask) 9599 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9600 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask) 9601 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9602 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask) 9603 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9604 ret <8 x i8> %vec7 9605} 9606 9607declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone 9608 9609define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { 9610; CHECK-LABEL: test_ucmp_d_256: 9611; CHECK: # %bb.0: 9612; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] 9613; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9614; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01] 9615; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x02] 9616; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04] 9617; 
CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x05] 9618; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x06] 9619; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9620; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 9621; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 9622; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 9623; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9624; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9625; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9626; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9627; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9628; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9629; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9630; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9631; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] 9632; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9633; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9634; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9635 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 9636 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9637 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 9638 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9639 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 9640 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9641 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 9642 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9643 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 9644 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9645 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 9646 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9647 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 9648 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9649 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 9650 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9651 ret <8 x i8> %vec7 9652} 9653 9654define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { 9655; X86-LABEL: test_mask_ucmp_d_256: 9656; X86: # %bb.0: 9657; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9658; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9659; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] 9660; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9661; X86-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01] 9662; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02] 9663; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04] 9664; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: 
[0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05] 9665; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06] 9666; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] 9667; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] 9668; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9669; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01] 9670; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca] 9671; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] 9672; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb] 9673; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] 9674; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc] 9675; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] 9676; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 9677; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] 9678; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9679; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9680; X86-NEXT: retl # encoding: [0xc3] 9681; 9682; X64-LABEL: test_mask_ucmp_d_256: 9683; X64: # %bb.0: 9684; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9685; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] 9686; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9687; X64-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01] 9688; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02] 9689; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04] 9690; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05] 9691; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06] 9692; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9693; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 9694; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 9695; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 9696; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9697; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9698; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9699; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9700; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9701; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9702; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9703; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9704; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] 9705; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9706; X64-NEXT: retq # encoding: [0xc3] 9707 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 9708 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9709 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) 9710 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9711 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x 
i32> %a0, <8 x i32> %a1, i32 2, i8 %mask) 9712 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9713 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask) 9714 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9715 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask) 9716 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9717 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask) 9718 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9719 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask) 9720 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9721 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask) 9722 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9723 ret <8 x i8> %vec7 9724} 9725 9726declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone 9727 9728define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { 9729; CHECK-LABEL: test_cmp_q_256: 9730; CHECK: # %bb.0: 9731; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] 9732; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 # encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8] 9733; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02] 9734; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04] 9735; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05] 9736; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9] 9737; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9738; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9739; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9740; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9741; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9742; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9743; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9744; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9745; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9746; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9747; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9748; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9749; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00] 9750; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9751; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9752; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9753 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) 9754 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9755 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1) 9756 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9757 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1) 9758 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9759 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1) 9760 %vec3 = insertelement <8 x i8> 
%vec2, i8 %res3, i32 3 9761 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1) 9762 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9763 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1) 9764 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9765 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1) 9766 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9767 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1) 9768 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9769 ret <8 x i8> %vec7 9770} 9771 9772define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { 9773; X86-LABEL: test_mask_cmp_q_256: 9774; X86: # %bb.0: 9775; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9776; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9777; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 9778; X86-NEXT: vpcmpgtq %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd0] 9779; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02] 9780; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04] 9781; X86-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x05] 9782; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf1] 9783; X86-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 9784; X86-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 9785; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9786; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9787; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9788; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9789; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9790; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9791; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9792; X86-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9793; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9794; X86-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9795; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 9796; X86-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9797; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9798; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9799; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9800; X86-NEXT: retl # encoding: [0xc3] 9801; 9802; X64-LABEL: test_mask_cmp_q_256: 9803; X64: # %bb.0: 9804; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9805; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 9806; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd0] 9807; X64-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02] 9808; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04] 9809; X64-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x05] 9810; X64-NEXT: vpcmpgtq %ymm1, 
%ymm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf1] 9811; X64-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 9812; X64-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 9813; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9814; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9815; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9816; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9817; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9818; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9819; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9820; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9821; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9822; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9823; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 9824; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9825; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9826; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9827; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9828; X64-NEXT: retq # encoding: [0xc3] 9829 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) 9830 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9831 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask) 9832 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9833 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask) 9834 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9835 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask) 9836 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9837 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask) 9838 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9839 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask) 9840 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9841 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask) 9842 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9843 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask) 9844 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9845 ret <8 x i8> %vec7 9846} 9847 9848declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone 9849 9850define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { 9851; CHECK-LABEL: test_ucmp_q_256: 9852; CHECK: # %bb.0: 9853; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] 9854; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01] 9855; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02] 9856; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04] 9857; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05] 9858; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06] 9859; CHECK-NEXT: kmovw %k1, 
%eax # encoding: [0xc5,0xf8,0x93,0xc1] 9860; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9861; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9862; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9863; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9864; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9865; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9866; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9867; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9868; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9869; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9870; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9871; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00] 9872; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9873; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9874; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9875 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) 9876 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9877 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1) 9878 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9879 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1) 9880 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9881 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1) 9882 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9883 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1) 9884 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9885 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1) 9886 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9887 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1) 9888 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9889 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1) 9890 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9891 ret <8 x i8> %vec7 9892} 9893 9894define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { 9895; X86-LABEL: test_mask_ucmp_q_256: 9896; X86: # %bb.0: 9897; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9898; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9899; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 9900; X86-NEXT: vpcmpltuq %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01] 9901; X86-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02] 9902; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04] 9903; X86-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x05] 9904; X86-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06] 9905; X86-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 9906; X86-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 
9907; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9908; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9909; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9910; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9911; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9912; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9913; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9914; X86-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9915; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9916; X86-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9917; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 9918; X86-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9919; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9920; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9921; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9922; X86-NEXT: retl # encoding: [0xc3] 9923; 9924; X64-LABEL: test_mask_ucmp_q_256: 9925; X64: # %bb.0: 9926; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9927; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 9928; X64-NEXT: vpcmpltuq %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01] 9929; X64-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02] 9930; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04] 9931; X64-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x05] 9932; X64-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06] 9933; X64-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 9934; X64-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 9935; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9936; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9937; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9938; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9939; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9940; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9941; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9942; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9943; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9944; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9945; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 9946; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9947; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9948; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9949; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9950; X64-NEXT: retq # encoding: [0xc3] 9951 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) 9952 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9953 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask) 9954 %vec1 = 
insertelement <8 x i8> %vec0, i8 %res1, i32 1 9955 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask) 9956 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9957 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask) 9958 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9959 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask) 9960 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9961 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask) 9962 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9963 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask) 9964 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9965 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask) 9966 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9967 ret <8 x i8> %vec7 9968} 9969 9970declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone 9971 9972define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { 9973; CHECK-LABEL: test_cmp_d_128: 9974; CHECK: # %bb.0: 9975; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] 9976; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8] 9977; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02] 9978; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04] 9979; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05] 9980; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9] 9981; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9982; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 9983; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 9984; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 9985; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9986; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 9987; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9988; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 9989; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9990; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 9991; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9992; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 9993; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00] 9994; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 9995; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9996 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) 9997 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9998 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1) 9999 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 10000 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1) 10001 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10002 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, 
i32 3, i8 -1) 10003 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10004 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1) 10005 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10006 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1) 10007 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10008 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1) 10009 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10010 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1) 10011 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10012 ret <8 x i8> %vec7 10013} 10014 10015define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { 10016; X86-LABEL: test_mask_cmp_d_128: 10017; X86: # %bb.0: 10018; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10019; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10020; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 10021; X86-NEXT: vpcmpgtd %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x66,0xd0] 10022; X86-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02] 10023; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04] 10024; X86-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x05] 10025; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf1] 10026; X86-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 10027; X86-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 10028; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10029; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10030; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10031; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10032; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10033; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10034; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10035; X86-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10036; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10037; X86-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10038; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10039; X86-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10040; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10041; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10042; X86-NEXT: retl # encoding: [0xc3] 10043; 10044; X64-LABEL: test_mask_cmp_d_128: 10045; X64: # %bb.0: 10046; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10047; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 10048; X64-NEXT: vpcmpgtd %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x66,0xd0] 10049; X64-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02] 10050; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04] 10051; X64-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k1} # encoding: 
[0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x05] 10052; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf1] 10053; X64-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 10054; X64-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 10055; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10056; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10057; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10058; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10059; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10060; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10061; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10062; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10063; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10064; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10065; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10066; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10067; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10068; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10069; X64-NEXT: retq # encoding: [0xc3] 10070 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) 10071 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10072 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask) 10073 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 10074 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask) 10075 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10076 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask) 10077 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10078 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask) 10079 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10080 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask) 10081 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10082 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask) 10083 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10084 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask) 10085 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10086 ret <8 x i8> %vec7 10087} 10088 10089declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone 10090 10091define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { 10092; CHECK-LABEL: test_ucmp_d_128: 10093; CHECK: # %bb.0: 10094; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] 10095; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01] 10096; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02] 10097; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04] 10098; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05] 10099; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 # encoding: 
[0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06] 10100; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10101; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10102; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10103; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10104; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10105; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10106; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10107; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10108; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10109; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10110; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10111; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10112; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00] 10113; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10114; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10115 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) 10116 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10117 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1) 10118 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 10119 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1) 10120 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10121 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1) 10122 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10123 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1) 10124 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10125 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1) 10126 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10127 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1) 10128 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10129 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1) 10130 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10131 ret <8 x i8> %vec7 10132} 10133 10134define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { 10135; X86-LABEL: test_mask_ucmp_d_128: 10136; X86: # %bb.0: 10137; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10138; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10139; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 10140; X86-NEXT: vpcmpltud %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01] 10141; X86-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02] 10142; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04] 10143; X86-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x05] 10144; X86-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06] 10145; X86-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 10146; X86-NEXT: kshiftrw $12, 
%k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 10147; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10148; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10149; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10150; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10151; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10152; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10153; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10154; X86-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10155; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10156; X86-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10157; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10158; X86-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10159; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10160; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10161; X86-NEXT: retl # encoding: [0xc3] 10162; 10163; X64-LABEL: test_mask_ucmp_d_128: 10164; X64: # %bb.0: 10165; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10166; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 10167; X64-NEXT: vpcmpltud %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01] 10168; X64-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02] 10169; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04] 10170; X64-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x05] 10171; X64-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06] 10172; X64-NEXT: kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c] 10173; X64-NEXT: kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c] 10174; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10175; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10176; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10177; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10178; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10179; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10180; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10181; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10182; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10183; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10184; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10185; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10186; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10187; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10188; X64-NEXT: retq # encoding: [0xc3] 10189 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) 10190 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10191 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask) 10192 %vec1 = insertelement 
<8 x i8> %vec0, i8 %res1, i32 1 10193 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask) 10194 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10195 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask) 10196 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10197 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask) 10198 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10199 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask) 10200 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10201 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask) 10202 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10203 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask) 10204 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10205 ret <8 x i8> %vec7 10206} 10207 10208declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone 10209 10210define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 10211; CHECK-LABEL: test_cmp_q_128: 10212; CHECK: # %bb.0: 10213; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] 10214; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8] 10215; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02] 10216; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04] 10217; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05] 10218; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9] 10219; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10220; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10221; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10222; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10223; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10224; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10225; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10226; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10227; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10228; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10229; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10230; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10231; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00] 10232; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10233; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10234 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 10235 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10236 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 10237 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 10238 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 10239 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10240 %res3 = call i8 
@llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 10241 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10242 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 10243 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10244 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 10245 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10246 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 10247 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10248 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 10249 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10250 ret <8 x i8> %vec7 10251} 10252 10253define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 10254; X86-LABEL: test_mask_cmp_q_128: 10255; X86: # %bb.0: 10256; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10257; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10258; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 10259; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd0] 10260; X86-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02] 10261; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04] 10262; X86-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x05] 10263; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf1] 10264; X86-NEXT: kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e] 10265; X86-NEXT: kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e] 10266; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10267; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10268; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10269; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10270; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10271; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10272; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10273; X86-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10274; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10275; X86-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10276; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10277; X86-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10278; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10279; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10280; X86-NEXT: retl # encoding: [0xc3] 10281; 10282; X64-LABEL: test_mask_cmp_q_128: 10283; X64: # %bb.0: 10284; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10285; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 10286; X64-NEXT: vpcmpgtq %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd0] 10287; X64-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02] 10288; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04] 10289; X64-NEXT: vpcmpnltq 
%xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x05] 10290; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf1] 10291; X64-NEXT: kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e] 10292; X64-NEXT: kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e] 10293; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10294; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10295; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10296; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10297; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10298; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10299; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10300; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10301; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10302; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10303; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10304; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10305; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10306; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10307; X64-NEXT: retq # encoding: [0xc3] 10308 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 10309 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10310 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 10311 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 10312 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask) 10313 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10314 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 10315 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10316 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 10317 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10318 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 10319 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10320 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 10321 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10322 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 10323 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10324 ret <8 x i8> %vec7 10325} 10326 10327declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 10328 10329define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 10330; CHECK-LABEL: test_ucmp_q_128: 10331; CHECK: # %bb.0: 10332; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] 10333; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01] 10334; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02] 10335; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04] 10336; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05] 10337; CHECK-NEXT: vpcmpnleuq 
%xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06] 10338; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10339; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10340; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10341; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10342; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10343; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10344; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10345; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10346; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10347; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10348; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10349; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10350; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00] 10351; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10352; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10353 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 10354 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10355 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 10356 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 10357 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 10358 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 10359 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 10360 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 10361 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 10362 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 10363 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 10364 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 10365 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 10366 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 10367 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 10368 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 10369 ret <8 x i8> %vec7 10370} 10371 10372define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 10373; X86-LABEL: test_mask_ucmp_q_128: 10374; X86: # %bb.0: 10375; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10376; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10377; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 10378; X86-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01] 10379; X86-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02] 10380; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04] 10381; X86-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x05] 10382; X86-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x06] 10383; X86-NEXT: kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e] 
10384; X86-NEXT: kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e] 10385; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10386; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10387; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10388; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10389; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10390; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10391; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10392; X86-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10393; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10394; X86-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10395; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10396; X86-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10397; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10398; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10399; X86-NEXT: retl # encoding: [0xc3] 10400; 10401; X64-LABEL: test_mask_ucmp_q_128: 10402; X64: # %bb.0: 10403; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10404; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 10405; X64-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01] 10406; X64-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02] 10407; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04] 10408; X64-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x05] 10409; X64-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x06] 10410; X64-NEXT: kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e] 10411; X64-NEXT: kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e] 10412; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 10413; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10414; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 10415; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 10416; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 10417; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 10418; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 10419; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 10420; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 10421; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 10422; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 10423; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 10424; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10425; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 10426; X64-NEXT: retq # encoding: [0xc3] 10427 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 10428 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 10429 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 
%mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcastf32x4_256:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_maskz_broadcastf32x4_256(<4 x float> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcastf32x4_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcastf32x4_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256_load(<4 x float>* %x0ptr, <8 x float> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vbroadcastf32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x00]
; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcastf32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x07]
; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT: retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
  ret <8 x float> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcasti32x4_256:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_broadcasti32x4_256(<4 x i32> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcasti32x4_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcasti32x4_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256_load(<4 x i32>* %x0ptr, <8 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vbroadcasti32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x00]
; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcasti32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x07]
; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT: retq # encoding: [0xc3]
  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_q_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpabsq %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL:
test_int_x86_avx512_mask_pabs_q_128: 10615; X64: # %bb.0: 10616; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10617; X64-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] 10618; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 10619; X64-NEXT: retq # encoding: [0xc3] 10620 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 10621 ret <2 x i64> %res 10622} 10623 10624declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) 10625 10626define <4 x i64>@test_int_x86_avx512_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1) { 10627; CHECK-LABEL: test_int_x86_avx512_pabs_q_256: 10628; CHECK: # %bb.0: 10629; CHECK-NEXT: vpabsq %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0] 10630; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10631 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) 10632 ret <4 x i64> %res 10633} 10634 10635define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 10636; X86-LABEL: test_int_x86_avx512_mask_pabs_q_256: 10637; X86: # %bb.0: 10638; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10639; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10640; X86-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] 10641; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10642; X86-NEXT: retl # encoding: [0xc3] 10643; 10644; X64-LABEL: test_int_x86_avx512_mask_pabs_q_256: 10645; X64: # %bb.0: 10646; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10647; X64-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] 10648; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10649; X64-NEXT: retq # encoding: [0xc3] 10650 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 10651 ret <4 x i64> %res 10652} 10653 10654declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) 10655 10656define <4 x i32>@test_int_x86_avx512_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1) { 10657; CHECK-LABEL: test_int_x86_avx512_pabs_d_128: 10658; CHECK: # %bb.0: 10659; CHECK-NEXT: vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] 10660; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10661 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 10662 ret <4 x i32> %res 10663} 10664 10665define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 10666; X86-LABEL: test_int_x86_avx512_mask_pabs_d_128: 10667; X86: # %bb.0: 10668; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10669; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10670; X86-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] 10671; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 10672; X86-NEXT: retl # encoding: [0xc3] 10673; 10674; X64-LABEL: test_int_x86_avx512_mask_pabs_d_128: 10675; X64: # %bb.0: 10676; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10677; X64-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] 10678; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 10679; X64-NEXT: retq # encoding: [0xc3] 10680 %res = call <4 x 
i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 10681 ret <4 x i32> %res 10682} 10683 10684declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8) 10685 10686define <8 x i32>@test_int_x86_avx512_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1) { 10687; CHECK-LABEL: test_int_x86_avx512_pabs_d_256: 10688; CHECK: # %bb.0: 10689; CHECK-NEXT: vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] 10690; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10691 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) 10692 ret <8 x i32> %res 10693} 10694 10695define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 10696; X86-LABEL: test_int_x86_avx512_mask_pabs_d_256: 10697; X86: # %bb.0: 10698; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10699; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10700; X86-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8] 10701; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10702; X86-NEXT: retl # encoding: [0xc3] 10703; 10704; X64-LABEL: test_int_x86_avx512_mask_pabs_d_256: 10705; X64: # %bb.0: 10706; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10707; X64-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8] 10708; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10709; X64-NEXT: retq # encoding: [0xc3] 10710 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 10711 ret <8 x i32> %res 10712} 10713 10714declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8) 10715 10716define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 10717; X86-LABEL: test_int_x86_avx512_ptestm_d_128: 10718; X86: # %bb.0: 10719; X86-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1] 10720; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10721; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10722; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10723; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 10724; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10725; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10726; X86-NEXT: # kill: def $al killed $al killed $eax 10727; X86-NEXT: retl # encoding: [0xc3] 10728; 10729; X64-LABEL: test_int_x86_avx512_ptestm_d_128: 10730; X64: # %bb.0: 10731; X64-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1] 10732; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10733; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10734; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10735; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10736; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 10737; X64-NEXT: # kill: def $al killed $al killed $eax 10738; X64-NEXT: retq # encoding: [0xc3] 10739 %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 10740 %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) 10741 %res2 = add i8 %res, %res1 10742 ret i8 %res2 10743} 10744 10745declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8) 10746 10747define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 
10748; X86-LABEL: test_int_x86_avx512_ptestm_d_256: 10749; X86: # %bb.0: 10750; X86-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1] 10751; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10752; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 10753; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] 10754; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10755; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10756; X86-NEXT: retl # encoding: [0xc3] 10757; 10758; X64-LABEL: test_int_x86_avx512_ptestm_d_256: 10759; X64: # %bb.0: 10760; X64-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1] 10761; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10762; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7] 10763; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8] 10764; X64-NEXT: # kill: def $al killed $al killed $eax 10765; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10766; X64-NEXT: retq # encoding: [0xc3] 10767 %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 10768 %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) 10769 %res2 = add i8 %res, %res1 10770 ret i8 %res2 10771} 10772 10773declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8) 10774 10775define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 10776; X86-LABEL: test_int_x86_avx512_ptestm_q_128: 10777; X86: # %bb.0: 10778; X86-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1] 10779; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10780; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10781; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10782; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 10783; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10784; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10785; X86-NEXT: # kill: def $al killed $al killed $eax 10786; X86-NEXT: retl # encoding: [0xc3] 10787; 10788; X64-LABEL: test_int_x86_avx512_ptestm_q_128: 10789; X64: # %bb.0: 10790; X64-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1] 10791; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10792; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10793; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10794; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10795; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 10796; X64-NEXT: # kill: def $al killed $al killed $eax 10797; X64-NEXT: retq # encoding: [0xc3] 10798 %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 10799 %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) 10800 %res2 = add i8 %res, %res1 10801 ret i8 %res2 10802} 10803 10804declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8) 10805 10806define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 10807; X86-LABEL: test_int_x86_avx512_ptestm_q_256: 10808; X86: # %bb.0: 10809; X86-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1] 10810; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10811; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10812; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10813; X86-NEXT: kmovw %k1, %ecx # encoding: 
[0xc5,0xf8,0x93,0xc9] 10814; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10815; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10816; X86-NEXT: # kill: def $al killed $al killed $eax 10817; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10818; X86-NEXT: retl # encoding: [0xc3] 10819; 10820; X64-LABEL: test_int_x86_avx512_ptestm_q_256: 10821; X64: # %bb.0: 10822; X64-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1] 10823; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10824; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10825; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10826; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10827; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 10828; X64-NEXT: # kill: def $al killed $al killed $eax 10829; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10830; X64-NEXT: retq # encoding: [0xc3] 10831 %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 10832 %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) 10833 %res2 = add i8 %res, %res1 10834 ret i8 %res2 10835} 10836 10837declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2) 10838 10839define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 10840; X86-LABEL: test_int_x86_avx512_ptestnm_d_128: 10841; X86: # %bb.0: 10842; X86-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1] 10843; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10844; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10845; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10846; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 10847; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10848; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10849; X86-NEXT: # kill: def $al killed $al killed $eax 10850; X86-NEXT: retl # encoding: [0xc3] 10851; 10852; X64-LABEL: test_int_x86_avx512_ptestnm_d_128: 10853; X64: # %bb.0: 10854; X64-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1] 10855; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10856; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10857; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10858; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10859; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 10860; X64-NEXT: # kill: def $al killed $al killed $eax 10861; X64-NEXT: retq # encoding: [0xc3] 10862 %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 10863 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) 10864 %res2 = add i8 %res, %res1 10865 ret i8 %res2 10866} 10867 10868declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2) 10869 10870define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 10871; X86-LABEL: test_int_x86_avx512_ptestnm_d_256: 10872; X86: # %bb.0: 10873; X86-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1] 10874; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10875; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 10876; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] 10877; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10878; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10879; X86-NEXT: retl # 
encoding: [0xc3] 10880; 10881; X64-LABEL: test_int_x86_avx512_ptestnm_d_256: 10882; X64: # %bb.0: 10883; X64-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1] 10884; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10885; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7] 10886; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8] 10887; X64-NEXT: # kill: def $al killed $al killed $eax 10888; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10889; X64-NEXT: retq # encoding: [0xc3] 10890 %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 10891 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) 10892 %res2 = add i8 %res, %res1 10893 ret i8 %res2 10894} 10895 10896declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2) 10897 10898define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 10899; X86-LABEL: test_int_x86_avx512_ptestnm_q_128: 10900; X86: # %bb.0: 10901; X86-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1] 10902; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10903; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10904; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10905; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 10906; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10907; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10908; X86-NEXT: # kill: def $al killed $al killed $eax 10909; X86-NEXT: retl # encoding: [0xc3] 10910; 10911; X64-LABEL: test_int_x86_avx512_ptestnm_q_128: 10912; X64: # %bb.0: 10913; X64-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1] 10914; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10915; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10916; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10917; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10918; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 10919; X64-NEXT: # kill: def $al killed $al killed $eax 10920; X64-NEXT: retq # encoding: [0xc3] 10921 %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 10922 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) 10923 %res2 = add i8 %res, %res1 10924 ret i8 %res2 10925} 10926 10927declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2) 10928 10929define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 10930; X86-LABEL: test_int_x86_avx512_ptestnm_q_256: 10931; X86: # %bb.0: 10932; X86-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1] 10933; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10934; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10935; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10936; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 10937; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10938; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 10939; X86-NEXT: # kill: def $al killed $al killed $eax 10940; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10941; X86-NEXT: retl # encoding: [0xc3] 10942; 10943; X64-LABEL: test_int_x86_avx512_ptestnm_q_256: 10944; X64: # %bb.0: 10945; X64-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1] 10946; X64-NEXT: kmovw 
%edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10947; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9] 10948; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 10949; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 10950; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 10951; X64-NEXT: # kill: def $al killed $al killed $eax 10952; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10953; X64-NEXT: retq # encoding: [0xc3] 10954 %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 10955 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) 10956 %res2 = add i8 %res, %res1 10957 ret i8 %res2 10958} 10959 10960define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) { 10961; CHECK-LABEL: test_cmpps_256: 10962; CHECK: # %bb.0: 10963; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02] 10964; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10965; CHECK-NEXT: # kill: def $al killed $al killed $eax 10966; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10967; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10968 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1) 10969 ret i8 %res 10970} 10971declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8) 10972 10973define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) { 10974; CHECK-LABEL: test_cmpps_128: 10975; CHECK: # %bb.0: 10976; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] 10977; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10978; CHECK-NEXT: # kill: def $al killed $al killed $eax 10979; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10980 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1) 10981 ret i8 %res 10982} 10983declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8) 10984 10985define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) { 10986; CHECK-LABEL: test_cmppd_256: 10987; CHECK: # %bb.0: 10988; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02] 10989; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 10990; CHECK-NEXT: # kill: def $al killed $al killed $eax 10991; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10992; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10993 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1) 10994 ret i8 %res 10995} 10996declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8) 10997 10998define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) { 10999; CHECK-LABEL: test_cmppd_128: 11000; CHECK: # %bb.0: 11001; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02] 11002; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 11003; CHECK-NEXT: # kill: def $al killed $al killed $eax 11004; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11005 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1) 11006 ret i8 %res 11007} 11008declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8) 11009 11010define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 11011; CHECK-LABEL: test_mask_mul_epi32_rr_128: 11012; CHECK: # %bb.0: 11013; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1] 
11014; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11015 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 11016 ret < 2 x i64> %res 11017} 11018 11019define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 11020; X86-LABEL: test_mask_mul_epi32_rrk_128: 11021; X86: # %bb.0: 11022; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11023; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11024; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] 11025; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 11026; X86-NEXT: retl # encoding: [0xc3] 11027; 11028; X64-LABEL: test_mask_mul_epi32_rrk_128: 11029; X64: # %bb.0: 11030; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11031; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] 11032; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 11033; X64-NEXT: retq # encoding: [0xc3] 11034 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 11035 ret < 2 x i64> %res 11036} 11037 11038define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 11039; X86-LABEL: test_mask_mul_epi32_rrkz_128: 11040; X86: # %bb.0: 11041; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11042; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11043; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 11044; X86-NEXT: retl # encoding: [0xc3] 11045; 11046; X64-LABEL: test_mask_mul_epi32_rrkz_128: 11047; X64: # %bb.0: 11048; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11049; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 11050; X64-NEXT: retq # encoding: [0xc3] 11051 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 11052 ret < 2 x i64> %res 11053} 11054 11055define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 11056; X86-LABEL: test_mask_mul_epi32_rm_128: 11057; X86: # %bb.0: 11058; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11059; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x00] 11060; X86-NEXT: retl # encoding: [0xc3] 11061; 11062; X64-LABEL: test_mask_mul_epi32_rm_128: 11063; X64: # %bb.0: 11064; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07] 11065; X64-NEXT: retq # encoding: [0xc3] 11066 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 11067 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 11068 ret < 2 x i64> %res 11069} 11070 11071define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 11072; X86-LABEL: test_mask_mul_epi32_rmk_128: 11073; X86: # %bb.0: 11074; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11075; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11076; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11077; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm1 {%k1} # encoding: 
[0x62,0xf2,0xfd,0x09,0x28,0x08] 11078; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11079; X86-NEXT: retl # encoding: [0xc3] 11080; 11081; X64-LABEL: test_mask_mul_epi32_rmk_128: 11082; X64: # %bb.0: 11083; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11084; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] 11085; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11086; X64-NEXT: retq # encoding: [0xc3] 11087 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 11088 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 11089 ret < 2 x i64> %res 11090} 11091 11092define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 11093; X86-LABEL: test_mask_mul_epi32_rmkz_128: 11094; X86: # %bb.0: 11095; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11096; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11097; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11098; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x00] 11099; X86-NEXT: retl # encoding: [0xc3] 11100; 11101; X64-LABEL: test_mask_mul_epi32_rmkz_128: 11102; X64: # %bb.0: 11103; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11104; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07] 11105; X64-NEXT: retq # encoding: [0xc3] 11106 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 11107 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 11108 ret < 2 x i64> %res 11109} 11110 11111define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 11112; X86-LABEL: test_mask_mul_epi32_rmb_128: 11113; X86: # %bb.0: 11114; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11115; X86-NEXT: vpmuldq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x00] 11116; X86-NEXT: retl # encoding: [0xc3] 11117; 11118; X64-LABEL: test_mask_mul_epi32_rmb_128: 11119; X64: # %bb.0: 11120; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07] 11121; X64-NEXT: retq # encoding: [0xc3] 11122 %q = load i64, i64* %ptr_b 11123 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 11124 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 11125 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 11126 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 11127 ret < 2 x i64> %res 11128} 11129 11130define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 11131; X86-LABEL: test_mask_mul_epi32_rmbk_128: 11132; X86: # %bb.0: 11133; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11134; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11135; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11136; X86-NEXT: vpmuldq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x08] 11137; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11138; X86-NEXT: retl # encoding: [0xc3] 11139; 11140; X64-LABEL: test_mask_mul_epi32_rmbk_128: 11141; X64: # %bb.0: 11142; X64-NEXT: kmovw %esi, %k1 # 
encoding: [0xc5,0xf8,0x92,0xce] 11143; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f] 11144; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11145; X64-NEXT: retq # encoding: [0xc3] 11146 %q = load i64, i64* %ptr_b 11147 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 11148 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 11149 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 11150 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 11151 ret < 2 x i64> %res 11152} 11153 11154define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 11155; X86-LABEL: test_mask_mul_epi32_rmbkz_128: 11156; X86: # %bb.0: 11157; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11158; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11159; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11160; X86-NEXT: vpmuldq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x00] 11161; X86-NEXT: retl # encoding: [0xc3] 11162; 11163; X64-LABEL: test_mask_mul_epi32_rmbkz_128: 11164; X64: # %bb.0: 11165; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11166; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07] 11167; X64-NEXT: retq # encoding: [0xc3] 11168 %q = load i64, i64* %ptr_b 11169 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 11170 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 11171 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 11172 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 11173 ret < 2 x i64> %res 11174} 11175 11176declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 11177 11178define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 11179; CHECK-LABEL: test_mask_mul_epi32_rr_256: 11180; CHECK: # %bb.0: 11181; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1] 11182; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11183 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 11184 ret < 4 x i64> %res 11185} 11186 11187define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 11188; X86-LABEL: test_mask_mul_epi32_rrk_256: 11189; X86: # %bb.0: 11190; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11191; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11192; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] 11193; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 11194; X86-NEXT: retl # encoding: [0xc3] 11195; 11196; X64-LABEL: test_mask_mul_epi32_rrk_256: 11197; X64: # %bb.0: 11198; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11199; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] 11200; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 11201; X64-NEXT: retq # encoding: [0xc3] 11202 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> 
%passThru, i8 %mask) 11203 ret < 4 x i64> %res 11204} 11205 11206define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 11207; X86-LABEL: test_mask_mul_epi32_rrkz_256: 11208; X86: # %bb.0: 11209; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11210; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11211; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 11212; X86-NEXT: retl # encoding: [0xc3] 11213; 11214; X64-LABEL: test_mask_mul_epi32_rrkz_256: 11215; X64: # %bb.0: 11216; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11217; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 11218; X64-NEXT: retq # encoding: [0xc3] 11219 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 11220 ret < 4 x i64> %res 11221} 11222 11223define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 11224; X86-LABEL: test_mask_mul_epi32_rm_256: 11225; X86: # %bb.0: 11226; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11227; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x00] 11228; X86-NEXT: retl # encoding: [0xc3] 11229; 11230; X64-LABEL: test_mask_mul_epi32_rm_256: 11231; X64: # %bb.0: 11232; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07] 11233; X64-NEXT: retq # encoding: [0xc3] 11234 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 11235 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 11236 ret < 4 x i64> %res 11237} 11238 11239define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 11240; X86-LABEL: test_mask_mul_epi32_rmk_256: 11241; X86: # %bb.0: 11242; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11243; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11244; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11245; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x08] 11246; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11247; X86-NEXT: retl # encoding: [0xc3] 11248; 11249; X64-LABEL: test_mask_mul_epi32_rmk_256: 11250; X64: # %bb.0: 11251; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11252; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] 11253; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11254; X64-NEXT: retq # encoding: [0xc3] 11255 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 11256 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 11257 ret < 4 x i64> %res 11258} 11259 11260define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 11261; X86-LABEL: test_mask_mul_epi32_rmkz_256: 11262; X86: # %bb.0: 11263; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11264; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11265; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11266; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x00] 11267; X86-NEXT: retl 
# encoding: [0xc3] 11268; 11269; X64-LABEL: test_mask_mul_epi32_rmkz_256: 11270; X64: # %bb.0: 11271; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11272; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07] 11273; X64-NEXT: retq # encoding: [0xc3] 11274 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 11275 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 11276 ret < 4 x i64> %res 11277} 11278 11279define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 11280; X86-LABEL: test_mask_mul_epi32_rmb_256: 11281; X86: # %bb.0: 11282; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11283; X86-NEXT: vpmuldq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x00] 11284; X86-NEXT: retl # encoding: [0xc3] 11285; 11286; X64-LABEL: test_mask_mul_epi32_rmb_256: 11287; X64: # %bb.0: 11288; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07] 11289; X64-NEXT: retq # encoding: [0xc3] 11290 %q = load i64, i64* %ptr_b 11291 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 11292 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 11293 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 11294 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 11295 ret < 4 x i64> %res 11296} 11297 11298define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 11299; X86-LABEL: test_mask_mul_epi32_rmbk_256: 11300; X86: # %bb.0: 11301; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11302; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11303; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11304; X86-NEXT: vpmuldq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x08] 11305; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11306; X86-NEXT: retl # encoding: [0xc3] 11307; 11308; X64-LABEL: test_mask_mul_epi32_rmbk_256: 11309; X64: # %bb.0: 11310; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11311; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] 11312; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11313; X64-NEXT: retq # encoding: [0xc3] 11314 %q = load i64, i64* %ptr_b 11315 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 11316 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 11317 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 11318 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 11319 ret < 4 x i64> %res 11320} 11321 11322define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 11323; X86-LABEL: test_mask_mul_epi32_rmbkz_256: 11324; X86: # %bb.0: 11325; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11326; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11327; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11328; X86-NEXT: vpmuldq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x00] 11329; X86-NEXT: retl # encoding: [0xc3] 11330; 11331; X64-LABEL: test_mask_mul_epi32_rmbkz_256: 
11332; X64: # %bb.0: 11333; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11334; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07] 11335; X64-NEXT: retq # encoding: [0xc3] 11336 %q = load i64, i64* %ptr_b 11337 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 11338 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 11339 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 11340 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 11341 ret < 4 x i64> %res 11342} 11343 11344declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 11345 11346define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 11347; CHECK-LABEL: test_mask_mul_epu32_rr_128: 11348; CHECK: # %bb.0: 11349; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1] 11350; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11351 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 11352 ret < 2 x i64> %res 11353} 11354 11355define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 11356; X86-LABEL: test_mask_mul_epu32_rrk_128: 11357; X86: # %bb.0: 11358; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11359; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11360; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] 11361; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 11362; X86-NEXT: retl # encoding: [0xc3] 11363; 11364; X64-LABEL: test_mask_mul_epu32_rrk_128: 11365; X64: # %bb.0: 11366; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11367; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] 11368; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 11369; X64-NEXT: retq # encoding: [0xc3] 11370 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 11371 ret < 2 x i64> %res 11372} 11373 11374define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 11375; X86-LABEL: test_mask_mul_epu32_rrkz_128: 11376; X86: # %bb.0: 11377; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11378; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11379; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 11380; X86-NEXT: retl # encoding: [0xc3] 11381; 11382; X64-LABEL: test_mask_mul_epu32_rrkz_128: 11383; X64: # %bb.0: 11384; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11385; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 11386; X64-NEXT: retq # encoding: [0xc3] 11387 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 11388 ret < 2 x i64> %res 11389} 11390 11391define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 11392; X86-LABEL: test_mask_mul_epu32_rm_128: 11393; X86: # %bb.0: 11394; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11395; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0xf4,0x00] 11396; X86-NEXT: retl # encoding: [0xc3] 11397; 11398; X64-LABEL: test_mask_mul_epu32_rm_128: 11399; X64: # %bb.0: 11400; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07] 11401; X64-NEXT: retq # encoding: [0xc3] 11402 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 11403 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 11404 ret < 2 x i64> %res 11405} 11406 11407define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 11408; X86-LABEL: test_mask_mul_epu32_rmk_128: 11409; X86: # %bb.0: 11410; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11411; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11412; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11413; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x08] 11414; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11415; X86-NEXT: retl # encoding: [0xc3] 11416; 11417; X64-LABEL: test_mask_mul_epu32_rmk_128: 11418; X64: # %bb.0: 11419; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11420; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] 11421; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11422; X64-NEXT: retq # encoding: [0xc3] 11423 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 11424 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 11425 ret < 2 x i64> %res 11426} 11427 11428define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 11429; X86-LABEL: test_mask_mul_epu32_rmkz_128: 11430; X86: # %bb.0: 11431; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11432; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11433; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11434; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x00] 11435; X86-NEXT: retl # encoding: [0xc3] 11436; 11437; X64-LABEL: test_mask_mul_epu32_rmkz_128: 11438; X64: # %bb.0: 11439; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11440; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07] 11441; X64-NEXT: retq # encoding: [0xc3] 11442 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 11443 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 11444 ret < 2 x i64> %res 11445} 11446 11447define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 11448; X86-LABEL: test_mask_mul_epu32_rmb_128: 11449; X86: # %bb.0: 11450; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11451; X86-NEXT: vpmuludq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x00] 11452; X86-NEXT: retl # encoding: [0xc3] 11453; 11454; X64-LABEL: test_mask_mul_epu32_rmb_128: 11455; X64: # %bb.0: 11456; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07] 11457; X64-NEXT: retq # encoding: [0xc3] 11458 %q = load i64, i64* %ptr_b 11459 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 11460 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, 
<2 x i32> zeroinitializer 11461 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 11462 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 11463 ret < 2 x i64> %res 11464} 11465 11466define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 11467; X86-LABEL: test_mask_mul_epu32_rmbk_128: 11468; X86: # %bb.0: 11469; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11470; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11471; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11472; X86-NEXT: vpmuludq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x08] 11473; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11474; X86-NEXT: retl # encoding: [0xc3] 11475; 11476; X64-LABEL: test_mask_mul_epu32_rmbk_128: 11477; X64: # %bb.0: 11478; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11479; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] 11480; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 11481; X64-NEXT: retq # encoding: [0xc3] 11482 %q = load i64, i64* %ptr_b 11483 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 11484 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 11485 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 11486 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 11487 ret < 2 x i64> %res 11488} 11489 11490define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 11491; X86-LABEL: test_mask_mul_epu32_rmbkz_128: 11492; X86: # %bb.0: 11493; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11494; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11495; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11496; X86-NEXT: vpmuludq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x00] 11497; X86-NEXT: retl # encoding: [0xc3] 11498; 11499; X64-LABEL: test_mask_mul_epu32_rmbkz_128: 11500; X64: # %bb.0: 11501; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11502; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07] 11503; X64-NEXT: retq # encoding: [0xc3] 11504 %q = load i64, i64* %ptr_b 11505 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 11506 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 11507 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 11508 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 11509 ret < 2 x i64> %res 11510} 11511 11512declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 11513 11514define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 11515; CHECK-LABEL: test_mask_mul_epu32_rr_256: 11516; CHECK: # %bb.0: 11517; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1] 11518; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11519 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 11520 ret < 4 x i64> %res 11521} 11522 11523define < 4 x i64> 
@test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 11524; X86-LABEL: test_mask_mul_epu32_rrk_256: 11525; X86: # %bb.0: 11526; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11527; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11528; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] 11529; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 11530; X86-NEXT: retl # encoding: [0xc3] 11531; 11532; X64-LABEL: test_mask_mul_epu32_rrk_256: 11533; X64: # %bb.0: 11534; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11535; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] 11536; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 11537; X64-NEXT: retq # encoding: [0xc3] 11538 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 11539 ret < 4 x i64> %res 11540} 11541 11542define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 11543; X86-LABEL: test_mask_mul_epu32_rrkz_256: 11544; X86: # %bb.0: 11545; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11546; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11547; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 11548; X86-NEXT: retl # encoding: [0xc3] 11549; 11550; X64-LABEL: test_mask_mul_epu32_rrkz_256: 11551; X64: # %bb.0: 11552; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11553; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 11554; X64-NEXT: retq # encoding: [0xc3] 11555 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 11556 ret < 4 x i64> %res 11557} 11558 11559define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 11560; X86-LABEL: test_mask_mul_epu32_rm_256: 11561; X86: # %bb.0: 11562; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11563; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x00] 11564; X86-NEXT: retl # encoding: [0xc3] 11565; 11566; X64-LABEL: test_mask_mul_epu32_rm_256: 11567; X64: # %bb.0: 11568; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07] 11569; X64-NEXT: retq # encoding: [0xc3] 11570 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 11571 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 11572 ret < 4 x i64> %res 11573} 11574 11575define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 11576; X86-LABEL: test_mask_mul_epu32_rmk_256: 11577; X86: # %bb.0: 11578; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11579; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11580; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11581; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x08] 11582; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11583; X86-NEXT: retl # encoding: [0xc3] 11584; 11585; X64-LABEL: test_mask_mul_epu32_rmk_256: 11586; X64: # %bb.0: 11587; 
X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11588; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] 11589; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11590; X64-NEXT: retq # encoding: [0xc3] 11591 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 11592 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 11593 ret < 4 x i64> %res 11594} 11595 11596define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 11597; X86-LABEL: test_mask_mul_epu32_rmkz_256: 11598; X86: # %bb.0: 11599; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11600; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11601; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11602; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x00] 11603; X86-NEXT: retl # encoding: [0xc3] 11604; 11605; X64-LABEL: test_mask_mul_epu32_rmkz_256: 11606; X64: # %bb.0: 11607; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11608; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07] 11609; X64-NEXT: retq # encoding: [0xc3] 11610 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 11611 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 11612 ret < 4 x i64> %res 11613} 11614 11615define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 11616; X86-LABEL: test_mask_mul_epu32_rmb_256: 11617; X86: # %bb.0: 11618; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11619; X86-NEXT: vpmuludq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x00] 11620; X86-NEXT: retl # encoding: [0xc3] 11621; 11622; X64-LABEL: test_mask_mul_epu32_rmb_256: 11623; X64: # %bb.0: 11624; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07] 11625; X64-NEXT: retq # encoding: [0xc3] 11626 %q = load i64, i64* %ptr_b 11627 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 11628 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 11629 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 11630 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 11631 ret < 4 x i64> %res 11632} 11633 11634define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 11635; X86-LABEL: test_mask_mul_epu32_rmbk_256: 11636; X86: # %bb.0: 11637; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11638; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11639; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11640; X86-NEXT: vpmuludq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x08] 11641; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11642; X86-NEXT: retl # encoding: [0xc3] 11643; 11644; X64-LABEL: test_mask_mul_epu32_rmbk_256: 11645; X64: # %bb.0: 11646; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11647; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] 11648; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 11649; 
X64-NEXT: retq # encoding: [0xc3] 11650 %q = load i64, i64* %ptr_b 11651 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 11652 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 11653 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 11654 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 11655 ret < 4 x i64> %res 11656} 11657 11658define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 11659; X86-LABEL: test_mask_mul_epu32_rmbkz_256: 11660; X86: # %bb.0: 11661; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11662; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11663; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11664; X86-NEXT: vpmuludq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x00] 11665; X86-NEXT: retl # encoding: [0xc3] 11666; 11667; X64-LABEL: test_mask_mul_epu32_rmbkz_256: 11668; X64: # %bb.0: 11669; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11670; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07] 11671; X64-NEXT: retq # encoding: [0xc3] 11672 %q = load i64, i64* %ptr_b 11673 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 11674 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 11675 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 11676 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 11677 ret < 4 x i64> %res 11678} 11679 11680declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 11681 11682declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8) 11683 11684define <4 x float>@test_int_x86_avx512_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1) { 11685; CHECK-LABEL: test_int_x86_avx512_cvt_dq2ps_128: 11686; CHECK: # %bb.0: 11687; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0] 11688; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11689 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 11690 ret <4 x float> %res 11691} 11692 11693define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 11694; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: 11695; X86: # %bb.0: 11696; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11697; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11698; X86-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8] 11699; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11700; X86-NEXT: retl # encoding: [0xc3] 11701; 11702; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: 11703; X64: # %bb.0: 11704; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11705; X64-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8] 11706; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11707; X64-NEXT: retq # encoding: [0xc3] 11708 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 11709 ret <4 x float> %res 11710} 11711 11712declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8) 11713 11714define <8 x 
float>@test_int_x86_avx512_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1) { 11715; CHECK-LABEL: test_int_x86_avx512_cvt_dq2ps_256: 11716; CHECK: # %bb.0: 11717; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0] 11718; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11719 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 11720 ret <8 x float> %res 11721} 11722 11723define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 11724; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: 11725; X86: # %bb.0: 11726; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11727; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11728; X86-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8] 11729; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 11730; X86-NEXT: retl # encoding: [0xc3] 11731; 11732; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: 11733; X64: # %bb.0: 11734; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11735; X64-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8] 11736; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 11737; X64-NEXT: retq # encoding: [0xc3] 11738 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 11739 ret <8 x float> %res 11740} 11741 11742define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { 11743; CHECK-LABEL: test_x86_vcvtph2ps_128: 11744; CHECK: # %bb.0: 11745; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] 11746; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11747 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1) 11748 ret <4 x float> %res 11749} 11750 11751define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) { 11752; X86-LABEL: test_x86_vcvtph2ps_128_rrk: 11753; X86: # %bb.0: 11754; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11755; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11756; X86-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8] 11757; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11758; X86-NEXT: retl # encoding: [0xc3] 11759; 11760; X64-LABEL: test_x86_vcvtph2ps_128_rrk: 11761; X64: # %bb.0: 11762; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11763; X64-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8] 11764; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11765; X64-NEXT: retq # encoding: [0xc3] 11766 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask) 11767 ret <4 x float> %res 11768} 11769 11770define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) { 11771; X86-LABEL: test_x86_vcvtph2ps_128_rrkz: 11772; X86: # %bb.0: 11773; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11774; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11775; X86-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0] 11776; X86-NEXT: retl # encoding: [0xc3] 11777; 11778; X64-LABEL: test_x86_vcvtph2ps_128_rrkz: 11779; X64: # %bb.0: 
11780; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11781; X64-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0] 11782; X64-NEXT: retq # encoding: [0xc3] 11783 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask) 11784 ret <4 x float> %res 11785} 11786 11787declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly 11788 11789define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) { 11790; CHECK-LABEL: test_x86_vcvtph2ps_256: 11791; CHECK: # %bb.0: 11792; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0] 11793; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11794 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1) 11795 ret <8 x float> %res 11796} 11797 11798define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) { 11799; X86-LABEL: test_x86_vcvtph2ps_256_rrk: 11800; X86: # %bb.0: 11801; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11802; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11803; X86-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8] 11804; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 11805; X86-NEXT: retl # encoding: [0xc3] 11806; 11807; X64-LABEL: test_x86_vcvtph2ps_256_rrk: 11808; X64: # %bb.0: 11809; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11810; X64-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8] 11811; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 11812; X64-NEXT: retq # encoding: [0xc3] 11813 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) 11814 ret <8 x float> %res 11815} 11816 11817define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) { 11818; X86-LABEL: test_x86_vcvtph2ps_256_rrkz: 11819; X86: # %bb.0: 11820; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11821; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11822; X86-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0] 11823; X86-NEXT: retl # encoding: [0xc3] 11824; 11825; X64-LABEL: test_x86_vcvtph2ps_256_rrkz: 11826; X64: # %bb.0: 11827; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11828; X64-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0] 11829; X64-NEXT: retq # encoding: [0xc3] 11830 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask) 11831 ret <8 x float> %res 11832} 11833 11834declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly 11835 11836declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8) 11837 11838define <4 x i32>@test_int_x86_avx512_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1) { 11839; CHECK-LABEL: test_int_x86_avx512_cvt_pd2dq_256: 11840; CHECK: # %bb.0: 11841; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0] 11842; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11843; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11844 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 11845 ret <4 x 
i32> %res 11846} 11847 11848define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 11849; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: 11850; X86: # %bb.0: 11851; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11852; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11853; X86-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8] 11854; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11855; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11856; X86-NEXT: retl # encoding: [0xc3] 11857; 11858; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: 11859; X64: # %bb.0: 11860; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11861; X64-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8] 11862; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11863; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11864; X64-NEXT: retq # encoding: [0xc3] 11865 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 11866 ret <4 x i32> %res 11867} 11868 11869declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8) 11870 11871define <4 x float>@test_int_x86_avx512_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1) { 11872; CHECK-LABEL: test_int_x86_avx512_cvt_pd2ps_256: 11873; CHECK: # %bb.0: 11874; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0] 11875; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11876; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11877 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1) 11878 ret <4 x float> %res 11879} 11880 11881define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) { 11882; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: 11883; X86: # %bb.0: 11884; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11885; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11886; X86-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8] 11887; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11888; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11889; X86-NEXT: retl # encoding: [0xc3] 11890; 11891; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: 11892; X64: # %bb.0: 11893; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11894; X64-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8] 11895; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11896; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11897; X64-NEXT: retq # encoding: [0xc3] 11898 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2) 11899 ret <4 x float> %res 11900} 11901 11902declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8) 11903 11904define <4 x double>@test_int_x86_avx512_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1) { 11905; CHECK-LABEL: test_int_x86_avx512_cvt_ps2pd_256: 11906; CHECK: # %bb.0: 11907; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0] 11908; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11909 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x 
float> %x0, <4 x double> %x1, i8 -1) 11910 ret <4 x double> %res 11911} 11912 11913define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) { 11914; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: 11915; X86: # %bb.0: 11916; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11917; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11918; X86-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8] 11919; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 11920; X86-NEXT: retl # encoding: [0xc3] 11921; 11922; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: 11923; X64: # %bb.0: 11924; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11925; X64-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8] 11926; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 11927; X64-NEXT: retq # encoding: [0xc3] 11928 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2) 11929 ret <4 x double> %res 11930} 11931 11932declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8) 11933 11934define <2 x double>@test_int_x86_avx512_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1) { 11935; CHECK-LABEL: test_int_x86_avx512_cvt_ps2pd_128: 11936; CHECK: # %bb.0: 11937; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] 11938; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11939 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1) 11940 ret <2 x double> %res 11941} 11942 11943define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) { 11944; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: 11945; X86: # %bb.0: 11946; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11947; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11948; X86-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8] 11949; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11950; X86-NEXT: retl # encoding: [0xc3] 11951; 11952; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: 11953; X64: # %bb.0: 11954; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11955; X64-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8] 11956; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11957; X64-NEXT: retq # encoding: [0xc3] 11958 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2) 11959 ret <2 x double> %res 11960} 11961 11962declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8) 11963 11964define <4 x i32>@test_int_x86_avx512_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1) { 11965; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2dq_256: 11966; CHECK: # %bb.0: 11967; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0] 11968; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11969; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 11970 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 11971 ret <4 x i32> %res 11972} 11973 11974define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> 
%x1, i8 %x2) { 11975; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: 11976; X86: # %bb.0: 11977; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 11978; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 11979; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8] 11980; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11981; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11982; X86-NEXT: retl # encoding: [0xc3] 11983; 11984; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: 11985; X64: # %bb.0: 11986; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 11987; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8] 11988; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 11989; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11990; X64-NEXT: retq # encoding: [0xc3] 11991 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 11992 ret <4 x i32> %res 11993} 11994 11995declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8) 11996 11997define <4 x i32>@test_int_x86_avx512_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1) { 11998; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2dq_128: 11999; CHECK: # %bb.0: 12000; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0] 12001; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12002 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 12003 ret <4 x i32> %res 12004} 12005 12006define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 12007; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: 12008; X86: # %bb.0: 12009; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12010; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12011; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8] 12012; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12013; X86-NEXT: retl # encoding: [0xc3] 12014; 12015; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: 12016; X64: # %bb.0: 12017; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12018; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8] 12019; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12020; X64-NEXT: retq # encoding: [0xc3] 12021 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 12022 ret <4 x i32> %res 12023} 12024 12025declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8) 12026 12027define <8 x i32>@test_int_x86_avx512_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1) { 12028; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2dq_256: 12029; CHECK: # %bb.0: 12030; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0] 12031; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12032 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 12033 ret <8 x i32> %res 12034} 12035 12036define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 12037; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: 12038; X86: # %bb.0: 12039; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 
encoding: [0x0f,0xb6,0x44,0x24,0x04] 12040; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12041; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8] 12042; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 12043; X86-NEXT: retl # encoding: [0xc3] 12044; 12045; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: 12046; X64: # %bb.0: 12047; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12048; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8] 12049; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 12050; X64-NEXT: retq # encoding: [0xc3] 12051 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 12052 ret <8 x i32> %res 12053} 12054 12055declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) 12056 12057define <8 x float>@test_int_x86_avx512_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 12058; CHECK-LABEL: test_int_x86_avx512_permvar_sf_256: 12059; CHECK: # %bb.0: 12060; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0] 12061; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12062 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 12063 ret <8 x float> %res 12064} 12065 12066define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 12067; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_256: 12068; X86: # %bb.0: 12069; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12070; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12071; X86-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] 12072; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 12073; X86-NEXT: retl # encoding: [0xc3] 12074; 12075; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_256: 12076; X64: # %bb.0: 12077; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12078; X64-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] 12079; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 12080; X64-NEXT: retq # encoding: [0xc3] 12081 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 12082 ret <8 x float> %res 12083} 12084 12085define <8 x float>@test_int_x86_avx512_maskz_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, i8 %x3) { 12086; X86-LABEL: test_int_x86_avx512_maskz_permvar_sf_256: 12087; X86: # %bb.0: 12088; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12089; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12090; X86-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0] 12091; X86-NEXT: retl # encoding: [0xc3] 12092; 12093; X64-LABEL: test_int_x86_avx512_maskz_permvar_sf_256: 12094; X64: # %bb.0: 12095; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12096; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0] 12097; X64-NEXT: retq # encoding: [0xc3] 12098 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) 12099 ret <8 x 
float> %res 12100} 12101 12102declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 12103 12104define <8 x i32>@test_int_x86_avx512_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 12105; CHECK-LABEL: test_int_x86_avx512_permvar_si_256: 12106; CHECK: # %bb.0: 12107; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0] 12108; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12109 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 12110 ret <8 x i32> %res 12111} 12112 12113define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 12114; X86-LABEL: test_int_x86_avx512_mask_permvar_si_256: 12115; X86: # %bb.0: 12116; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12117; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12118; X86-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] 12119; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 12120; X86-NEXT: retl # encoding: [0xc3] 12121; 12122; X64-LABEL: test_int_x86_avx512_mask_permvar_si_256: 12123; X64: # %bb.0: 12124; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12125; X64-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] 12126; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 12127; X64-NEXT: retq # encoding: [0xc3] 12128 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 12129 ret <8 x i32> %res 12130} 12131 12132define <8 x i32>@test_int_x86_avx512_maskz_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 12133; X86-LABEL: test_int_x86_avx512_maskz_permvar_si_256: 12134; X86: # %bb.0: 12135; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12136; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12137; X86-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0] 12138; X86-NEXT: retl # encoding: [0xc3] 12139; 12140; X64-LABEL: test_int_x86_avx512_maskz_permvar_si_256: 12141; X64: # %bb.0: 12142; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12143; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0] 12144; X64-NEXT: retq # encoding: [0xc3] 12145 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 12146 ret <8 x i32> %res 12147} 12148 12149declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) 12150 12151define <4 x double>@test_int_x86_avx512_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) { 12152; CHECK-LABEL: test_int_x86_avx512_permvar_df_256: 12153; CHECK: # %bb.0: 12154; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0] 12155; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12156 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 12157 ret <4 x double> %res 12158} 12159 12160define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 12161; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256: 12162; X86: # %bb.0: 12163; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 
encoding: [0x0f,0xb6,0x44,0x24,0x04] 12164; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12165; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0] 12166; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 12167; X86-NEXT: retl # encoding: [0xc3] 12168; 12169; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256: 12170; X64: # %bb.0: 12171; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12172; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0] 12173; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 12174; X64-NEXT: retq # encoding: [0xc3] 12175 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 12176 ret <4 x double> %res 12177} 12178 12179define <4 x double>@test_int_x86_avx512_maskz_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, i8 %x3) { 12180; X86-LABEL: test_int_x86_avx512_maskz_permvar_df_256: 12181; X86: # %bb.0: 12182; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12183; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12184; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0] 12185; X86-NEXT: retl # encoding: [0xc3] 12186; 12187; X64-LABEL: test_int_x86_avx512_maskz_permvar_df_256: 12188; X64: # %bb.0: 12189; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12190; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0] 12191; X64-NEXT: retq # encoding: [0xc3] 12192 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) 12193 ret <4 x double> %res 12194} 12195 12196declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 12197 12198define <4 x i64>@test_int_x86_avx512_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 12199; CHECK-LABEL: test_int_x86_avx512_permvar_di_256: 12200; CHECK: # %bb.0: 12201; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0] 12202; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12203 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 12204 ret <4 x i64> %res 12205} 12206 12207define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 12208; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256: 12209; X86: # %bb.0: 12210; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12211; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12212; X86-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0] 12213; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 12214; X86-NEXT: retl # encoding: [0xc3] 12215; 12216; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256: 12217; X64: # %bb.0: 12218; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12219; X64-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0] 12220; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 12221; X64-NEXT: retq # encoding: [0xc3] 12222 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 12223 ret <4 x i64> %res 12224} 12225 
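; NOTE: The maskz_* tests below pass a zeroinitializer pass-through to the same
; llvm.x86.avx512.mask.* intrinsics, so the zero-masking ({%k1} {z}) form of each
; instruction is expected in the CHECK lines; these presumably mirror the
; _mm256_maskz_* C intrinsics (e.g. _mm256_maskz_permutexvar_epi64 for
; llvm.x86.avx512.mask.permvar.di.256), though that mapping is not asserted here.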
12226define <4 x i64>@test_int_x86_avx512_maskz_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) { 12227; X86-LABEL: test_int_x86_avx512_maskz_permvar_di_256: 12228; X86: # %bb.0: 12229; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12230; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12231; X86-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0] 12232; X86-NEXT: retl # encoding: [0xc3] 12233; 12234; X64-LABEL: test_int_x86_avx512_maskz_permvar_di_256: 12235; X64: # %bb.0: 12236; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12237; X64-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0] 12238; X64-NEXT: retq # encoding: [0xc3] 12239 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 12240 ret <4 x i64> %res 12241} 12242 12243declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 12244 12245define <4 x i32>@test_int_x86_avx512_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 12246; CHECK-LABEL: test_int_x86_avx512_pternlog_d_128: 12247; CHECK: # %bb.0: 12248; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] 12249; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12250 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 12251 ret <4 x i32> %res 12252} 12253 12254define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 12255; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128: 12256; X86: # %bb.0: 12257; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12258; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12259; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21] 12260; X86-NEXT: retl # encoding: [0xc3] 12261; 12262; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128: 12263; X64: # %bb.0: 12264; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12265; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21] 12266; X64-NEXT: retq # encoding: [0xc3] 12267 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 12268 ret <4 x i32> %res 12269} 12270 12271declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 12272 12273define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 12274; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: 12275; X86: # %bb.0: 12276; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12277; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12278; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21] 12279; X86-NEXT: retl # encoding: [0xc3] 12280; 12281; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: 12282; X64: # %bb.0: 12283; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12284; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21] 12285; X64-NEXT: retq # encoding: [0xc3] 12286 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, 
i8 %x4) 12287 ret <4 x i32> %res 12288} 12289 12290declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 12291 12292define <8 x i32>@test_int_x86_avx512_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 12293; CHECK-LABEL: test_int_x86_avx512_pternlog_d_256: 12294; CHECK: # %bb.0: 12295; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] 12296; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12297 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 12298 ret <8 x i32> %res 12299} 12300 12301define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 12302; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256: 12303; X86: # %bb.0: 12304; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12305; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12306; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21] 12307; X86-NEXT: retl # encoding: [0xc3] 12308; 12309; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256: 12310; X64: # %bb.0: 12311; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12312; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21] 12313; X64-NEXT: retq # encoding: [0xc3] 12314 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 12315 ret <8 x i32> %res 12316} 12317 12318declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 12319 12320define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 12321; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: 12322; X86: # %bb.0: 12323; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12324; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12325; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21] 12326; X86-NEXT: retl # encoding: [0xc3] 12327; 12328; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: 12329; X64: # %bb.0: 12330; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12331; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21] 12332; X64-NEXT: retq # encoding: [0xc3] 12333 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 12334 ret <8 x i32> %res 12335} 12336 12337declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 12338 12339define <2 x i64>@test_int_x86_avx512_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 12340; CHECK-LABEL: test_int_x86_avx512_pternlog_q_128: 12341; CHECK: # %bb.0: 12342; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] 12343; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12344 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) 12345 ret <2 x i64> %res 12346} 12347 12348define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 12349; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128: 12350; X86: # %bb.0: 12351; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12352; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12353; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21] 12354; X86-NEXT: retl # encoding: [0xc3] 12355; 12356; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128: 12357; X64: # %bb.0: 12358; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12359; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21] 12360; X64-NEXT: retq # encoding: [0xc3] 12361 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 12362 ret <2 x i64> %res 12363} 12364 12365declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 12366 12367define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 12368; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: 12369; X86: # %bb.0: 12370; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12371; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12372; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21] 12373; X86-NEXT: retl # encoding: [0xc3] 12374; 12375; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: 12376; X64: # %bb.0: 12377; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12378; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21] 12379; X64-NEXT: retq # encoding: [0xc3] 12380 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 12381 ret <2 x i64> %res 12382} 12383 12384declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) 12385 12386define <4 x i64>@test_int_x86_avx512_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 12387; CHECK-LABEL: test_int_x86_avx512_pternlog_q_256: 12388; CHECK: # %bb.0: 12389; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] 12390; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12391 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) 12392 ret <4 x i64> %res 12393} 12394 12395define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { 12396; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256: 12397; X86: # %bb.0: 12398; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12399; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12400; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21] 12401; X86-NEXT: retl # encoding: [0xc3] 12402; 12403; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256: 12404; X64: # %bb.0: 12405; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12406; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21] 12407; X64-NEXT: retq # encoding: [0xc3] 12408 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) 12409 ret <4 x i64> %res 12410} 12411 12412declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) 12413 12414define <4 x 
i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { 12415; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: 12416; X86: # %bb.0: 12417; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12418; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12419; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21] 12420; X86-NEXT: retl # encoding: [0xc3] 12421; 12422; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: 12423; X64: # %bb.0: 12424; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12425; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21] 12426; X64-NEXT: retq # encoding: [0xc3] 12427 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) 12428 ret <4 x i64> %res 12429} 12430 12431declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) 12432 12433define <4 x float>@test_int_x86_avx512_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1) { 12434; CHECK-LABEL: test_int_x86_avx512_cvt_udq2ps_128: 12435; CHECK: # %bb.0: 12436; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xc0] 12437; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12438 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 12439 ret <4 x float> %res 12440} 12441 12442define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 12443; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: 12444; X86: # %bb.0: 12445; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12446; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12447; X86-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8] 12448; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12449; X86-NEXT: retl # encoding: [0xc3] 12450; 12451; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: 12452; X64: # %bb.0: 12453; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12454; X64-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8] 12455; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12456; X64-NEXT: retq # encoding: [0xc3] 12457 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 12458 ret <4 x float> %res 12459} 12460 12461declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8) 12462 12463define <8 x float>@test_int_x86_avx512_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1) { 12464; CHECK-LABEL: test_int_x86_avx512_cvt_udq2ps_256: 12465; CHECK: # %bb.0: 12466; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xc0] 12467; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12468 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 12469 ret <8 x float> %res 12470} 12471 12472define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 12473; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: 12474; X86: # %bb.0: 12475; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12476; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12477; X86-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: 
[0x62,0xf1,0x7f,0x29,0x7a,0xc8] 12478; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 12479; X86-NEXT: retl # encoding: [0xc3] 12480; 12481; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: 12482; X64: # %bb.0: 12483; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12484; X64-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8] 12485; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 12486; X64-NEXT: retq # encoding: [0xc3] 12487 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 12488 ret <8 x float> %res 12489} 12490 12491declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 12492 12493define <4 x i32>@test_int_x86_avx512_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 12494; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_128: 12495; CHECK: # %bb.0: 12496; CHECK-NEXT: vpermt2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xc2] 12497; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12498 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 12499 ret <4 x i32> %res 12500} 12501 12502define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 12503; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128: 12504; X86: # %bb.0: 12505; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12506; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12507; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca] 12508; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12509; X86-NEXT: retl # encoding: [0xc3] 12510; 12511; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128: 12512; X64: # %bb.0: 12513; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12514; X64-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca] 12515; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12516; X64-NEXT: retq # encoding: [0xc3] 12517 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 12518 ret <4 x i32> %res 12519} 12520 12521declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 12522 12523define <4 x i32>@test_int_x86_avx512_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 12524; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_128: 12525; CHECK: # %bb.0: 12526; CHECK-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x76,0xc2] 12527; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12528 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 12529 ret <4 x i32> %res 12530} 12531 12532define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 12533; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128: 12534; X86: # %bb.0: 12535; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12536; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12537; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca] 12538; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6f,0xc1] 12539; X86-NEXT: retl # encoding: [0xc3] 12540; 12541; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128: 12542; X64: # %bb.0: 12543; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12544; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca] 12545; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12546; X64-NEXT: retq # encoding: [0xc3] 12547 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 12548 ret <4 x i32> %res 12549} 12550 12551declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 12552 12553define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 12554; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128: 12555; X86: # %bb.0: 12556; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12557; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12558; X86-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2] 12559; X86-NEXT: retl # encoding: [0xc3] 12560; 12561; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128: 12562; X64: # %bb.0: 12563; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12564; X64-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2] 12565; X64-NEXT: retq # encoding: [0xc3] 12566 %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 12567 ret <4 x i32> %res 12568} 12569 12570declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 12571 12572define <8 x i32>@test_int_x86_avx512_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 12573; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_256: 12574; CHECK: # %bb.0: 12575; CHECK-NEXT: vpermt2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xc2] 12576; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12577 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 12578 ret <8 x i32> %res 12579} 12580 12581define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 12582; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256: 12583; X86: # %bb.0: 12584; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12585; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12586; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca] 12587; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12588; X86-NEXT: retl # encoding: [0xc3] 12589; 12590; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256: 12591; X64: # %bb.0: 12592; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12593; X64-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca] 12594; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12595; X64-NEXT: retq # encoding: [0xc3] 12596 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 12597 ret <8 x i32> %res 12598} 12599 12600declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 12601 12602define <8 x 
i32>@test_int_x86_avx512_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 12603; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_256: 12604; CHECK: # %bb.0: 12605; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x76,0xc2] 12606; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12607 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 12608 ret <8 x i32> %res 12609} 12610 12611define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 12612; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256: 12613; X86: # %bb.0: 12614; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12615; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12616; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca] 12617; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12618; X86-NEXT: retl # encoding: [0xc3] 12619; 12620; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256: 12621; X64: # %bb.0: 12622; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12623; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca] 12624; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12625; X64-NEXT: retq # encoding: [0xc3] 12626 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 12627 ret <8 x i32> %res 12628} 12629 12630declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 12631 12632define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 12633; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256: 12634; X86: # %bb.0: 12635; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12636; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12637; X86-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2] 12638; X86-NEXT: retl # encoding: [0xc3] 12639; 12640; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256: 12641; X64: # %bb.0: 12642; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12643; X64-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2] 12644; X64-NEXT: retq # encoding: [0xc3] 12645 %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 12646 ret <8 x i32> %res 12647} 12648 12649declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) 12650 12651define <2 x double>@test_int_x86_avx512_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) { 12652; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_128: 12653; CHECK: # %bb.0: 12654; CHECK-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xc2] 12655; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12656 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) 12657 ret <2 x double> %res 12658} 12659 12660define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 12661; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128: 12662; X86: # %bb.0: 12663; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 
encoding: [0x0f,0xb6,0x44,0x24,0x04] 12664; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12665; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca] 12666; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 12667; X86-NEXT: retl # encoding: [0xc3] 12668; 12669; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128: 12670; X64: # %bb.0: 12671; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12672; X64-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca] 12673; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 12674; X64-NEXT: retq # encoding: [0xc3] 12675 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) 12676 ret <2 x double> %res 12677} 12678 12679declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) 12680 12681define <4 x double>@test_int_x86_avx512_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) { 12682; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_256: 12683; CHECK: # %bb.0: 12684; CHECK-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xc2] 12685; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12686 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 12687 ret <4 x double> %res 12688} 12689 12690define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 12691; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256: 12692; X86: # %bb.0: 12693; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12694; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12695; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca] 12696; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 12697; X86-NEXT: retl # encoding: [0xc3] 12698; 12699; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256: 12700; X64: # %bb.0: 12701; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12702; X64-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca] 12703; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] 12704; X64-NEXT: retq # encoding: [0xc3] 12705 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 12706 ret <4 x double> %res 12707} 12708 12709declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) 12710 12711define <4 x float>@test_int_x86_avx512_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) { 12712; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_128: 12713; CHECK: # %bb.0: 12714; CHECK-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xc2] 12715; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12716 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) 12717 ret <4 x float> %res 12718} 12719 12720define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { 12721; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128: 12722; X86: # %bb.0: 12723; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12724; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12725; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] 12726; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12727; X86-NEXT: retl # encoding: [0xc3] 12728; 12729; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128: 12730; X64: # %bb.0: 12731; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12732; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] 12733; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12734; X64-NEXT: retq # encoding: [0xc3] 12735 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) 12736 ret <4 x float> %res 12737} 12738 12739define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) { 12740; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast: 12741; X86: # %bb.0: 12742; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12743; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12744; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] 12745; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12746; X86-NEXT: retl # encoding: [0xc3] 12747; 12748; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast: 12749; X64: # %bb.0: 12750; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12751; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] 12752; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 12753; X64-NEXT: retq # encoding: [0xc3] 12754 %x1cast = bitcast <2 x i64> %x1 to <4 x i32> 12755 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3) 12756 ret <4 x float> %res 12757} 12758 12759declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) 12760 12761define <8 x float>@test_int_x86_avx512_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) { 12762; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_256: 12763; CHECK: # %bb.0: 12764; CHECK-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xc2] 12765; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12766 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 12767 ret <8 x float> %res 12768} 12769 12770define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 12771; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256: 12772; X86: # %bb.0: 12773; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12774; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12775; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca] 12776; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 12777; X86-NEXT: retl # encoding: [0xc3] 12778; 12779; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256: 12780; X64: # %bb.0: 12781; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12782; X64-NEXT: vpermi2ps %ymm2, 
%ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca] 12783; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 12784; X64-NEXT: retq # encoding: [0xc3] 12785 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 12786 ret <8 x float> %res 12787} 12788 12789declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 12790 12791define <2 x i64>@test_int_x86_avx512_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 12792; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_128: 12793; CHECK: # %bb.0: 12794; CHECK-NEXT: vpermt2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xc2] 12795; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12796 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 12797 ret <2 x i64> %res 12798} 12799 12800define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 12801; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128: 12802; X86: # %bb.0: 12803; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12804; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12805; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca] 12806; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12807; X86-NEXT: retl # encoding: [0xc3] 12808; 12809; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128: 12810; X64: # %bb.0: 12811; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12812; X64-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca] 12813; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12814; X64-NEXT: retq # encoding: [0xc3] 12815 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 12816 ret <2 x i64> %res 12817} 12818 12819declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 12820 12821define <2 x i64>@test_int_x86_avx512_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 12822; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_128: 12823; CHECK: # %bb.0: 12824; CHECK-NEXT: vpermi2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x76,0xc2] 12825; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12826 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 12827 ret <2 x i64> %res 12828} 12829 12830define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 12831; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128: 12832; X86: # %bb.0: 12833; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12834; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12835; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca] 12836; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12837; X86-NEXT: retl # encoding: [0xc3] 12838; 12839; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128: 12840; X64: # %bb.0: 12841; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12842; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca] 12843; X64-NEXT: vmovdqa 
%xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 12844; X64-NEXT: retq # encoding: [0xc3] 12845 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 12846 ret <2 x i64> %res 12847} 12848 12849declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 12850 12851define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 12852; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128: 12853; X86: # %bb.0: 12854; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12855; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12856; X86-NEXT: vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2] 12857; X86-NEXT: retl # encoding: [0xc3] 12858; 12859; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128: 12860; X64: # %bb.0: 12861; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12862; X64-NEXT: vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2] 12863; X64-NEXT: retq # encoding: [0xc3] 12864 %res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 12865 ret <2 x i64> %res 12866} 12867 12868declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 12869 12870define <4 x i64>@test_int_x86_avx512_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 12871; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_256: 12872; CHECK: # %bb.0: 12873; CHECK-NEXT: vpermt2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xc2] 12874; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12875 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 12876 ret <4 x i64> %res 12877} 12878 12879define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 12880; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256: 12881; X86: # %bb.0: 12882; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12883; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12884; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca] 12885; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12886; X86-NEXT: retl # encoding: [0xc3] 12887; 12888; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256: 12889; X64: # %bb.0: 12890; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12891; X64-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca] 12892; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12893; X64-NEXT: retq # encoding: [0xc3] 12894 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 12895 ret <4 x i64> %res 12896} 12897 12898declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 12899 12900define <4 x i64>@test_int_x86_avx512_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 12901; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_256: 12902; CHECK: # %bb.0: 12903; CHECK-NEXT: vpermi2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x76,0xc2] 12904; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 12905 %res = call <4 x i64> 
@llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 12906 ret <4 x i64> %res 12907} 12908 12909define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 12910; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256: 12911; X86: # %bb.0: 12912; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12913; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12914; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca] 12915; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12916; X86-NEXT: retl # encoding: [0xc3] 12917; 12918; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256: 12919; X64: # %bb.0: 12920; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12921; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca] 12922; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 12923; X64-NEXT: retq # encoding: [0xc3] 12924 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 12925 ret <4 x i64> %res 12926} 12927 12928declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 12929 12930define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 12931; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256: 12932; X86: # %bb.0: 12933; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 12934; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 12935; X86-NEXT: vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2] 12936; X86-NEXT: retl # encoding: [0xc3] 12937; 12938; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256: 12939; X64: # %bb.0: 12940; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 12941; X64-NEXT: vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2] 12942; X64-NEXT: retq # encoding: [0xc3] 12943 %res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 12944 ret <4 x i64> %res 12945} 12946 12947define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) { 12948; X86-LABEL: test_mask_compress_store_pd_128: 12949; X86: # %bb.0: 12950; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 12951; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 12952; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 12953; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00] 12954; X86-NEXT: retl # encoding: [0xc3] 12955; 12956; X64-LABEL: test_mask_compress_store_pd_128: 12957; X64: # %bb.0: 12958; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 12959; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07] 12960; X64-NEXT: retq # encoding: [0xc3] 12961 call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask) 12962 ret void 12963} 12964 12965declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask) 12966 12967define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) { 12968; X86-LABEL: test_compress_store_pd_128: 12969; X86: # %bb.0: 12970; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 12971; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 12972; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00] 12973; X86-NEXT: retl # encoding: [0xc3] 12974; 12975; X64-LABEL: test_compress_store_pd_128: 12976; X64: # %bb.0: 12977; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 12978; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07] 12979; X64-NEXT: retq # encoding: [0xc3] 12980 call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1) 12981 ret void 12982} 12983 12984define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) { 12985; X86-LABEL: test_mask_compress_store_ps_128: 12986; X86: # %bb.0: 12987; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 12988; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 12989; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 12990; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00] 12991; X86-NEXT: retl # encoding: [0xc3] 12992; 12993; X64-LABEL: test_mask_compress_store_ps_128: 12994; X64: # %bb.0: 12995; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 12996; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07] 12997; X64-NEXT: retq # encoding: [0xc3] 12998 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 12999 ret void 13000} 13001 13002declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 13003 13004define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) { 13005; X86-LABEL: test_compress_store_ps_128: 13006; X86: # %bb.0: 13007; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13008; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13009; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00] 13010; X86-NEXT: retl # encoding: [0xc3] 13011; 13012; X64-LABEL: test_compress_store_ps_128: 13013; X64: # %bb.0: 13014; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13015; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07] 13016; X64-NEXT: retq # encoding: [0xc3] 13017 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1) 13018 ret void 13019} 13020 13021define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) { 13022; X86-LABEL: test_mask_compress_store_q_128: 13023; X86: # %bb.0: 13024; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13025; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13026; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13027; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00] 13028; X86-NEXT: retl # encoding: [0xc3] 13029; 13030; X64-LABEL: test_mask_compress_store_q_128: 13031; X64: # %bb.0: 13032; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13033; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07] 13034; X64-NEXT: retq # encoding: [0xc3] 13035 call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask) 13036 ret void 13037} 13038 13039declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 
x i64> %data, i8 %mask) 13040 13041define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) { 13042; X86-LABEL: test_compress_store_q_128: 13043; X86: # %bb.0: 13044; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13045; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13046; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00] 13047; X86-NEXT: retl # encoding: [0xc3] 13048; 13049; X64-LABEL: test_compress_store_q_128: 13050; X64: # %bb.0: 13051; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13052; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07] 13053; X64-NEXT: retq # encoding: [0xc3] 13054 call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1) 13055 ret void 13056} 13057 13058define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) { 13059; X86-LABEL: test_mask_compress_store_d_128: 13060; X86: # %bb.0: 13061; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13062; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13063; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13064; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00] 13065; X86-NEXT: retl # encoding: [0xc3] 13066; 13067; X64-LABEL: test_mask_compress_store_d_128: 13068; X64: # %bb.0: 13069; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13070; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07] 13071; X64-NEXT: retq # encoding: [0xc3] 13072 call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask) 13073 ret void 13074} 13075 13076declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask) 13077 13078define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) { 13079; X86-LABEL: test_compress_store_d_128: 13080; X86: # %bb.0: 13081; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13082; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13083; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00] 13084; X86-NEXT: retl # encoding: [0xc3] 13085; 13086; X64-LABEL: test_compress_store_d_128: 13087; X64: # %bb.0: 13088; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13089; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07] 13090; X64-NEXT: retq # encoding: [0xc3] 13091 call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1) 13092 ret void 13093} 13094 13095define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) { 13096; X86-LABEL: test_mask_expand_load_pd_128: 13097; X86: # %bb.0: 13098; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13099; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13100; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13101; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00] 13102; X86-NEXT: retl # encoding: [0xc3] 13103; 13104; X64-LABEL: test_mask_expand_load_pd_128: 13105; X64: # %bb.0: 13106; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13107; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07] 13108; X64-NEXT: retq # encoding: [0xc3] 13109 %res = call <2 x double> 
@llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask) 13110 ret <2 x double> %res 13111} 13112 13113define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) { 13114; X86-LABEL: test_maskz_expand_load_pd_128: 13115; X86: # %bb.0: 13116; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13117; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13118; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13119; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00] 13120; X86-NEXT: retl # encoding: [0xc3] 13121; 13122; X64-LABEL: test_maskz_expand_load_pd_128: 13123; X64: # %bb.0: 13124; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13125; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07] 13126; X64-NEXT: retq # encoding: [0xc3] 13127 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask) 13128 ret <2 x double> %res 13129} 13130 13131declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask) 13132 13133define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) { 13134; X86-LABEL: test_expand_load_pd_128: 13135; X86: # %bb.0: 13136; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13137; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13138; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00] 13139; X86-NEXT: retl # encoding: [0xc3] 13140; 13141; X64-LABEL: test_expand_load_pd_128: 13142; X64: # %bb.0: 13143; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13144; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07] 13145; X64-NEXT: retq # encoding: [0xc3] 13146 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1) 13147 ret <2 x double> %res 13148} 13149 13150define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) { 13151; X86-LABEL: test_mask_expand_load_ps_128: 13152; X86: # %bb.0: 13153; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13154; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13155; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13156; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00] 13157; X86-NEXT: retl # encoding: [0xc3] 13158; 13159; X64-LABEL: test_mask_expand_load_ps_128: 13160; X64: # %bb.0: 13161; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13162; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07] 13163; X64-NEXT: retq # encoding: [0xc3] 13164 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 13165 ret <4 x float> %res 13166} 13167 13168define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) { 13169; X86-LABEL: test_maskz_expand_load_ps_128: 13170; X86: # %bb.0: 13171; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13172; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13173; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13174; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00] 13175; X86-NEXT: retl # encoding: [0xc3] 13176; 13177; X64-LABEL: 
test_maskz_expand_load_ps_128: 13178; X64: # %bb.0: 13179; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13180; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07] 13181; X64-NEXT: retq # encoding: [0xc3] 13182 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask) 13183 ret <4 x float> %res 13184} 13185 13186declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 13187 13188define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) { 13189; X86-LABEL: test_expand_load_ps_128: 13190; X86: # %bb.0: 13191; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13192; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13193; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00] 13194; X86-NEXT: retl # encoding: [0xc3] 13195; 13196; X64-LABEL: test_expand_load_ps_128: 13197; X64: # %bb.0: 13198; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13199; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07] 13200; X64-NEXT: retq # encoding: [0xc3] 13201 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1) 13202 ret <4 x float> %res 13203} 13204 13205define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) { 13206; X86-LABEL: test_mask_expand_load_q_128: 13207; X86: # %bb.0: 13208; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13209; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13210; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13211; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00] 13212; X86-NEXT: retl # encoding: [0xc3] 13213; 13214; X64-LABEL: test_mask_expand_load_q_128: 13215; X64: # %bb.0: 13216; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13217; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07] 13218; X64-NEXT: retq # encoding: [0xc3] 13219 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask) 13220 ret <2 x i64> %res 13221} 13222 13223define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) { 13224; X86-LABEL: test_maskz_expand_load_q_128: 13225; X86: # %bb.0: 13226; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13227; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13228; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13229; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00] 13230; X86-NEXT: retl # encoding: [0xc3] 13231; 13232; X64-LABEL: test_maskz_expand_load_q_128: 13233; X64: # %bb.0: 13234; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13235; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07] 13236; X64-NEXT: retq # encoding: [0xc3] 13237 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask) 13238 ret <2 x i64> %res 13239} 13240 13241declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask) 13242 13243define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) { 13244; X86-LABEL: test_expand_load_q_128: 13245; X86: # %bb.0: 13246; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax # encoding: [0x8b,0x44,0x24,0x04] 13247; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13248; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00] 13249; X86-NEXT: retl # encoding: [0xc3] 13250; 13251; X64-LABEL: test_expand_load_q_128: 13252; X64: # %bb.0: 13253; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13254; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07] 13255; X64-NEXT: retq # encoding: [0xc3] 13256 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1) 13257 ret <2 x i64> %res 13258} 13259 13260define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) { 13261; X86-LABEL: test_mask_expand_load_d_128: 13262; X86: # %bb.0: 13263; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13264; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13265; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13266; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00] 13267; X86-NEXT: retl # encoding: [0xc3] 13268; 13269; X64-LABEL: test_mask_expand_load_d_128: 13270; X64: # %bb.0: 13271; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13272; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07] 13273; X64-NEXT: retq # encoding: [0xc3] 13274 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask) 13275 ret <4 x i32> %res 13276} 13277 13278define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) { 13279; X86-LABEL: test_maskz_expand_load_d_128: 13280; X86: # %bb.0: 13281; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13282; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13283; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13284; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00] 13285; X86-NEXT: retl # encoding: [0xc3] 13286; 13287; X64-LABEL: test_maskz_expand_load_d_128: 13288; X64: # %bb.0: 13289; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13290; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07] 13291; X64-NEXT: retq # encoding: [0xc3] 13292 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask) 13293 ret <4 x i32> %res 13294} 13295 13296declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask) 13297 13298define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) { 13299; X86-LABEL: test_expand_load_d_128: 13300; X86: # %bb.0: 13301; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13302; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13303; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00] 13304; X86-NEXT: retl # encoding: [0xc3] 13305; 13306; X64-LABEL: test_expand_load_d_128: 13307; X64: # %bb.0: 13308; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13309; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07] 13310; X64-NEXT: retq # encoding: [0xc3] 13311 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1) 13312 ret <4 x i32> %res 13313} 13314 13315define void @test_mask_compress_store_pd_256(i8* %addr, <4 x 
double> %data, i8 %mask) { 13316; X86-LABEL: test_mask_compress_store_pd_256: 13317; X86: # %bb.0: 13318; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13319; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13320; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13321; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00] 13322; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13323; X86-NEXT: retl # encoding: [0xc3] 13324; 13325; X64-LABEL: test_mask_compress_store_pd_256: 13326; X64: # %bb.0: 13327; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13328; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07] 13329; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13330; X64-NEXT: retq # encoding: [0xc3] 13331 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 13332 ret void 13333} 13334 13335declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 13336 13337define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) { 13338; X86-LABEL: test_compress_store_pd_256: 13339; X86: # %bb.0: 13340; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13341; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13342; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00] 13343; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13344; X86-NEXT: retl # encoding: [0xc3] 13345; 13346; X64-LABEL: test_compress_store_pd_256: 13347; X64: # %bb.0: 13348; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13349; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07] 13350; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13351; X64-NEXT: retq # encoding: [0xc3] 13352 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1) 13353 ret void 13354} 13355 13356define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) { 13357; X86-LABEL: test_mask_compress_store_ps_256: 13358; X86: # %bb.0: 13359; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13360; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13361; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13362; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00] 13363; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13364; X86-NEXT: retl # encoding: [0xc3] 13365; 13366; X64-LABEL: test_mask_compress_store_ps_256: 13367; X64: # %bb.0: 13368; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13369; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07] 13370; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13371; X64-NEXT: retq # encoding: [0xc3] 13372 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask) 13373 ret void 13374} 13375 13376declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask) 13377 13378define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) { 13379; X86-LABEL: test_compress_store_ps_256: 13380; X86: # %bb.0: 13381; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13382; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13383; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # 
encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00] 13384; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13385; X86-NEXT: retl # encoding: [0xc3] 13386; 13387; X64-LABEL: test_compress_store_ps_256: 13388; X64: # %bb.0: 13389; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13390; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07] 13391; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13392; X64-NEXT: retq # encoding: [0xc3] 13393 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1) 13394 ret void 13395} 13396 13397define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) { 13398; X86-LABEL: test_mask_compress_store_q_256: 13399; X86: # %bb.0: 13400; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13401; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13402; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13403; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00] 13404; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13405; X86-NEXT: retl # encoding: [0xc3] 13406; 13407; X64-LABEL: test_mask_compress_store_q_256: 13408; X64: # %bb.0: 13409; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13410; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07] 13411; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13412; X64-NEXT: retq # encoding: [0xc3] 13413 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask) 13414 ret void 13415} 13416 13417declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask) 13418 13419define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) { 13420; X86-LABEL: test_compress_store_q_256: 13421; X86: # %bb.0: 13422; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13423; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13424; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00] 13425; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13426; X86-NEXT: retl # encoding: [0xc3] 13427; 13428; X64-LABEL: test_compress_store_q_256: 13429; X64: # %bb.0: 13430; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13431; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07] 13432; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13433; X64-NEXT: retq # encoding: [0xc3] 13434 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1) 13435 ret void 13436} 13437 13438define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) { 13439; X86-LABEL: test_mask_compress_store_d_256: 13440; X86: # %bb.0: 13441; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13442; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13443; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13444; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00] 13445; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13446; X86-NEXT: retl # encoding: [0xc3] 13447; 13448; X64-LABEL: test_mask_compress_store_d_256: 13449; X64: # %bb.0: 13450; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13451; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07] 13452; X64-NEXT: vzeroupper # encoding: 
[0xc5,0xf8,0x77] 13453; X64-NEXT: retq # encoding: [0xc3] 13454 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask) 13455 ret void 13456} 13457 13458declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask) 13459 13460define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) { 13461; X86-LABEL: test_compress_store_d_256: 13462; X86: # %bb.0: 13463; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13464; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13465; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00] 13466; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13467; X86-NEXT: retl # encoding: [0xc3] 13468; 13469; X64-LABEL: test_compress_store_d_256: 13470; X64: # %bb.0: 13471; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13472; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07] 13473; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 13474; X64-NEXT: retq # encoding: [0xc3] 13475 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1) 13476 ret void 13477} 13478 13479define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) { 13480; X86-LABEL: test_mask_expand_load_pd_256: 13481; X86: # %bb.0: 13482; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13483; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13484; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13485; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00] 13486; X86-NEXT: retl # encoding: [0xc3] 13487; 13488; X64-LABEL: test_mask_expand_load_pd_256: 13489; X64: # %bb.0: 13490; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13491; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] 13492; X64-NEXT: retq # encoding: [0xc3] 13493 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 13494 ret <4 x double> %res 13495} 13496 13497define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) { 13498; X86-LABEL: test_maskz_expand_load_pd_256: 13499; X86: # %bb.0: 13500; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13501; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13502; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13503; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00] 13504; X86-NEXT: retl # encoding: [0xc3] 13505; 13506; X64-LABEL: test_maskz_expand_load_pd_256: 13507; X64: # %bb.0: 13508; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13509; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07] 13510; X64-NEXT: retq # encoding: [0xc3] 13511 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask) 13512 ret <4 x double> %res 13513} 13514 13515declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 13516 13517define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) { 13518; X86-LABEL: test_expand_load_pd_256: 13519; X86: # %bb.0: 13520; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13521; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: 
[0xc5,0xfc,0x46,0xc8] 13522; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00] 13523; X86-NEXT: retl # encoding: [0xc3] 13524; 13525; X64-LABEL: test_expand_load_pd_256: 13526; X64: # %bb.0: 13527; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13528; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] 13529; X64-NEXT: retq # encoding: [0xc3] 13530 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1) 13531 ret <4 x double> %res 13532} 13533 13534define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) { 13535; X86-LABEL: test_mask_expand_load_ps_256: 13536; X86: # %bb.0: 13537; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13538; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13539; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13540; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00] 13541; X86-NEXT: retl # encoding: [0xc3] 13542; 13543; X64-LABEL: test_mask_expand_load_ps_256: 13544; X64: # %bb.0: 13545; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13546; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07] 13547; X64-NEXT: retq # encoding: [0xc3] 13548 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask) 13549 ret <8 x float> %res 13550} 13551 13552define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) { 13553; X86-LABEL: test_maskz_expand_load_ps_256: 13554; X86: # %bb.0: 13555; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13556; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13557; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13558; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00] 13559; X86-NEXT: retl # encoding: [0xc3] 13560; 13561; X64-LABEL: test_maskz_expand_load_ps_256: 13562; X64: # %bb.0: 13563; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13564; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07] 13565; X64-NEXT: retq # encoding: [0xc3] 13566 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask) 13567 ret <8 x float> %res 13568} 13569 13570declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask) 13571 13572define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) { 13573; X86-LABEL: test_expand_load_ps_256: 13574; X86: # %bb.0: 13575; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13576; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13577; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00] 13578; X86-NEXT: retl # encoding: [0xc3] 13579; 13580; X64-LABEL: test_expand_load_ps_256: 13581; X64: # %bb.0: 13582; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13583; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07] 13584; X64-NEXT: retq # encoding: [0xc3] 13585 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1) 13586 ret <8 x float> %res 13587} 13588 13589define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) { 13590; 
X86-LABEL: test_mask_expand_load_q_256: 13591; X86: # %bb.0: 13592; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13593; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13594; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13595; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00] 13596; X86-NEXT: retl # encoding: [0xc3] 13597; 13598; X64-LABEL: test_mask_expand_load_q_256: 13599; X64: # %bb.0: 13600; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13601; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07] 13602; X64-NEXT: retq # encoding: [0xc3] 13603 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask) 13604 ret <4 x i64> %res 13605} 13606 13607define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) { 13608; X86-LABEL: test_maskz_expand_load_q_256: 13609; X86: # %bb.0: 13610; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13611; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13612; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13613; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00] 13614; X86-NEXT: retl # encoding: [0xc3] 13615; 13616; X64-LABEL: test_maskz_expand_load_q_256: 13617; X64: # %bb.0: 13618; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13619; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07] 13620; X64-NEXT: retq # encoding: [0xc3] 13621 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask) 13622 ret <4 x i64> %res 13623} 13624 13625declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask) 13626 13627define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) { 13628; X86-LABEL: test_expand_load_q_256: 13629; X86: # %bb.0: 13630; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13631; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13632; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00] 13633; X86-NEXT: retl # encoding: [0xc3] 13634; 13635; X64-LABEL: test_expand_load_q_256: 13636; X64: # %bb.0: 13637; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13638; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07] 13639; X64-NEXT: retq # encoding: [0xc3] 13640 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1) 13641 ret <4 x i64> %res 13642} 13643 13644define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) { 13645; X86-LABEL: test_mask_expand_load_d_256: 13646; X86: # %bb.0: 13647; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13648; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13649; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13650; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00] 13651; X86-NEXT: retl # encoding: [0xc3] 13652; 13653; X64-LABEL: test_mask_expand_load_d_256: 13654; X64: # %bb.0: 13655; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13656; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07] 13657; X64-NEXT: retq # encoding: [0xc3] 13658 %res = call <8 x 
i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask) 13659 ret <8 x i32> %res 13660} 13661 13662define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) { 13663; X86-LABEL: test_maskz_expand_load_d_256: 13664; X86: # %bb.0: 13665; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13666; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13667; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13668; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00] 13669; X86-NEXT: retl # encoding: [0xc3] 13670; 13671; X64-LABEL: test_maskz_expand_load_d_256: 13672; X64: # %bb.0: 13673; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13674; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07] 13675; X64-NEXT: retq # encoding: [0xc3] 13676 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask) 13677 ret <8 x i32> %res 13678} 13679 13680declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask) 13681 13682define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) { 13683; X86-LABEL: test_expand_load_d_256: 13684; X86: # %bb.0: 13685; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13686; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13687; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00] 13688; X86-NEXT: retl # encoding: [0xc3] 13689; 13690; X64-LABEL: test_expand_load_d_256: 13691; X64: # %bb.0: 13692; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 13693; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07] 13694; X64-NEXT: retq # encoding: [0xc3] 13695 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1) 13696 ret <8 x i32> %res 13697} 13698 13699define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) { 13700; X86-LABEL: test_sqrt_pd_256: 13701; X86: # %bb.0: 13702; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13703; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13704; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] 13705; X86-NEXT: retl # encoding: [0xc3] 13706; 13707; X64-LABEL: test_sqrt_pd_256: 13708; X64: # %bb.0: 13709; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13710; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] 13711; X64-NEXT: retq # encoding: [0xc3] 13712 %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 13713 ret <4 x double> %res 13714} 13715declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 13716 13717define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) { 13718; X86-LABEL: test_sqrt_ps_256: 13719; X86: # %bb.0: 13720; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13721; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13722; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] 13723; X86-NEXT: retl # encoding: [0xc3] 13724; 13725; X64-LABEL: test_sqrt_ps_256: 13726; X64: # %bb.0: 13727; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13728; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} 
{z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] 13729; X64-NEXT: retq # encoding: [0xc3] 13730 %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 13731 ret <8 x float> %res 13732} 13733 13734declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 13735 13736declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 13737 13738define <4 x i32>@test_int_x86_avx512_prorv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 13739; CHECK-LABEL: test_int_x86_avx512_prorv_d_128_old: 13740; CHECK: # %bb.0: 13741; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1] 13742; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 13743 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 13744 ret <4 x i32> %res 13745} 13746 13747define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 13748; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128_old: 13749; X86: # %bb.0: 13750; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13751; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13752; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] 13753; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 13754; X86-NEXT: retl # encoding: [0xc3] 13755; 13756; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128_old: 13757; X64: # %bb.0: 13758; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13759; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] 13760; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 13761; X64-NEXT: retq # encoding: [0xc3] 13762 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 13763 ret <4 x i32> %res 13764} 13765 13766define <4 x i32>@test_int_x86_avx512_maskz_prorv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 13767; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_128_old: 13768; X86: # %bb.0: 13769; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13770; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13771; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] 13772; X86-NEXT: retl # encoding: [0xc3] 13773; 13774; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_128_old: 13775; X64: # %bb.0: 13776; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13777; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] 13778; X64-NEXT: retq # encoding: [0xc3] 13779 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 13780 ret <4 x i32> %res 13781} 13782 13783declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 13784 13785define <8 x i32>@test_int_x86_avx512_prorv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 13786; CHECK-LABEL: test_int_x86_avx512_prorv_d_256_old: 13787; CHECK: # %bb.0: 13788; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1] 13789; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 13790 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 13791 
ret <8 x i32> %res 13792} 13793 13794define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 13795; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256_old: 13796; X86: # %bb.0: 13797; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13798; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13799; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] 13800; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 13801; X86-NEXT: retl # encoding: [0xc3] 13802; 13803; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256_old: 13804; X64: # %bb.0: 13805; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13806; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] 13807; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 13808; X64-NEXT: retq # encoding: [0xc3] 13809 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 13810 ret <8 x i32> %res 13811} 13812 13813define <8 x i32>@test_int_x86_avx512_maskz_prorv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 13814; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_256_old: 13815; X86: # %bb.0: 13816; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13817; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13818; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] 13819; X86-NEXT: retl # encoding: [0xc3] 13820; 13821; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_256_old: 13822; X64: # %bb.0: 13823; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13824; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] 13825; X64-NEXT: retq # encoding: [0xc3] 13826 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 13827 ret <8 x i32> %res 13828} 13829 13830declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 13831 13832define <2 x i64>@test_int_x86_avx512_prorv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 13833; CHECK-LABEL: test_int_x86_avx512_prorv_q_128_old: 13834; CHECK: # %bb.0: 13835; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1] 13836; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 13837 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 13838 ret <2 x i64> %res 13839} 13840 13841define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 13842; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128_old: 13843; X86: # %bb.0: 13844; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13845; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13846; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] 13847; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 13848; X86-NEXT: retl # encoding: [0xc3] 13849; 13850; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128_old: 13851; X64: # %bb.0: 13852; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13853; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] 13854; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 13855; X64-NEXT: retq # encoding: [0xc3] 13856 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 13857 ret <2 x i64> %res 13858} 13859 13860define <2 x i64>@test_int_x86_avx512_maskz_prorv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 13861; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_128_old: 13862; X86: # %bb.0: 13863; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13864; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13865; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] 13866; X86-NEXT: retl # encoding: [0xc3] 13867; 13868; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_128_old: 13869; X64: # %bb.0: 13870; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13871; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] 13872; X64-NEXT: retq # encoding: [0xc3] 13873 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 13874 ret <2 x i64> %res 13875} 13876 13877declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 13878 13879define <4 x i64>@test_int_x86_avx512_prorv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 13880; CHECK-LABEL: test_int_x86_avx512_prorv_q_256_old: 13881; CHECK: # %bb.0: 13882; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1] 13883; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 13884 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 13885 ret <4 x i64> %res 13886} 13887 13888define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 13889; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256_old: 13890; X86: # %bb.0: 13891; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13892; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13893; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] 13894; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 13895; X86-NEXT: retl # encoding: [0xc3] 13896; 13897; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256_old: 13898; X64: # %bb.0: 13899; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13900; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] 13901; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 13902; X64-NEXT: retq # encoding: [0xc3] 13903 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 13904 ret <4 x i64> %res 13905} 13906 13907define <4 x i64>@test_int_x86_avx512_maskz_prorv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 13908; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_256_old: 13909; X86: # %bb.0: 13910; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 13911; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13912; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] 13913; X86-NEXT: retl # encoding: [0xc3] 13914; 13915; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_256_old: 13916; X64: # %bb.0: 13917; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 13918; X64-NEXT: 
vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] 13919; X64-NEXT: retq # encoding: [0xc3] 13920 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 13921 ret <4 x i64> %res 13922} 13923 13924declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8) 13925 13926define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 13927; X86-LABEL: test_int_x86_avx512_mask_prol_d_128: 13928; X86: # %bb.0: 13929; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 13930; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13931; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] 13932; X86-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] 13933; X86-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] 13934; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 13935; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 13936; X86-NEXT: retl # encoding: [0xc3] 13937; 13938; X64-LABEL: test_int_x86_avx512_mask_prol_d_128: 13939; X64: # %bb.0: 13940; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13941; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] 13942; X64-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] 13943; X64-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] 13944; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 13945; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 13946; X64-NEXT: retq # encoding: [0xc3] 13947 %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 13948 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 4, <4 x i32> zeroinitializer, i8 %x3) 13949 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 5, <4 x i32> %x2, i8 -1) 13950 %res3 = add <4 x i32> %res, %res1 13951 %res4 = add <4 x i32> %res3, %res2 13952 ret <4 x i32> %res4 13953} 13954 13955declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8) 13956 13957define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 13958; X86-LABEL: test_int_x86_avx512_mask_prol_d_256: 13959; X86: # %bb.0: 13960; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 13961; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13962; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] 13963; X86-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] 13964; X86-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] 13965; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 13966; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 13967; X86-NEXT: retl # encoding: [0xc3] 13968; 13969; X64-LABEL: test_int_x86_avx512_mask_prol_d_256: 13970; X64: # %bb.0: 13971; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13972; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: 
[0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] 13973; X64-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] 13974; X64-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] 13975; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 13976; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 13977; X64-NEXT: retq # encoding: [0xc3] 13978 %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 13979 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 4, <8 x i32> zeroinitializer, i8 %x3) 13980 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 5, <8 x i32> %x2, i8 -1) 13981 %res3 = add <8 x i32> %res, %res1 13982 %res4 = add <8 x i32> %res3, %res2 13983 ret <8 x i32> %res4 13984} 13985 13986declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8) 13987 13988define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 13989; X86-LABEL: test_int_x86_avx512_mask_prol_q_128: 13990; X86: # %bb.0: 13991; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 13992; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 13993; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] 13994; X86-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] 13995; X86-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] 13996; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 13997; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 13998; X86-NEXT: retl # encoding: [0xc3] 13999; 14000; X64-LABEL: test_int_x86_avx512_mask_prol_q_128: 14001; X64: # %bb.0: 14002; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14003; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] 14004; X64-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] 14005; X64-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] 14006; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 14007; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 14008; X64-NEXT: retq # encoding: [0xc3] 14009 %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 14010 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 4, <2 x i64> zeroinitializer, i8 %x3) 14011 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 5, <2 x i64> %x2, i8 -1) 14012 %res3 = add <2 x i64> %res, %res1 14013 %res4 = add <2 x i64> %res3, %res2 14014 ret <2 x i64> %res4 14015} 14016 14017declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8) 14018 14019define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 14020; X86-LABEL: test_int_x86_avx512_mask_prol_q_256: 14021; X86: # %bb.0: 14022; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14023; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14024; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] 14025; 
X86-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] 14026; X86-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] 14027; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 14028; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 14029; X86-NEXT: retl # encoding: [0xc3] 14030; 14031; X64-LABEL: test_int_x86_avx512_mask_prol_q_256: 14032; X64: # %bb.0: 14033; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14034; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] 14035; X64-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] 14036; X64-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] 14037; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 14038; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 14039; X64-NEXT: retq # encoding: [0xc3] 14040 %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 14041 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 4, <4 x i64> zeroinitializer, i8 %x3) 14042 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 5, <4 x i64> %x2, i8 -1) 14043 %res3 = add <4 x i64> %res, %res1 14044 %res4 = add <4 x i64> %res3, %res2 14045 ret <4 x i64> %res4 14046} 14047 14048declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 14049 14050define <4 x i32>@test_int_x86_avx512_prolv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 14051; CHECK-LABEL: test_int_x86_avx512_prolv_d_128_old: 14052; CHECK: # %bb.0: 14053; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1] 14054; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14055 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 14056 ret <4 x i32> %res 14057} 14058 14059define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 14060; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128_old: 14061; X86: # %bb.0: 14062; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14063; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14064; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] 14065; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14066; X86-NEXT: retl # encoding: [0xc3] 14067; 14068; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128_old: 14069; X64: # %bb.0: 14070; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14071; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] 14072; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14073; X64-NEXT: retq # encoding: [0xc3] 14074 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 14075 ret <4 x i32> %res 14076} 14077 14078define <4 x i32>@test_int_x86_avx512_maskz_prolv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 14079; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_128_old: 14080; X86: # %bb.0: 14081; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14082; X86-NEXT: 
kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14083; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] 14084; X86-NEXT: retl # encoding: [0xc3] 14085; 14086; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_128_old: 14087; X64: # %bb.0: 14088; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14089; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] 14090; X64-NEXT: retq # encoding: [0xc3] 14091 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 14092 ret <4 x i32> %res 14093} 14094 14095declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 14096 14097define <8 x i32>@test_int_x86_avx512_prolv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 14098; CHECK-LABEL: test_int_x86_avx512_prolv_d_256_old: 14099; CHECK: # %bb.0: 14100; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1] 14101; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14102 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 14103 ret <8 x i32> %res 14104} 14105 14106define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 14107; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256_old: 14108; X86: # %bb.0: 14109; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14110; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14111; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] 14112; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14113; X86-NEXT: retl # encoding: [0xc3] 14114; 14115; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256_old: 14116; X64: # %bb.0: 14117; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14118; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] 14119; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14120; X64-NEXT: retq # encoding: [0xc3] 14121 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 14122 ret <8 x i32> %res 14123} 14124 14125define <8 x i32>@test_int_x86_avx512_maskz_prolv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 14126; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_256_old: 14127; X86: # %bb.0: 14128; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14129; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14130; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] 14131; X86-NEXT: retl # encoding: [0xc3] 14132; 14133; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_256_old: 14134; X64: # %bb.0: 14135; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14136; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] 14137; X64-NEXT: retq # encoding: [0xc3] 14138 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 14139 ret <8 x i32> %res 14140} 14141 14142declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 14143 14144define <2 x i64>@test_int_x86_avx512_prolv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 14145; CHECK-LABEL: 
test_int_x86_avx512_prolv_q_128_old: 14146; CHECK: # %bb.0: 14147; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1] 14148; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14149 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 14150 ret <2 x i64> %res 14151} 14152 14153define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 14154; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128_old: 14155; X86: # %bb.0: 14156; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14157; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14158; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] 14159; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14160; X86-NEXT: retl # encoding: [0xc3] 14161; 14162; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128_old: 14163; X64: # %bb.0: 14164; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14165; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] 14166; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14167; X64-NEXT: retq # encoding: [0xc3] 14168 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 14169 ret <2 x i64> %res 14170} 14171 14172define <2 x i64>@test_int_x86_avx512_maskz_prolv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 14173; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_128_old: 14174; X86: # %bb.0: 14175; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14176; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14177; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] 14178; X86-NEXT: retl # encoding: [0xc3] 14179; 14180; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_128_old: 14181; X64: # %bb.0: 14182; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14183; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] 14184; X64-NEXT: retq # encoding: [0xc3] 14185 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 14186 ret <2 x i64> %res 14187} 14188 14189declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 14190 14191define <4 x i64>@test_int_x86_avx512_prolv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 14192; CHECK-LABEL: test_int_x86_avx512_prolv_q_256_old: 14193; CHECK: # %bb.0: 14194; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1] 14195; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14196 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 14197 ret <4 x i64> %res 14198} 14199 14200define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 14201; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256_old: 14202; X86: # %bb.0: 14203; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14204; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14205; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] 14206; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 
14207; X86-NEXT: retl # encoding: [0xc3] 14208; 14209; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256_old: 14210; X64: # %bb.0: 14211; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14212; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] 14213; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14214; X64-NEXT: retq # encoding: [0xc3] 14215 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 14216 ret <4 x i64> %res 14217} 14218 14219define <4 x i64>@test_int_x86_avx512_maskz_prolv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 14220; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_256_old: 14221; X86: # %bb.0: 14222; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14223; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14224; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] 14225; X86-NEXT: retl # encoding: [0xc3] 14226; 14227; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_256_old: 14228; X64: # %bb.0: 14229; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14230; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] 14231; X64-NEXT: retq # encoding: [0xc3] 14232 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 14233 ret <4 x i64> %res 14234} 14235 14236declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8) 14237 14238define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 14239; X86-LABEL: test_int_x86_avx512_mask_pror_d_128: 14240; X86: # %bb.0: 14241; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14242; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14243; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] 14244; X86-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] 14245; X86-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] 14246; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 14247; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 14248; X86-NEXT: retl # encoding: [0xc3] 14249; 14250; X64-LABEL: test_int_x86_avx512_mask_pror_d_128: 14251; X64: # %bb.0: 14252; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14253; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] 14254; X64-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] 14255; X64-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] 14256; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 14257; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 14258; X64-NEXT: retq # encoding: [0xc3] 14259 %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 14260 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 4, <4 x i32> zeroinitializer, i8 %x3) 14261 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 5, <4 x i32> %x2, i8 -1) 14262 %res3 = add <4 x i32> %res, %res1 14263 
%res4 = add <4 x i32> %res3, %res2 14264 ret <4 x i32> %res4 14265} 14266 14267declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8) 14268 14269define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 14270; X86-LABEL: test_int_x86_avx512_mask_pror_d_256: 14271; X86: # %bb.0: 14272; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14273; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14274; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] 14275; X86-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] 14276; X86-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] 14277; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 14278; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 14279; X86-NEXT: retl # encoding: [0xc3] 14280; 14281; X64-LABEL: test_int_x86_avx512_mask_pror_d_256: 14282; X64: # %bb.0: 14283; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14284; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] 14285; X64-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] 14286; X64-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] 14287; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 14288; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 14289; X64-NEXT: retq # encoding: [0xc3] 14290 %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 14291 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 4, <8 x i32> zeroinitializer, i8 %x3) 14292 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 5, <8 x i32> %x2, i8 -1) 14293 %res3 = add <8 x i32> %res, %res1 14294 %res4 = add <8 x i32> %res3, %res2 14295 ret <8 x i32> %res4 14296} 14297 14298declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8) 14299 14300define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 14301; X86-LABEL: test_int_x86_avx512_mask_pror_q_128: 14302; X86: # %bb.0: 14303; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14304; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14305; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] 14306; X86-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] 14307; X86-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] 14308; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 14309; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 14310; X86-NEXT: retl # encoding: [0xc3] 14311; 14312; X64-LABEL: test_int_x86_avx512_mask_pror_q_128: 14313; X64: # %bb.0: 14314; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14315; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] 14316; X64-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] 14317; X64-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: 
[0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] 14318; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 14319; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 14320; X64-NEXT: retq # encoding: [0xc3] 14321 %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 14322 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 4, <2 x i64> zeroinitializer, i8 %x3) 14323 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 5, <2 x i64> %x2, i8 -1) 14324 %res3 = add <2 x i64> %res, %res1 14325 %res4 = add <2 x i64> %res3, %res2 14326 ret <2 x i64> %res4 14327} 14328 14329declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8) 14330 14331define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 14332; X86-LABEL: test_int_x86_avx512_mask_pror_q_256: 14333; X86: # %bb.0: 14334; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14335; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14336; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] 14337; X86-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] 14338; X86-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] 14339; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 14340; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 14341; X86-NEXT: retl # encoding: [0xc3] 14342; 14343; X64-LABEL: test_int_x86_avx512_mask_pror_q_256: 14344; X64: # %bb.0: 14345; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14346; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] 14347; X64-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] 14348; X64-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] 14349; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 14350; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 14351; X64-NEXT: retq # encoding: [0xc3] 14352 %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 14353 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 4, <4 x i64> zeroinitializer, i8 %x3) 14354 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 5, <4 x i64> %x2, i8 -1) 14355 %res3 = add <4 x i64> %res, %res1 14356 %res4 = add <4 x i64> %res3, %res2 14357 ret <4 x i64> %res4 14358} 14359 14360declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>) 14361 14362define <4 x i32>@test_int_x86_avx512_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1) { 14363; CHECK-LABEL: test_int_x86_avx512_prorv_d_128: 14364; CHECK: # %bb.0: 14365; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1] 14366; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14367 %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) 14368 ret <4 x i32> %1 14369} 14370 14371define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 14372; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128: 14373; X86: # %bb.0: 14374; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14375; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14376; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] 14377; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14378; X86-NEXT: retl # encoding: [0xc3] 14379; 14380; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128: 14381; X64: # %bb.0: 14382; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14383; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] 14384; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14385; X64-NEXT: retq # encoding: [0xc3] 14386 %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) 14387 %2 = bitcast i8 %x3 to <8 x i1> 14388 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14389 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 14390 ret <4 x i32> %3 14391} 14392 14393define <4 x i32>@test_int_x86_avx512_maskz_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 14394; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_128: 14395; X86: # %bb.0: 14396; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14397; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14398; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] 14399; X86-NEXT: retl # encoding: [0xc3] 14400; 14401; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_128: 14402; X64: # %bb.0: 14403; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14404; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] 14405; X64-NEXT: retq # encoding: [0xc3] 14406 %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) 14407 %2 = bitcast i8 %x3 to <8 x i1> 14408 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14409 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer 14410 ret <4 x i32> %3 14411} 14412 14413declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>) 14414 14415define <8 x i32>@test_int_x86_avx512_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1) { 14416; CHECK-LABEL: test_int_x86_avx512_prorv_d_256: 14417; CHECK: # %bb.0: 14418; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1] 14419; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14420 %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) 14421 ret <8 x i32> %1 14422} 14423 14424define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 14425; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256: 14426; X86: # %bb.0: 14427; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14428; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14429; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] 14430; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14431; X86-NEXT: retl # encoding: [0xc3] 14432; 14433; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256: 14434; X64: # %bb.0: 14435; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14436; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] 14437; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO 
VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14438; X64-NEXT: retq # encoding: [0xc3] 14439 %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) 14440 %2 = bitcast i8 %x3 to <8 x i1> 14441 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 14442 ret <8 x i32> %3 14443} 14444 14445define <8 x i32>@test_int_x86_avx512_maskz_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) { 14446; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_256: 14447; X86: # %bb.0: 14448; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14449; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14450; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] 14451; X86-NEXT: retl # encoding: [0xc3] 14452; 14453; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_256: 14454; X64: # %bb.0: 14455; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14456; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] 14457; X64-NEXT: retq # encoding: [0xc3] 14458 %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) 14459 %2 = bitcast i8 %x3 to <8 x i1> 14460 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer 14461 ret <8 x i32> %3 14462} 14463 14464declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>) 14465 14466define <2 x i64>@test_int_x86_avx512_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1) { 14467; CHECK-LABEL: test_int_x86_avx512_prorv_q_128: 14468; CHECK: # %bb.0: 14469; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1] 14470; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14471 %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) 14472 ret <2 x i64> %1 14473} 14474 14475define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 14476; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128: 14477; X86: # %bb.0: 14478; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14479; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14480; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] 14481; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14482; X86-NEXT: retl # encoding: [0xc3] 14483; 14484; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128: 14485; X64: # %bb.0: 14486; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14487; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] 14488; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14489; X64-NEXT: retq # encoding: [0xc3] 14490 %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) 14491 %2 = bitcast i8 %x3 to <8 x i1> 14492 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 14493 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 14494 ret <2 x i64> %3 14495} 14496 14497define <2 x i64>@test_int_x86_avx512_maskz_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 14498; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_128: 14499; X86: # %bb.0: 14500; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14501; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14502; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] 14503; X86-NEXT: retl # encoding: [0xc3] 14504; 
14505; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_128: 14506; X64: # %bb.0: 14507; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14508; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] 14509; X64-NEXT: retq # encoding: [0xc3] 14510 %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) 14511 %2 = bitcast i8 %x3 to <8 x i1> 14512 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 14513 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer 14514 ret <2 x i64> %3 14515} 14516 14517declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>) 14518 14519define <4 x i64>@test_int_x86_avx512_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1) { 14520; CHECK-LABEL: test_int_x86_avx512_prorv_q_256: 14521; CHECK: # %bb.0: 14522; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1] 14523; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14524 %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) 14525 ret <4 x i64> %1 14526} 14527 14528define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 14529; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256: 14530; X86: # %bb.0: 14531; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14532; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14533; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] 14534; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14535; X86-NEXT: retl # encoding: [0xc3] 14536; 14537; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256: 14538; X64: # %bb.0: 14539; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14540; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] 14541; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14542; X64-NEXT: retq # encoding: [0xc3] 14543 %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) 14544 %2 = bitcast i8 %x3 to <8 x i1> 14545 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14546 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 14547 ret <4 x i64> %3 14548} 14549 14550define <4 x i64>@test_int_x86_avx512_maskz_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) { 14551; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_256: 14552; X86: # %bb.0: 14553; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14554; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14555; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] 14556; X86-NEXT: retl # encoding: [0xc3] 14557; 14558; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_256: 14559; X64: # %bb.0: 14560; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14561; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] 14562; X64-NEXT: retq # encoding: [0xc3] 14563 %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) 14564 %2 = bitcast i8 %x3 to <8 x i1> 14565 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14566 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer 14567 ret <4 x i64> %3 14568} 14569 14570declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32) 
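; Note (descriptive comment, not autogenerated): the prol.d/prol.q tests that follow exercise the unmasked @llvm.x86.avx512.prol.* immediate-rotate intrinsics; masking is expressed directly in IR by bitcasting the i8 mask to <8 x i1>, shuffling out the low elements where needed, and selecting between the rotated value and either the passthru operand or zeroinitializer.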
14571 14572define <4 x i32>@test_int_x86_avx512_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 14573; X86-LABEL: test_int_x86_avx512_prol_d_128: 14574; X86: # %bb.0: 14575; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14576; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14577; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] 14578; X86-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] 14579; X86-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] 14580; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 14581; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 14582; X86-NEXT: retl # encoding: [0xc3] 14583; 14584; X64-LABEL: test_int_x86_avx512_prol_d_128: 14585; X64: # %bb.0: 14586; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14587; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] 14588; X64-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] 14589; X64-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] 14590; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 14591; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 14592; X64-NEXT: retq # encoding: [0xc3] 14593 %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) 14594 %2 = bitcast i8 %x3 to <8 x i1> 14595 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14596 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 14597 %4 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 4) 14598 %5 = bitcast i8 %x3 to <8 x i1> 14599 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14600 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer 14601 %7 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 5) 14602 %res3 = add <4 x i32> %3, %6 14603 %res4 = add <4 x i32> %res3, %7 14604 ret <4 x i32> %res4 14605} 14606 14607declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32) 14608 14609define <8 x i32>@test_int_x86_avx512_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 14610; X86-LABEL: test_int_x86_avx512_prol_d_256: 14611; X86: # %bb.0: 14612; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14613; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14614; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] 14615; X86-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] 14616; X86-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] 14617; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 14618; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 14619; X86-NEXT: retl # encoding: [0xc3] 14620; 14621; X64-LABEL: test_int_x86_avx512_prol_d_256: 14622; X64: # %bb.0: 14623; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14624; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] 14625; X64-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: 
[0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] 14626; X64-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] 14627; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 14628; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 14629; X64-NEXT: retq # encoding: [0xc3] 14630 %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) 14631 %2 = bitcast i8 %x3 to <8 x i1> 14632 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 14633 %4 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 4) 14634 %5 = bitcast i8 %x3 to <8 x i1> 14635 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer 14636 %7 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 5) 14637 %res3 = add <8 x i32> %3, %6 14638 %res4 = add <8 x i32> %res3, %7 14639 ret <8 x i32> %res4 14640} 14641 14642declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32) 14643 14644define <2 x i64>@test_int_x86_avx512_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 14645; X86-LABEL: test_int_x86_avx512_prol_q_128: 14646; X86: # %bb.0: 14647; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14648; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14649; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] 14650; X86-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] 14651; X86-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] 14652; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 14653; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 14654; X86-NEXT: retl # encoding: [0xc3] 14655; 14656; X64-LABEL: test_int_x86_avx512_prol_q_128: 14657; X64: # %bb.0: 14658; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14659; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] 14660; X64-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] 14661; X64-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] 14662; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 14663; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 14664; X64-NEXT: retq # encoding: [0xc3] 14665 %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) 14666 %2 = bitcast i8 %x3 to <8 x i1> 14667 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 14668 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 14669 %4 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 4) 14670 %5 = bitcast i8 %x3 to <8 x i1> 14671 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> 14672 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer 14673 %7 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 5) 14674 %res3 = add <2 x i64> %3, %6 14675 %res4 = add <2 x i64> %res3, %7 14676 ret <2 x i64> %res4 14677} 14678 14679declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32) 14680 14681define <4 x i64>@test_int_x86_avx512_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 14682; X86-LABEL: test_int_x86_avx512_prol_q_256: 14683; X86: # %bb.0: 14684; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x08] 14685; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14686; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] 14687; X86-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] 14688; X86-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] 14689; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 14690; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 14691; X86-NEXT: retl # encoding: [0xc3] 14692; 14693; X64-LABEL: test_int_x86_avx512_prol_q_256: 14694; X64: # %bb.0: 14695; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14696; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] 14697; X64-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] 14698; X64-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] 14699; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 14700; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 14701; X64-NEXT: retq # encoding: [0xc3] 14702 %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) 14703 %2 = bitcast i8 %x3 to <8 x i1> 14704 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14705 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 14706 %4 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 4) 14707 %5 = bitcast i8 %x3 to <8 x i1> 14708 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14709 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer 14710 %7 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 5) 14711 %res3 = add <4 x i64> %3, %6 14712 %res4 = add <4 x i64> %res3, %7 14713 ret <4 x i64> %res4 14714} 14715 14716declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>) 14717 14718define <4 x i32>@test_int_x86_avx512_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1) { 14719; CHECK-LABEL: test_int_x86_avx512_prolv_d_128: 14720; CHECK: # %bb.0: 14721; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1] 14722; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14723 %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) 14724 ret <4 x i32> %1 14725} 14726 14727define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 14728; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128: 14729; X86: # %bb.0: 14730; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14731; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14732; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] 14733; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14734; X86-NEXT: retl # encoding: [0xc3] 14735; 14736; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128: 14737; X64: # %bb.0: 14738; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14739; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] 14740; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14741; X64-NEXT: retq # encoding: [0xc3] 14742 %1 = call <4 x i32> 
@llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) 14743 %2 = bitcast i8 %x3 to <8 x i1> 14744 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14745 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 14746 ret <4 x i32> %3 14747} 14748 14749define <4 x i32>@test_int_x86_avx512_maskz_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) { 14750; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_128: 14751; X86: # %bb.0: 14752; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14753; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14754; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] 14755; X86-NEXT: retl # encoding: [0xc3] 14756; 14757; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_128: 14758; X64: # %bb.0: 14759; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14760; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] 14761; X64-NEXT: retq # encoding: [0xc3] 14762 %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) 14763 %2 = bitcast i8 %x3 to <8 x i1> 14764 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14765 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer 14766 ret <4 x i32> %3 14767} 14768 14769declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>) 14770 14771define <8 x i32>@test_int_x86_avx512_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 14772; CHECK-LABEL: test_int_x86_avx512_prolv_d_256: 14773; CHECK: # %bb.0: 14774; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1] 14775; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14776 %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) 14777 ret <8 x i32> %1 14778} 14779 14780define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 14781; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256: 14782; X86: # %bb.0: 14783; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14784; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14785; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] 14786; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14787; X86-NEXT: retl # encoding: [0xc3] 14788; 14789; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256: 14790; X64: # %bb.0: 14791; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14792; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] 14793; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14794; X64-NEXT: retq # encoding: [0xc3] 14795 %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) 14796 %2 = bitcast i8 %x3 to <8 x i1> 14797 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 14798 ret <8 x i32> %3 14799} 14800 14801define <8 x i32>@test_int_x86_avx512_maskz_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 14802; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_256: 14803; X86: # %bb.0: 14804; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14805; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14806; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] 14807; X86-NEXT: 
retl # encoding: [0xc3] 14808; 14809; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_256: 14810; X64: # %bb.0: 14811; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14812; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] 14813; X64-NEXT: retq # encoding: [0xc3] 14814 %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) 14815 %2 = bitcast i8 %x3 to <8 x i1> 14816 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer 14817 ret <8 x i32> %3 14818} 14819 14820declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>) 14821 14822define <2 x i64>@test_int_x86_avx512_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1) { 14823; CHECK-LABEL: test_int_x86_avx512_prolv_q_128: 14824; CHECK: # %bb.0: 14825; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1] 14826; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14827 %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) 14828 ret <2 x i64> %1 14829} 14830 14831define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 14832; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128: 14833; X86: # %bb.0: 14834; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14835; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14836; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] 14837; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14838; X86-NEXT: retl # encoding: [0xc3] 14839; 14840; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128: 14841; X64: # %bb.0: 14842; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14843; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] 14844; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 14845; X64-NEXT: retq # encoding: [0xc3] 14846 %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) 14847 %2 = bitcast i8 %x3 to <8 x i1> 14848 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 14849 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 14850 ret <2 x i64> %3 14851} 14852 14853define <2 x i64>@test_int_x86_avx512_maskz_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) { 14854; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_128: 14855; X86: # %bb.0: 14856; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14857; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14858; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] 14859; X86-NEXT: retl # encoding: [0xc3] 14860; 14861; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_128: 14862; X64: # %bb.0: 14863; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14864; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] 14865; X64-NEXT: retq # encoding: [0xc3] 14866 %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) 14867 %2 = bitcast i8 %x3 to <8 x i1> 14868 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 14869 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer 14870 ret <2 x i64> %3 14871} 14872 14873declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>) 14874 14875define <4 x i64>@test_int_x86_avx512_prolv_q_256(<4 x i64> %x0, <4 x 
i64> %x1) { 14876; CHECK-LABEL: test_int_x86_avx512_prolv_q_256: 14877; CHECK: # %bb.0: 14878; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1] 14879; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 14880 %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) 14881 ret <4 x i64> %1 14882} 14883 14884define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 14885; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256: 14886; X86: # %bb.0: 14887; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14888; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14889; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] 14890; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14891; X86-NEXT: retl # encoding: [0xc3] 14892; 14893; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256: 14894; X64: # %bb.0: 14895; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14896; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] 14897; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 14898; X64-NEXT: retq # encoding: [0xc3] 14899 %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) 14900 %2 = bitcast i8 %x3 to <8 x i1> 14901 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14902 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 14903 ret <4 x i64> %3 14904} 14905 14906define <4 x i64>@test_int_x86_avx512_maskz_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) { 14907; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_256: 14908; X86: # %bb.0: 14909; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 14910; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14911; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] 14912; X86-NEXT: retl # encoding: [0xc3] 14913; 14914; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_256: 14915; X64: # %bb.0: 14916; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 14917; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] 14918; X64-NEXT: retq # encoding: [0xc3] 14919 %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) 14920 %2 = bitcast i8 %x3 to <8 x i1> 14921 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14922 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer 14923 ret <4 x i64> %3 14924} 14925 14926declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32) 14927 14928define <4 x i32>@test_int_x86_avx512_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 14929; X86-LABEL: test_int_x86_avx512_pror_d_128: 14930; X86: # %bb.0: 14931; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14932; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14933; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] 14934; X86-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] 14935; X86-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] 14936; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 14937; X86-NEXT: vpaddd %xmm0, %xmm1, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 14938; X86-NEXT: retl # encoding: [0xc3] 14939; 14940; X64-LABEL: test_int_x86_avx512_pror_d_128: 14941; X64: # %bb.0: 14942; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14943; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] 14944; X64-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] 14945; X64-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] 14946; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 14947; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] 14948; X64-NEXT: retq # encoding: [0xc3] 14949 %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) 14950 %2 = bitcast i8 %x3 to <8 x i1> 14951 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14952 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 14953 %4 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 4) 14954 %5 = bitcast i8 %x3 to <8 x i1> 14955 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14956 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer 14957 %7 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 5) 14958 %res3 = add <4 x i32> %3, %6 14959 %res4 = add <4 x i32> %res3, %7 14960 ret <4 x i32> %res4 14961} 14962 14963declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32) 14964 14965define <8 x i32>@test_int_x86_avx512_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 14966; X86-LABEL: test_int_x86_avx512_pror_d_256: 14967; X86: # %bb.0: 14968; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 14969; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 14970; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] 14971; X86-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] 14972; X86-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] 14973; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 14974; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 14975; X86-NEXT: retl # encoding: [0xc3] 14976; 14977; X64-LABEL: test_int_x86_avx512_pror_d_256: 14978; X64: # %bb.0: 14979; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 14980; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] 14981; X64-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] 14982; X64-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] 14983; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 14984; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 14985; X64-NEXT: retq # encoding: [0xc3] 14986 %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) 14987 %2 = bitcast i8 %x3 to <8 x i1> 14988 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 14989 %4 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 4) 14990 %5 = bitcast i8 %x3 to <8 x i1> 14991 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer 14992 %7 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 
x i32> %x0, i32 5) 14993 %res3 = add <8 x i32> %3, %6 14994 %res4 = add <8 x i32> %res3, %7 14995 ret <8 x i32> %res4 14996} 14997 14998declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32) 14999 15000define <2 x i64>@test_int_x86_avx512_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 15001; X86-LABEL: test_int_x86_avx512_pror_q_128: 15002; X86: # %bb.0: 15003; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 15004; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15005; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] 15006; X86-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] 15007; X86-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] 15008; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 15009; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 15010; X86-NEXT: retl # encoding: [0xc3] 15011; 15012; X64-LABEL: test_int_x86_avx512_pror_q_128: 15013; X64: # %bb.0: 15014; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 15015; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] 15016; X64-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] 15017; X64-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] 15018; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 15019; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 15020; X64-NEXT: retq # encoding: [0xc3] 15021 %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) 15022 %2 = bitcast i8 %x3 to <8 x i1> 15023 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 15024 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 15025 %4 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 4) 15026 %5 = bitcast i8 %x3 to <8 x i1> 15027 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> 15028 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer 15029 %7 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 5) 15030 %res3 = add <2 x i64> %3, %6 15031 %res4 = add <2 x i64> %res3, %7 15032 ret <2 x i64> %res4 15033} 15034 15035declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32) 15036 15037define <4 x i64>@test_int_x86_avx512_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 15038; X86-LABEL: test_int_x86_avx512_pror_q_256: 15039; X86: # %bb.0: 15040; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 15041; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15042; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] 15043; X86-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] 15044; X86-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] 15045; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 15046; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 15047; X86-NEXT: retl # encoding: [0xc3] 15048; 15049; X64-LABEL: test_int_x86_avx512_pror_q_256: 15050; X64: # %bb.0: 15051; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 15052; X64-NEXT: 
vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] 15053; X64-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] 15054; X64-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] 15055; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 15056; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 15057; X64-NEXT: retq # encoding: [0xc3] 15058 %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) 15059 %2 = bitcast i8 %x3 to <8 x i1> 15060 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15061 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 15062 %4 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 4) 15063 %5 = bitcast i8 %x3 to <8 x i1> 15064 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15065 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer 15066 %7 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 5) 15067 %res3 = add <4 x i64> %3, %6 15068 %res4 = add <4 x i64> %res3, %7 15069 ret <4 x i64> %res4 15070} 15071 15072declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 15073 15074define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 15075; CHECK-LABEL: test_vfmadd256_ps: 15076; CHECK: # %bb.0: 15077; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2] 15078; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2 15079; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15080 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind 15081 ret <8 x float> %res 15082} 15083 15084define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 15085; X86-LABEL: test_mask_vfmadd256_ps: 15086; X86: # %bb.0: 15087; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15088; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15089; X86-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1] 15090; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2 15091; X86-NEXT: retl # encoding: [0xc3] 15092; 15093; X64-LABEL: test_mask_vfmadd256_ps: 15094; X64: # %bb.0: 15095; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15096; X64-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1] 15097; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2 15098; X64-NEXT: retq # encoding: [0xc3] 15099 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 15100 ret <8 x float> %res 15101} 15102 15103declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 15104 15105define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 15106; CHECK-LABEL: test_vfmadd128_ps: 15107; CHECK: # %bb.0: 15108; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2] 15109; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2 15110; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15111 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x 
float> %a1, <4 x float> %a2, i8 -1) nounwind 15112 ret <4 x float> %res 15113} 15114 15115define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 15116; X86-LABEL: test_mask_vfmadd128_ps: 15117; X86: # %bb.0: 15118; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15119; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15120; X86-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1] 15121; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2 15122; X86-NEXT: retl # encoding: [0xc3] 15123; 15124; X64-LABEL: test_mask_vfmadd128_ps: 15125; X64: # %bb.0: 15126; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15127; X64-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1] 15128; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2 15129; X64-NEXT: retq # encoding: [0xc3] 15130 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 15131 ret <4 x float> %res 15132} 15133 15134declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 15135 15136define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { 15137; CHECK-LABEL: test_fmadd256_pd: 15138; CHECK: # %bb.0: 15139; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] 15140; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2 15141; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15142 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 -1) 15143 ret <4 x double> %res 15144} 15145 15146define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) { 15147; X86-LABEL: test_mask_fmadd256_pd: 15148; X86: # %bb.0: 15149; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15150; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15151; X86-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1] 15152; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2 15153; X86-NEXT: retl # encoding: [0xc3] 15154; 15155; X64-LABEL: test_mask_fmadd256_pd: 15156; X64: # %bb.0: 15157; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15158; X64-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1] 15159; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2 15160; X64-NEXT: retq # encoding: [0xc3] 15161 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) 15162 ret <4 x double> %res 15163} 15164 15165declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15166 15167define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { 15168; CHECK-LABEL: test_fmadd128_pd: 15169; CHECK: # %bb.0: 15170; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 15171; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2 15172; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15173 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1) 15174 ret <2 x double> %res 15175} 15176 15177define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 15178; 
X86-LABEL: test_mask_fmadd128_pd: 15179; X86: # %bb.0: 15180; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15181; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15182; X86-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1] 15183; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2 15184; X86-NEXT: retl # encoding: [0xc3] 15185; 15186; X64-LABEL: test_mask_fmadd128_pd: 15187; X64: # %bb.0: 15188; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15189; X64-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1] 15190; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2 15191; X64-NEXT: retq # encoding: [0xc3] 15192 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) 15193 ret <2 x double> %res 15194} 15195 15196declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15197 15198define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 15199; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128: 15200; X86: # %bb.0: 15201; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15202; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15203; X86-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1] 15204; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2 15205; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15206; X86-NEXT: retl # encoding: [0xc3] 15207; 15208; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128: 15209; X64: # %bb.0: 15210; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15211; X64-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1] 15212; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2 15213; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15214; X64-NEXT: retq # encoding: [0xc3] 15215 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 15216 ret <2 x double> %res 15217} 15218 15219declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15220 15221define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 15222; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128: 15223; X86: # %bb.0: 15224; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15225; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15226; X86-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2] 15227; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 15228; X86-NEXT: retl # encoding: [0xc3] 15229; 15230; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128: 15231; X64: # %bb.0: 15232; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15233; X64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2] 15234; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 15235; X64-NEXT: retq # encoding: [0xc3] 15236 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 15237 ret <2 x double> %res 15238} 15239 15240declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x 
double>, <4 x double>, <4 x double>, i8) 15241 15242define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 15243; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256: 15244; X86: # %bb.0: 15245; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15246; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15247; X86-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1] 15248; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2 15249; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15250; X86-NEXT: retl # encoding: [0xc3] 15251; 15252; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256: 15253; X64: # %bb.0: 15254; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15255; X64-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1] 15256; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2 15257; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15258; X64-NEXT: retq # encoding: [0xc3] 15259 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 15260 ret <4 x double> %res 15261} 15262 15263declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 15264 15265define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 15266; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256: 15267; X86: # %bb.0: 15268; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15269; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15270; X86-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2] 15271; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2 15272; X86-NEXT: retl # encoding: [0xc3] 15273; 15274; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256: 15275; X64: # %bb.0: 15276; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15277; X64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2] 15278; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2 15279; X64-NEXT: retq # encoding: [0xc3] 15280 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 15281 ret <4 x double> %res 15282} 15283 15284declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 15285 15286define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 15287; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128: 15288; X86: # %bb.0: 15289; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15290; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15291; X86-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1] 15292; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2 15293; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 15294; X86-NEXT: retl # encoding: [0xc3] 15295; 15296; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128: 15297; X64: # %bb.0: 15298; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15299; X64-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1] 15300; X64-NEXT: # xmm2 
{%k1} = (xmm0 * xmm1) + xmm2 15301; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 15302; X64-NEXT: retq # encoding: [0xc3] 15303 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 15304 ret <4 x float> %res 15305} 15306 15307declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 15308 15309define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 15310; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128: 15311; X86: # %bb.0: 15312; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15313; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15314; X86-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2] 15315; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 15316; X86-NEXT: retl # encoding: [0xc3] 15317; 15318; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128: 15319; X64: # %bb.0: 15320; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15321; X64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2] 15322; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 15323; X64-NEXT: retq # encoding: [0xc3] 15324 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 15325 ret <4 x float> %res 15326} 15327 15328declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 15329 15330define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 15331; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256: 15332; X86: # %bb.0: 15333; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15334; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15335; X86-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1] 15336; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2 15337; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 15338; X86-NEXT: retl # encoding: [0xc3] 15339; 15340; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256: 15341; X64: # %bb.0: 15342; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15343; X64-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1] 15344; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2 15345; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 15346; X64-NEXT: retq # encoding: [0xc3] 15347 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 15348 ret <8 x float> %res 15349} 15350 15351declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 15352 15353define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 15354; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256: 15355; X86: # %bb.0: 15356; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15357; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15358; X86-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2] 15359; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2 15360; X86-NEXT: retl # encoding: 
[0xc3] 15361; 15362; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256: 15363; X64: # %bb.0: 15364; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15365; X64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2] 15366; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2 15367; X64-NEXT: retq # encoding: [0xc3] 15368 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 15369 ret <8 x float> %res 15370} 15371 15372 15373declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15374 15375define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 15376; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: 15377; X86: # %bb.0: 15378; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15379; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15380; X86-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1] 15381; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2 15382; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15383; X86-NEXT: retl # encoding: [0xc3] 15384; 15385; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: 15386; X64: # %bb.0: 15387; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15388; X64-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1] 15389; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2 15390; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15391; X64-NEXT: retq # encoding: [0xc3] 15392 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 15393 ret <2 x double> %res 15394} 15395 15396 15397declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 15398 15399define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 15400; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: 15401; X86: # %bb.0: 15402; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15403; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15404; X86-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1] 15405; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2 15406; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15407; X86-NEXT: retl # encoding: [0xc3] 15408; 15409; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: 15410; X64: # %bb.0: 15411; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15412; X64-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1] 15413; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2 15414; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15415; X64-NEXT: retq # encoding: [0xc3] 15416 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 15417 ret <4 x double> %res 15418} 15419 15420declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 15421 15422define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 15423; X86-LABEL: 
test_int_x86_avx512_mask3_vfmsub_ps_128: 15424; X86: # %bb.0: 15425; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15426; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15427; X86-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1] 15428; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2 15429; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 15430; X86-NEXT: retl # encoding: [0xc3] 15431; 15432; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128: 15433; X64: # %bb.0: 15434; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15435; X64-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1] 15436; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2 15437; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 15438; X64-NEXT: retq # encoding: [0xc3] 15439 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 15440 ret <4 x float> %res 15441} 15442 15443declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 15444 15445define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 15446; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: 15447; X86: # %bb.0: 15448; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15449; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15450; X86-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1] 15451; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2 15452; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 15453; X86-NEXT: retl # encoding: [0xc3] 15454; 15455; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: 15456; X64: # %bb.0: 15457; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15458; X64-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1] 15459; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2 15460; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 15461; X64-NEXT: retq # encoding: [0xc3] 15462 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 15463 ret <8 x float> %res 15464} 15465 15466declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 15467 15468define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 15469; CHECK-LABEL: test_vfnmadd256_ps: 15470; CHECK: # %bb.0: 15471; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2] 15472; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2 15473; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15474 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind 15475 ret <8 x float> %res 15476} 15477 15478define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 15479; X86-LABEL: test_mask_vfnmadd256_ps: 15480; X86: # %bb.0: 15481; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15482; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15483; X86-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # 
encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1] 15484; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2 15485; X86-NEXT: retl # encoding: [0xc3] 15486; 15487; X64-LABEL: test_mask_vfnmadd256_ps: 15488; X64: # %bb.0: 15489; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15490; X64-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1] 15491; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2 15492; X64-NEXT: retq # encoding: [0xc3] 15493 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 15494 ret <8 x float> %res 15495} 15496 15497declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 15498 15499define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 15500; CHECK-LABEL: test_vfnmadd128_ps: 15501; CHECK: # %bb.0: 15502; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2] 15503; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2 15504; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15505 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 15506 ret <4 x float> %res 15507} 15508 15509define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 15510; X86-LABEL: test_mask_vfnmadd128_ps: 15511; X86: # %bb.0: 15512; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15513; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15514; X86-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1] 15515; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2 15516; X86-NEXT: retl # encoding: [0xc3] 15517; 15518; X64-LABEL: test_mask_vfnmadd128_ps: 15519; X64: # %bb.0: 15520; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15521; X64-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1] 15522; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2 15523; X64-NEXT: retq # encoding: [0xc3] 15524 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 15525 ret <4 x float> %res 15526} 15527 15528declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 15529 15530define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 15531; CHECK-LABEL: test_vfnmadd256_pd: 15532; CHECK: # %bb.0: 15533; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2] 15534; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2 15535; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15536 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 15537 ret <4 x double> %res 15538} 15539 15540define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 15541; X86-LABEL: test_mask_vfnmadd256_pd: 15542; X86: # %bb.0: 15543; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15544; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15545; X86-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1] 15546; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2 15547; 
X86-NEXT: retl # encoding: [0xc3] 15548; 15549; X64-LABEL: test_mask_vfnmadd256_pd: 15550; X64: # %bb.0: 15551; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15552; X64-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1] 15553; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2 15554; X64-NEXT: retq # encoding: [0xc3] 15555 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 15556 ret <4 x double> %res 15557} 15558 15559declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 15560 15561define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 15562; CHECK-LABEL: test_vfnmadd128_pd: 15563; CHECK: # %bb.0: 15564; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2] 15565; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2 15566; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15567 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 15568 ret <2 x double> %res 15569} 15570 15571define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 15572; X86-LABEL: test_mask_vfnmadd128_pd: 15573; X86: # %bb.0: 15574; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15575; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15576; X86-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1] 15577; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2 15578; X86-NEXT: retl # encoding: [0xc3] 15579; 15580; X64-LABEL: test_mask_vfnmadd128_pd: 15581; X64: # %bb.0: 15582; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15583; X64-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1] 15584; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2 15585; X64-NEXT: retq # encoding: [0xc3] 15586 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 15587 ret <2 x double> %res 15588} 15589 15590declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 15591 15592define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 15593; CHECK-LABEL: test_vfnmsub256_ps: 15594; CHECK: # %bb.0: 15595; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2] 15596; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 15597; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15598 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind 15599 ret <8 x float> %res 15600} 15601 15602define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 15603; X86-LABEL: test_mask_vfnmsub256_ps: 15604; X86: # %bb.0: 15605; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15606; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15607; X86-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1] 15608; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2 15609; X86-NEXT: retl # encoding: [0xc3] 15610; 15611; X64-LABEL: test_mask_vfnmsub256_ps: 15612; X64: # 
%bb.0: 15613; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15614; X64-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1] 15615; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2 15616; X64-NEXT: retq # encoding: [0xc3] 15617 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 15618 ret <8 x float> %res 15619} 15620 15621declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 15622 15623define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 15624; CHECK-LABEL: test_vfnmsub128_ps: 15625; CHECK: # %bb.0: 15626; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2] 15627; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 15628; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15629 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 15630 ret <4 x float> %res 15631} 15632 15633define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 15634; X86-LABEL: test_mask_vfnmsub128_ps: 15635; X86: # %bb.0: 15636; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15637; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15638; X86-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1] 15639; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2 15640; X86-NEXT: retl # encoding: [0xc3] 15641; 15642; X64-LABEL: test_mask_vfnmsub128_ps: 15643; X64: # %bb.0: 15644; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15645; X64-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1] 15646; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2 15647; X64-NEXT: retq # encoding: [0xc3] 15648 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 15649 ret <4 x float> %res 15650} 15651 15652declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 15653 15654define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 15655; CHECK-LABEL: test_vfnmsub256_pd: 15656; CHECK: # %bb.0: 15657; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 15658; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 15659; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15660 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 15661 ret <4 x double> %res 15662} 15663 15664define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 15665; X86-LABEL: test_mask_vfnmsub256_pd: 15666; X86: # %bb.0: 15667; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15668; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15669; X86-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1] 15670; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2 15671; X86-NEXT: retl # encoding: [0xc3] 15672; 15673; X64-LABEL: test_mask_vfnmsub256_pd: 15674; X64: # %bb.0: 15675; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15676; X64-NEXT: vfnmsub132pd %ymm1, 
%ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1] 15677; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2 15678; X64-NEXT: retq # encoding: [0xc3] 15679 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 15680 ret <4 x double> %res 15681} 15682 15683declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 15684 15685define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 15686; CHECK-LABEL: test_vfnmsub128_pd: 15687; CHECK: # %bb.0: 15688; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 15689; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 15690; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15691 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 15692 ret <2 x double> %res 15693} 15694 15695define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 15696; X86-LABEL: test_mask_vfnmsub128_pd: 15697; X86: # %bb.0: 15698; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15699; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15700; X86-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1] 15701; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2 15702; X86-NEXT: retl # encoding: [0xc3] 15703; 15704; X64-LABEL: test_mask_vfnmsub128_pd: 15705; X64: # %bb.0: 15706; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15707; X64-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1] 15708; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2 15709; X64-NEXT: retq # encoding: [0xc3] 15710 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 15711 ret <2 x double> %res 15712} 15713 15714declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15715 15716define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 15717; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: 15718; X86: # %bb.0: 15719; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15720; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15721; X86-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1] 15722; X86-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 15723; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15724; X86-NEXT: retl # encoding: [0xc3] 15725; 15726; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: 15727; X64: # %bb.0: 15728; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15729; X64-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1] 15730; X64-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 15731; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15732; X64-NEXT: retq # encoding: [0xc3] 15733 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 15734 ret <2 x double> %res 15735} 15736 15737declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x 
double>, <4 x double>, i8) 15738 15739define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 15740; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256: 15741; X86: # %bb.0: 15742; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15743; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15744; X86-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1] 15745; X86-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2 15746; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15747; X86-NEXT: retl # encoding: [0xc3] 15748; 15749; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256: 15750; X64: # %bb.0: 15751; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15752; X64-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1] 15753; X64-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2 15754; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15755; X64-NEXT: retq # encoding: [0xc3] 15756 %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 15757 ret <4 x double> %res 15758} 15759 15760declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 15761 15762define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 15763; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: 15764; X86: # %bb.0: 15765; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15766; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15767; X86-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1] 15768; X86-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 15769; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 15770; X86-NEXT: retl # encoding: [0xc3] 15771; 15772; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: 15773; X64: # %bb.0: 15774; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15775; X64-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1] 15776; X64-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 15777; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 15778; X64-NEXT: retq # encoding: [0xc3] 15779 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 15780 ret <4 x float> %res 15781} 15782 15783declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 15784 15785define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 15786; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: 15787; X86: # %bb.0: 15788; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15789; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15790; X86-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1] 15791; X86-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2 15792; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 15793; X86-NEXT: retl # encoding: [0xc3] 15794; 15795; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: 15796; X64: # %bb.0: 15797; X64-NEXT: 
kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15798; X64-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1] 15799; X64-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2 15800; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 15801; X64-NEXT: retq # encoding: [0xc3] 15802 %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 15803 ret <8 x float> %res 15804} 15805 15806declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 15807 15808define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { 15809; CHECK-LABEL: test_fmaddsub256_ps: 15810; CHECK: # %bb.0: 15811; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 15812; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 15813; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15814 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1) 15815 ret <8 x float> %res 15816} 15817 15818define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) { 15819; X86-LABEL: test_mask_fmaddsub256_ps: 15820; X86: # %bb.0: 15821; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15822; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15823; X86-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1] 15824; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2 15825; X86-NEXT: retl # encoding: [0xc3] 15826; 15827; X64-LABEL: test_mask_fmaddsub256_ps: 15828; X64: # %bb.0: 15829; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15830; X64-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1] 15831; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2 15832; X64-NEXT: retq # encoding: [0xc3] 15833 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) 15834 ret <8 x float> %res 15835} 15836 15837declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 15838 15839define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) { 15840; CHECK-LABEL: test_fmaddsub128_ps: 15841; CHECK: # %bb.0: 15842; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 15843; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 15844; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15845 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 -1) 15846 ret <4 x float> %res 15847} 15848 15849define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 15850; X86-LABEL: test_mask_fmaddsub128_ps: 15851; X86: # %bb.0: 15852; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15853; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15854; X86-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1] 15855; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2 15856; X86-NEXT: retl # encoding: [0xc3] 15857; 15858; X64-LABEL: test_mask_fmaddsub128_ps: 15859; X64: # %bb.0: 15860; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 
15861; X64-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1] 15862; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2 15863; X64-NEXT: retq # encoding: [0xc3] 15864 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) 15865 ret <4 x float> %res 15866} 15867 15868declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 15869 15870define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 15871; CHECK-LABEL: test_vfmaddsub256_pd: 15872; CHECK: # %bb.0: 15873; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 15874; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 15875; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15876 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 15877 ret <4 x double> %res 15878} 15879 15880define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 15881; X86-LABEL: test_mask_vfmaddsub256_pd: 15882; X86: # %bb.0: 15883; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15884; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15885; X86-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1] 15886; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2 15887; X86-NEXT: retl # encoding: [0xc3] 15888; 15889; X64-LABEL: test_mask_vfmaddsub256_pd: 15890; X64: # %bb.0: 15891; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15892; X64-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1] 15893; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2 15894; X64-NEXT: retq # encoding: [0xc3] 15895 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 15896 ret <4 x double> %res 15897} 15898 15899declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 15900 15901define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 15902; CHECK-LABEL: test_vfmaddsub128_pd: 15903; CHECK: # %bb.0: 15904; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 15905; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 15906; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 15907 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 15908 ret <2 x double> %res 15909} 15910 15911define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 15912; X86-LABEL: test_mask_vfmaddsub128_pd: 15913; X86: # %bb.0: 15914; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15915; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15916; X86-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1] 15917; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2 15918; X86-NEXT: retl # encoding: [0xc3] 15919; 15920; X64-LABEL: test_mask_vfmaddsub128_pd: 15921; X64: # %bb.0: 15922; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15923; X64-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 
{%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1] 15924; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2 15925; X64-NEXT: retq # encoding: [0xc3] 15926 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 15927 ret <2 x double> %res 15928} 15929 15930declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15931 15932define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 15933; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: 15934; X86: # %bb.0: 15935; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15936; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15937; X86-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1] 15938; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2 15939; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15940; X86-NEXT: retl # encoding: [0xc3] 15941; 15942; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: 15943; X64: # %bb.0: 15944; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15945; X64-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1] 15946; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2 15947; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 15948; X64-NEXT: retq # encoding: [0xc3] 15949 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 15950 ret <2 x double> %res 15951} 15952 15953declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 15954 15955define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 15956; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: 15957; X86: # %bb.0: 15958; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15959; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15960; X86-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2] 15961; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2 15962; X86-NEXT: retl # encoding: [0xc3] 15963; 15964; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: 15965; X64: # %bb.0: 15966; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15967; X64-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2] 15968; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2 15969; X64-NEXT: retq # encoding: [0xc3] 15970 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 15971 ret <2 x double> %res 15972} 15973 15974declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 15975 15976define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 15977; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: 15978; X86: # %bb.0: 15979; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 15980; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 15981; X86-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1] 
15982; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2 15983; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15984; X86-NEXT: retl # encoding: [0xc3] 15985; 15986; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: 15987; X64: # %bb.0: 15988; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 15989; X64-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1] 15990; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2 15991; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 15992; X64-NEXT: retq # encoding: [0xc3] 15993 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 15994 ret <4 x double> %res 15995} 15996 15997declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 15998 15999define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 16000; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: 16001; X86: # %bb.0: 16002; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16003; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16004; X86-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2] 16005; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2 16006; X86-NEXT: retl # encoding: [0xc3] 16007; 16008; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: 16009; X64: # %bb.0: 16010; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16011; X64-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2] 16012; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2 16013; X64-NEXT: retq # encoding: [0xc3] 16014 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 16015 ret <4 x double> %res 16016} 16017 16018declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 16019 16020define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 16021; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: 16022; X86: # %bb.0: 16023; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16024; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16025; X86-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1] 16026; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2 16027; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 16028; X86-NEXT: retl # encoding: [0xc3] 16029; 16030; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: 16031; X64: # %bb.0: 16032; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16033; X64-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1] 16034; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2 16035; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 16036; X64-NEXT: retq # encoding: [0xc3] 16037 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 16038 ret <4 x float> %res 16039} 16040 16041declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 16042 
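; In this group of vfmaddsub/vfmsubadd intrinsic tests, the mask3 variants are
; expected to select the 231 instruction form (the product is accumulated into
; the third operand and then copied to the return register with vmovaps/vmovapd),
; the maskz variants the 213 form with {z} zero-masking, and the unmasked
; variants the EVEX-to-VEX compressed 213 form.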
16043define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 16044; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: 16045; X86: # %bb.0: 16046; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16047; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16048; X86-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2] 16049; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2 16050; X86-NEXT: retl # encoding: [0xc3] 16051; 16052; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: 16053; X64: # %bb.0: 16054; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16055; X64-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2] 16056; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2 16057; X64-NEXT: retq # encoding: [0xc3] 16058 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 16059 ret <4 x float> %res 16060} 16061 16062declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 16063 16064define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 16065; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: 16066; X86: # %bb.0: 16067; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16068; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16069; X86-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1] 16070; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2 16071; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 16072; X86-NEXT: retl # encoding: [0xc3] 16073; 16074; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: 16075; X64: # %bb.0: 16076; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16077; X64-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1] 16078; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2 16079; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 16080; X64-NEXT: retq # encoding: [0xc3] 16081 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 16082 ret <8 x float> %res 16083} 16084 16085declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 16086 16087define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 16088; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: 16089; X86: # %bb.0: 16090; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16091; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16092; X86-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2] 16093; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2 16094; X86-NEXT: retl # encoding: [0xc3] 16095; 16096; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: 16097; X64: # %bb.0: 16098; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16099; X64-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2] 16100; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2 16101; X64-NEXT: retq # encoding: [0xc3] 16102 %res = 
call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 16103 ret <8 x float> %res 16104} 16105 16106declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 16107 16108define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 16109; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: 16110; X86: # %bb.0: 16111; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16112; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16113; X86-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1] 16114; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2 16115; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 16116; X86-NEXT: retl # encoding: [0xc3] 16117; 16118; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: 16119; X64: # %bb.0: 16120; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16121; X64-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1] 16122; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2 16123; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2] 16124; X64-NEXT: retq # encoding: [0xc3] 16125 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 16126 ret <2 x double> %res 16127} 16128 16129declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 16130 16131define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 16132; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: 16133; X86: # %bb.0: 16134; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16135; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16136; X86-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1] 16137; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2 16138; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 16139; X86-NEXT: retl # encoding: [0xc3] 16140; 16141; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: 16142; X64: # %bb.0: 16143; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16144; X64-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1] 16145; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2 16146; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2] 16147; X64-NEXT: retq # encoding: [0xc3] 16148 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 16149 ret <4 x double> %res 16150} 16151 16152declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 16153 16154define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 16155; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: 16156; X86: # %bb.0: 16157; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16158; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16159; X86-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1] 16160; 
X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2 16161; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 16162; X86-NEXT: retl # encoding: [0xc3] 16163; 16164; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: 16165; X64: # %bb.0: 16166; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16167; X64-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1] 16168; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2 16169; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 16170; X64-NEXT: retq # encoding: [0xc3] 16171 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 16172 ret <4 x float> %res 16173} 16174 16175declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 16176 16177define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 16178; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: 16179; X86: # %bb.0: 16180; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16181; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16182; X86-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1] 16183; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2 16184; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 16185; X86-NEXT: retl # encoding: [0xc3] 16186; 16187; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: 16188; X64: # %bb.0: 16189; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16190; X64-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1] 16191; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2 16192; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 16193; X64-NEXT: retq # encoding: [0xc3] 16194 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 16195 ret <8 x float> %res 16196} 16197 16198 16199define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) { 16200; X86-LABEL: test_mask_vfmadd128_ps_rmk: 16201; X86: # %bb.0: 16202; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16203; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 16204; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 16205; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00] 16206; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16207; X86-NEXT: retl # encoding: [0xc3] 16208; 16209; X64-LABEL: test_mask_vfmadd128_ps_rmk: 16210; X64: # %bb.0: 16211; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 16212; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] 16213; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16214; X64-NEXT: retq # encoding: [0xc3] 16215 %a2 = load <4 x float>, <4 x float>* %ptr_a2 16216 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 16217 ret <4 x float> %res 16218} 16219 16220define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) { 16221; X86-LABEL: test_mask_vfmadd128_ps_rmka: 16222; 
X86: # %bb.0: 16223; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16224; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 16225; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 16226; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00] 16227; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16228; X86-NEXT: retl # encoding: [0xc3] 16229; 16230; X64-LABEL: test_mask_vfmadd128_ps_rmka: 16231; X64: # %bb.0: 16232; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 16233; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] 16234; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16235; X64-NEXT: retq # encoding: [0xc3] 16236 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8 16237 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 16238 ret <4 x float> %res 16239} 16240 16241define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { 16242; X86-LABEL: test_mask_vfmadd128_ps_rmkz: 16243; X86: # %bb.0: 16244; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16245; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00] 16246; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16247; X86-NEXT: retl # encoding: [0xc3] 16248; 16249; X64-LABEL: test_mask_vfmadd128_ps_rmkz: 16250; X64: # %bb.0: 16251; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07] 16252; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16253; X64-NEXT: retq # encoding: [0xc3] 16254 %a2 = load <4 x float>, <4 x float>* %ptr_a2 16255 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 16256 ret <4 x float> %res 16257} 16258 16259define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { 16260; X86-LABEL: test_mask_vfmadd128_ps_rmkza: 16261; X86: # %bb.0: 16262; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16263; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00] 16264; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16265; X86-NEXT: retl # encoding: [0xc3] 16266; 16267; X64-LABEL: test_mask_vfmadd128_ps_rmkza: 16268; X64: # %bb.0: 16269; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07] 16270; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16271; X64-NEXT: retq # encoding: [0xc3] 16272 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4 16273 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 16274 ret <4 x float> %res 16275} 16276 16277define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 16278; X86-LABEL: test_mask_vfmadd128_ps_rmb: 16279; X86: # %bb.0: 16280; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16281; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 16282; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 16283; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00] 16284; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16285; X86-NEXT: retl # encoding: [0xc3] 16286; 
16287; X64-LABEL: test_mask_vfmadd128_ps_rmb: 16288; X64: # %bb.0: 16289; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 16290; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 16291; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16292; X64-NEXT: retq # encoding: [0xc3] 16293 %q = load float, float* %ptr_a2 16294 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 16295 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 16296 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 16297 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 16298 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 16299 ret <4 x float> %res 16300} 16301 16302define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 16303; X86-LABEL: test_mask_vfmadd128_ps_rmba: 16304; X86: # %bb.0: 16305; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16306; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 16307; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 16308; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00] 16309; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16310; X86-NEXT: retl # encoding: [0xc3] 16311; 16312; X64-LABEL: test_mask_vfmadd128_ps_rmba: 16313; X64: # %bb.0: 16314; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 16315; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 16316; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16317; X64-NEXT: retq # encoding: [0xc3] 16318 %q = load float, float* %ptr_a2, align 4 16319 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 16320 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 16321 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 16322 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 16323 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 16324 ret <4 x float> %res 16325} 16326 16327define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) { 16328; X86-LABEL: test_mask_vfmadd128_ps_rmbz: 16329; X86: # %bb.0: 16330; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16331; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00] 16332; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16333; X86-NEXT: retl # encoding: [0xc3] 16334; 16335; X64-LABEL: test_mask_vfmadd128_ps_rmbz: 16336; X64: # %bb.0: 16337; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07] 16338; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16339; X64-NEXT: retq # encoding: [0xc3] 16340 %q = load float, float* %ptr_a2 16341 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 16342 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 16343 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 16344 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 16345 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind 16346 ret <4 x float> %res 16347} 16348 16349define <4 x 
float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) { 16350; X86-LABEL: test_mask_vfmadd128_ps_rmbza: 16351; X86: # %bb.0: 16352; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16353; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00] 16354; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16355; X86-NEXT: retl # encoding: [0xc3] 16356; 16357; X64-LABEL: test_mask_vfmadd128_ps_rmbza: 16358; X64: # %bb.0: 16359; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07] 16360; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16361; X64-NEXT: retq # encoding: [0xc3] 16362 %q = load float, float* %ptr_a2, align 4 16363 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 16364 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 16365 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 16366 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 16367 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind 16368 ret <4 x float> %res 16369} 16370 16371define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) { 16372; X86-LABEL: test_mask_vfmadd128_pd_rmk: 16373; X86: # %bb.0: 16374; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16375; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 16376; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 16377; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00] 16378; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16379; X86-NEXT: retl # encoding: [0xc3] 16380; 16381; X64-LABEL: test_mask_vfmadd128_pd_rmk: 16382; X64: # %bb.0: 16383; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 16384; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07] 16385; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem 16386; X64-NEXT: retq # encoding: [0xc3] 16387 %a2 = load <2 x double>, <2 x double>* %ptr_a2 16388 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 16389 ret <2 x double> %res 16390} 16391 16392define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) { 16393; X86-LABEL: test_mask_vfmadd128_pd_rmkz: 16394; X86: # %bb.0: 16395; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16396; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00] 16397; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16398; X86-NEXT: retl # encoding: [0xc3] 16399; 16400; X64-LABEL: test_mask_vfmadd128_pd_rmkz: 16401; X64: # %bb.0: 16402; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07] 16403; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 16404; X64-NEXT: retq # encoding: [0xc3] 16405 %a2 = load <2 x double>, <2 x double>* %ptr_a2 16406 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 16407 ret <2 x double> %res 16408} 16409 16410define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) { 16411; X86-LABEL: test_mask_vfmadd256_pd_rmk: 16412; X86: # %bb.0: 
16413; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16414; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 16415; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 16416; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00] 16417; X86-NEXT: # ymm0 {%k1} = (ymm1 * ymm0) + mem 16418; X86-NEXT: retl # encoding: [0xc3] 16419; 16420; X64-LABEL: test_mask_vfmadd256_pd_rmk: 16421; X64: # %bb.0: 16422; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 16423; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07] 16424; X64-NEXT: # ymm0 {%k1} = (ymm1 * ymm0) + mem 16425; X64-NEXT: retq # encoding: [0xc3] 16426 %a2 = load <4 x double>, <4 x double>* %ptr_a2 16427 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 16428 ret <4 x double> %res 16429} 16430 16431define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) { 16432; X86-LABEL: test_mask_vfmadd256_pd_rmkz: 16433; X86: # %bb.0: 16434; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 16435; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00] 16436; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem 16437; X86-NEXT: retl # encoding: [0xc3] 16438; 16439; X64-LABEL: test_mask_vfmadd256_pd_rmkz: 16440; X64: # %bb.0: 16441; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07] 16442; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem 16443; X64-NEXT: retq # encoding: [0xc3] 16444 %a2 = load <4 x double>, <4 x double>* %ptr_a2 16445 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 16446 ret <4 x double> %res 16447} 16448 16449declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8) 16450 16451define <4 x i32>@test_int_x86_avx512_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1) { 16452; CHECK-LABEL: test_int_x86_avx512_pmov_qd_256: 16453; CHECK: # %bb.0: 16454; CHECK-NEXT: vpmovqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0] 16455; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 16456; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16457 %res = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 16458 ret <4 x i32> %res 16459} 16460 16461define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 16462; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256: 16463; X86: # %bb.0: 16464; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16465; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16466; X86-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1] 16467; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16468; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 16469; X86-NEXT: retl # encoding: [0xc3] 16470; 16471; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256: 16472; X64: # %bb.0: 16473; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16474; X64-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1] 16475; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16476; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 16477; 
X64-NEXT: retq # encoding: [0xc3] 16478 %res = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 16479 ret <4 x i32> %res 16480} 16481 16482define <4 x i32>@test_int_x86_avx512_maskz_pmov_qd_256(<4 x i64> %x0, i8 %x2) { 16483; X86-LABEL: test_int_x86_avx512_maskz_pmov_qd_256: 16484; X86: # %bb.0: 16485; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16486; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16487; X86-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0] 16488; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 16489; X86-NEXT: retl # encoding: [0xc3] 16490; 16491; X64-LABEL: test_int_x86_avx512_maskz_pmov_qd_256: 16492; X64: # %bb.0: 16493; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16494; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0] 16495; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 16496; X64-NEXT: retq # encoding: [0xc3] 16497 %res = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 16498 ret <4 x i32> %res 16499} 16500 16501define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { 16502; X86-LABEL: test_mask_compress_pd_128: 16503; X86: # %bb.0: 16504; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16505; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16506; X86-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1] 16507; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16508; X86-NEXT: retl # encoding: [0xc3] 16509; 16510; X64-LABEL: test_mask_compress_pd_128: 16511; X64: # %bb.0: 16512; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16513; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1] 16514; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16515; X64-NEXT: retq # encoding: [0xc3] 16516 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask) 16517 ret <2 x double> %res 16518} 16519 16520define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) { 16521; X86-LABEL: test_maskz_compress_pd_128: 16522; X86: # %bb.0: 16523; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16524; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16525; X86-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0] 16526; X86-NEXT: retl # encoding: [0xc3] 16527; 16528; X64-LABEL: test_maskz_compress_pd_128: 16529; X64: # %bb.0: 16530; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16531; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0] 16532; X64-NEXT: retq # encoding: [0xc3] 16533 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask) 16534 ret <2 x double> %res 16535} 16536 16537define <2 x double> @test_compress_pd_128(<2 x double> %data) { 16538; CHECK-LABEL: test_compress_pd_128: 16539; CHECK: # %bb.0: 16540; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16541 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1) 16542 ret <2 x double> %res 16543} 16544 16545declare <2 x double> 
@llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask) 16546 16547define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { 16548; X86-LABEL: test_mask_compress_ps_128: 16549; X86: # %bb.0: 16550; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16551; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16552; X86-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1] 16553; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16554; X86-NEXT: retl # encoding: [0xc3] 16555; 16556; X64-LABEL: test_mask_compress_ps_128: 16557; X64: # %bb.0: 16558; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16559; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1] 16560; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16561; X64-NEXT: retq # encoding: [0xc3] 16562 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask) 16563 ret <4 x float> %res 16564} 16565 16566define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) { 16567; X86-LABEL: test_maskz_compress_ps_128: 16568; X86: # %bb.0: 16569; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16570; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16571; X86-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0] 16572; X86-NEXT: retl # encoding: [0xc3] 16573; 16574; X64-LABEL: test_maskz_compress_ps_128: 16575; X64: # %bb.0: 16576; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16577; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0] 16578; X64-NEXT: retq # encoding: [0xc3] 16579 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask) 16580 ret <4 x float> %res 16581} 16582 16583define <4 x float> @test_compress_ps_128(<4 x float> %data) { 16584; CHECK-LABEL: test_compress_ps_128: 16585; CHECK: # %bb.0: 16586; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16587 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1) 16588 ret <4 x float> %res 16589} 16590 16591declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) 16592 16593define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { 16594; X86-LABEL: test_mask_compress_q_128: 16595; X86: # %bb.0: 16596; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16597; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16598; X86-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1] 16599; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16600; X86-NEXT: retl # encoding: [0xc3] 16601; 16602; X64-LABEL: test_mask_compress_q_128: 16603; X64: # %bb.0: 16604; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16605; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1] 16606; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16607; X64-NEXT: retq # encoding: [0xc3] 16608 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) 16609 ret <2 x 
i64> %res 16610} 16611 16612define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) { 16613; X86-LABEL: test_maskz_compress_q_128: 16614; X86: # %bb.0: 16615; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16616; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16617; X86-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0] 16618; X86-NEXT: retl # encoding: [0xc3] 16619; 16620; X64-LABEL: test_maskz_compress_q_128: 16621; X64: # %bb.0: 16622; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16623; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0] 16624; X64-NEXT: retq # encoding: [0xc3] 16625 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask) 16626 ret <2 x i64> %res 16627} 16628 16629define <2 x i64> @test_compress_q_128(<2 x i64> %data) { 16630; CHECK-LABEL: test_compress_q_128: 16631; CHECK: # %bb.0: 16632; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16633 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1) 16634 ret <2 x i64> %res 16635} 16636 16637declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask) 16638 16639define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { 16640; X86-LABEL: test_mask_compress_d_128: 16641; X86: # %bb.0: 16642; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16643; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16644; X86-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1] 16645; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16646; X86-NEXT: retl # encoding: [0xc3] 16647; 16648; X64-LABEL: test_mask_compress_d_128: 16649; X64: # %bb.0: 16650; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16651; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1] 16652; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16653; X64-NEXT: retq # encoding: [0xc3] 16654 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) 16655 ret <4 x i32> %res 16656} 16657 16658define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) { 16659; X86-LABEL: test_maskz_compress_d_128: 16660; X86: # %bb.0: 16661; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16662; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16663; X86-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] 16664; X86-NEXT: retl # encoding: [0xc3] 16665; 16666; X64-LABEL: test_maskz_compress_d_128: 16667; X64: # %bb.0: 16668; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16669; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] 16670; X64-NEXT: retq # encoding: [0xc3] 16671 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask) 16672 ret <4 x i32> %res 16673} 16674 16675define <4 x i32> @test_compress_d_128(<4 x i32> %data) { 16676; CHECK-LABEL: test_compress_d_128: 16677; CHECK: # %bb.0: 16678; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16679 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1) 16680 ret <4 x 
i32> %res 16681} 16682 16683declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) 16684 16685define <2 x double> @test_expand_pd_128(<2 x double> %data) { 16686; CHECK-LABEL: test_expand_pd_128: 16687; CHECK: # %bb.0: 16688; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16689 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1) 16690 ret <2 x double> %res 16691} 16692 16693define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { 16694; X86-LABEL: test_mask_expand_pd_128: 16695; X86: # %bb.0: 16696; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16697; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16698; X86-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8] 16699; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16700; X86-NEXT: retl # encoding: [0xc3] 16701; 16702; X64-LABEL: test_mask_expand_pd_128: 16703; X64: # %bb.0: 16704; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16705; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8] 16706; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16707; X64-NEXT: retq # encoding: [0xc3] 16708 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask) 16709 ret <2 x double> %res 16710} 16711 16712define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) { 16713; X86-LABEL: test_maskz_expand_pd_128: 16714; X86: # %bb.0: 16715; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16716; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16717; X86-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0] 16718; X86-NEXT: retl # encoding: [0xc3] 16719; 16720; X64-LABEL: test_maskz_expand_pd_128: 16721; X64: # %bb.0: 16722; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16723; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0] 16724; X64-NEXT: retq # encoding: [0xc3] 16725 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask) 16726 ret <2 x double> %res 16727} 16728 16729declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask) 16730 16731define <4 x float> @test_expand_ps_128(<4 x float> %data) { 16732; CHECK-LABEL: test_expand_ps_128: 16733; CHECK: # %bb.0: 16734; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16735 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1) 16736 ret <4 x float> %res 16737} 16738 16739define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { 16740; X86-LABEL: test_mask_expand_ps_128: 16741; X86: # %bb.0: 16742; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16743; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16744; X86-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8] 16745; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16746; X86-NEXT: retl # encoding: [0xc3] 16747; 16748; X64-LABEL: test_mask_expand_ps_128: 16749; X64: # %bb.0: 16750; X64-NEXT: kmovw %edi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xcf] 16751; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8] 16752; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16753; X64-NEXT: retq # encoding: [0xc3] 16754 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask) 16755 ret <4 x float> %res 16756} 16757 16758define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) { 16759; X86-LABEL: test_maskz_expand_ps_128: 16760; X86: # %bb.0: 16761; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16762; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16763; X86-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0] 16764; X86-NEXT: retl # encoding: [0xc3] 16765; 16766; X64-LABEL: test_maskz_expand_ps_128: 16767; X64: # %bb.0: 16768; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16769; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0] 16770; X64-NEXT: retq # encoding: [0xc3] 16771 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask) 16772 ret <4 x float> %res 16773} 16774 16775declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) 16776 16777define <2 x i64> @test_expand_q_128(<2 x i64> %data) { 16778; CHECK-LABEL: test_expand_q_128: 16779; CHECK: # %bb.0: 16780; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16781 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1) 16782 ret <2 x i64> %res 16783} 16784 16785define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { 16786; X86-LABEL: test_mask_expand_q_128: 16787; X86: # %bb.0: 16788; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16789; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16790; X86-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8] 16791; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16792; X86-NEXT: retl # encoding: [0xc3] 16793; 16794; X64-LABEL: test_mask_expand_q_128: 16795; X64: # %bb.0: 16796; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16797; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8] 16798; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16799; X64-NEXT: retq # encoding: [0xc3] 16800 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) 16801 ret <2 x i64> %res 16802} 16803 16804define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) { 16805; X86-LABEL: test_maskz_expand_q_128: 16806; X86: # %bb.0: 16807; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16808; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16809; X86-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0] 16810; X86-NEXT: retl # encoding: [0xc3] 16811; 16812; X64-LABEL: test_maskz_expand_q_128: 16813; X64: # %bb.0: 16814; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16815; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0] 16816; X64-NEXT: retq # encoding: [0xc3] 16817 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> 
zeroinitializer, i8 %mask) 16818 ret <2 x i64> %res 16819} 16820 16821declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask) 16822 16823define <4 x i32> @test_expand_d_128(<4 x i32> %data) { 16824; CHECK-LABEL: test_expand_d_128: 16825; CHECK: # %bb.0: 16826; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16827 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1) 16828 ret <4 x i32> %res 16829} 16830 16831define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { 16832; X86-LABEL: test_mask_expand_d_128: 16833; X86: # %bb.0: 16834; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16835; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16836; X86-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8] 16837; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16838; X86-NEXT: retl # encoding: [0xc3] 16839; 16840; X64-LABEL: test_mask_expand_d_128: 16841; X64: # %bb.0: 16842; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16843; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8] 16844; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 16845; X64-NEXT: retq # encoding: [0xc3] 16846 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) 16847 ret <4 x i32> %res 16848} 16849 16850define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) { 16851; X86-LABEL: test_maskz_expand_d_128: 16852; X86: # %bb.0: 16853; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16854; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16855; X86-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] 16856; X86-NEXT: retl # encoding: [0xc3] 16857; 16858; X64-LABEL: test_maskz_expand_d_128: 16859; X64: # %bb.0: 16860; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16861; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] 16862; X64-NEXT: retq # encoding: [0xc3] 16863 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask) 16864 ret <4 x i32> %res 16865} 16866 16867declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) 16868 16869define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) { 16870; X86-LABEL: test_mask_compress_pd_256: 16871; X86: # %bb.0: 16872; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 16873; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 16874; X86-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] 16875; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 16876; X86-NEXT: retl # encoding: [0xc3] 16877; 16878; X64-LABEL: test_mask_compress_pd_256: 16879; X64: # %bb.0: 16880; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 16881; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] 16882; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 16883; X64-NEXT: retq # encoding: [0xc3] 16884 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask) 
16885 ret <4 x double> %res
16886}
16887
16888define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
16889; X86-LABEL: test_maskz_compress_pd_256:
16890; X86: # %bb.0:
16891; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
16892; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
16893; X86-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
16894; X86-NEXT: retl # encoding: [0xc3]
16895;
16896; X64-LABEL: test_maskz_compress_pd_256:
16897; X64: # %bb.0:
16898; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
16899; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
16900; X64-NEXT: retq # encoding: [0xc3]
16901 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
16902 ret <4 x double> %res
16903}
16904
16905define <4 x double> @test_compress_pd_256(<4 x double> %data) {
16906; CHECK-LABEL: test_compress_pd_256:
16907; CHECK: # %bb.0:
16908; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
16909 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
16910 ret <4 x double> %res
16911}
16912
16913declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
16914
16915define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
16916; X86-LABEL: test_mask_compress_ps_256:
16917; X86: # %bb.0:
16918; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
16919; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
16920; X86-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
16921; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
16922; X86-NEXT: retl # encoding: [0xc3]
16923;
16924; X64-LABEL: test_mask_compress_ps_256:
16925; X64: # %bb.0:
16926; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
16927; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
16928; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
16929; X64-NEXT: retq # encoding: [0xc3]
16930 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
16931 ret <8 x float> %res
16932}
16933
16934define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
16935; X86-LABEL: test_maskz_compress_ps_256:
16936; X86: # %bb.0:
16937; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
16938; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
16939; X86-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
16940; X86-NEXT: retl # encoding: [0xc3]
16941;
16942; X64-LABEL: test_maskz_compress_ps_256:
16943; X64: # %bb.0:
16944; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
16945; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
16946; X64-NEXT: retq # encoding: [0xc3]
16947 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
16948 ret <8 x float> %res
16949}
16950
16951define <8 x float> @test_compress_ps_256(<8 x float> %data) {
16952; CHECK-LABEL: test_compress_ps_256:
16953; CHECK: # %bb.0:
16954; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
16955 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
16956 ret <8 x float> %res
16957}
16958
16959declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
16960
16961define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
16962; X86-LABEL: test_mask_compress_q_256:
16963; X86: # %bb.0:
16964; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
16965; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
16966; X86-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
16967; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
16968; X86-NEXT: retl # encoding: [0xc3]
16969;
16970; X64-LABEL: test_mask_compress_q_256:
16971; X64: # %bb.0:
16972; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
16973; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
16974; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
16975; X64-NEXT: retq # encoding: [0xc3]
16976 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
16977 ret <4 x i64> %res
16978}
16979
16980define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
16981; X86-LABEL: test_maskz_compress_q_256:
16982; X86: # %bb.0:
16983; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
16984; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
16985; X86-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
16986; X86-NEXT: retl # encoding: [0xc3]
16987;
16988; X64-LABEL: test_maskz_compress_q_256:
16989; X64: # %bb.0:
16990; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
16991; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
16992; X64-NEXT: retq # encoding: [0xc3]
16993 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
16994 ret <4 x i64> %res
16995}
16996
16997define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
16998; CHECK-LABEL: test_compress_q_256:
16999; CHECK: # %bb.0:
17000; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17001 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
17002 ret <4 x i64> %res
17003}
17004
17005declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
17006
17007define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
17008; X86-LABEL: test_mask_compress_d_256:
17009; X86: # %bb.0:
17010; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17011; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17012; X86-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
17013; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17014; X86-NEXT: retl # encoding: [0xc3]
17015;
17016; X64-LABEL: test_mask_compress_d_256:
17017; X64: # %bb.0:
17018; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17019; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
17020; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17021; X64-NEXT: retq # encoding: [0xc3]
17022 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
17023 ret <8 x i32> %res
17024}
17025
17026define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
17027; X86-LABEL: test_maskz_compress_d_256:
17028; X86: # %bb.0:
17029; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17030; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17031; X86-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
17032; X86-NEXT: retl # encoding: [0xc3]
17033;
17034; X64-LABEL: test_maskz_compress_d_256:
17035; X64: # %bb.0:
17036; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17037; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
17038; X64-NEXT: retq # encoding: [0xc3]
17039 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
17040 ret <8 x i32> %res
17041}
17042
17043define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
17044; CHECK-LABEL: test_compress_d_256:
17045; CHECK: # %bb.0:
17046; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17047 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
17048 ret <8 x i32> %res
17049}
17050
17051declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
17052
17053define <4 x double> @test_expand_pd_256(<4 x double> %data) {
17054; CHECK-LABEL: test_expand_pd_256:
17055; CHECK: # %bb.0:
17056; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17057 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
17058 ret <4 x double> %res
17059}
17060
17061define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
17062; X86-LABEL: test_mask_expand_pd_256:
17063; X86: # %bb.0:
17064; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17065; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17066; X86-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
17067; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17068; X86-NEXT: retl # encoding: [0xc3]
17069;
17070; X64-LABEL: test_mask_expand_pd_256:
17071; X64: # %bb.0:
17072; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17073; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
17074; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17075; X64-NEXT: retq # encoding: [0xc3]
17076 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
17077 ret <4 x double> %res
17078}
17079
17080define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
17081; X86-LABEL: test_maskz_expand_pd_256:
17082; X86: # %bb.0:
17083; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17084; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17085; X86-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
17086; X86-NEXT: retl # encoding: [0xc3]
17087;
17088; X64-LABEL: test_maskz_expand_pd_256:
17089; X64: # %bb.0:
17090; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17091; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
17092; X64-NEXT: retq # encoding: [0xc3]
17093 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
17094 ret <4 x double> %res
17095}
17096
17097declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
17098
17099define <8 x float> @test_expand_ps_256(<8 x float> %data) {
17100; CHECK-LABEL: test_expand_ps_256:
17101; CHECK: # %bb.0:
17102; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17103 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
17104 ret <8 x float> %res
17105}
17106
17107define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
17108; X86-LABEL: test_mask_expand_ps_256:
17109; X86: # %bb.0:
17110; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17111; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17112; X86-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
17113; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17114; X86-NEXT: retl # encoding: [0xc3]
17115;
17116; X64-LABEL: test_mask_expand_ps_256:
17117; X64: # %bb.0:
17118; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17119; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
17120; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17121; X64-NEXT: retq # encoding: [0xc3]
17122 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
17123 ret <8 x float> %res
17124}
17125
17126define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
17127; X86-LABEL: test_maskz_expand_ps_256:
17128; X86: # %bb.0:
17129; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17130; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17131; X86-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
17132; X86-NEXT: retl # encoding: [0xc3]
17133;
17134; X64-LABEL: test_maskz_expand_ps_256:
17135; X64: # %bb.0:
17136; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17137; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
17138; X64-NEXT: retq # encoding: [0xc3]
17139 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
17140 ret <8 x float> %res
17141}
17142
17143declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
17144
17145define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
17146; CHECK-LABEL: test_expand_q_256:
17147; CHECK: # %bb.0:
17148; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17149 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
17150 ret <4 x i64> %res
17151}
17152
17153define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
17154; X86-LABEL: test_mask_expand_q_256:
17155; X86: # %bb.0:
17156; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17157; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17158; X86-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
17159; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17160; X86-NEXT: retl # encoding: [0xc3]
17161;
17162; X64-LABEL: test_mask_expand_q_256:
17163; X64: # %bb.0:
17164; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17165; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
17166; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17167; X64-NEXT: retq # encoding: [0xc3]
17168 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
17169 ret <4 x i64> %res
17170}
17171
17172define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
17173; X86-LABEL: test_maskz_expand_q_256:
17174; X86: # %bb.0:
17175; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17176; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17177; X86-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
17178; X86-NEXT: retl # encoding: [0xc3]
17179;
17180; X64-LABEL: test_maskz_expand_q_256:
17181; X64: # %bb.0:
17182; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17183; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
17184; X64-NEXT: retq # encoding: [0xc3]
17185 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
17186 ret <4 x i64> %res
17187}
17188
17189declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
17190
17191define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
17192; CHECK-LABEL: test_expand_d_256:
17193; CHECK: # %bb.0:
17194; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17195 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
17196 ret <8 x i32> %res
17197}
17198
17199define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
17200; X86-LABEL: test_mask_expand_d_256:
17201; X86: # %bb.0:
17202; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17203; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17204; X86-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
17205; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17206; X86-NEXT: retl # encoding: [0xc3]
17207;
17208; X64-LABEL: test_mask_expand_d_256:
17209; X64: # %bb.0:
17210; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17211; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
17212; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
17213; X64-NEXT: retq # encoding: [0xc3]
17214 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
17215 ret <8 x i32> %res
17216}
17217
17218define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
17219; X86-LABEL: test_maskz_expand_d_256:
17220; X86: # %bb.0:
17221; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
17222; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
17223; X86-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
17224; X86-NEXT: retl # encoding: [0xc3]
17225;
17226; X64-LABEL: test_maskz_expand_d_256:
17227; X64: # %bb.0:
17228; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17229; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
17230; X64-NEXT: retq # encoding: [0xc3]
17231 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
17232 ret <8 x i32> %res
17233}
17234
17235declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
17236
17237define void @test_cmp_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %p) {
17238; X86-LABEL: test_cmp_128:
17239; X86: # %bb.0: # %entry
17240; X86-NEXT: pushl %ebp # encoding: [0x55]
17241; X86-NEXT: .cfi_def_cfa_offset 8
17242; X86-NEXT: .cfi_offset %ebp, -8
17243; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
17244; X86-NEXT: .cfi_def_cfa_register %ebp
17245; X86-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0]
17246; X86-NEXT: subl $16, %esp # encoding: [0x83,0xec,0x10]
17247; X86-NEXT: movl 24(%ebp), %eax # encoding: [0x8b,0x45,0x18]
17248; X86-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
17249; X86-NEXT: vcmpltps 8(%ebp), %xmm2, %k1 # encoding: [0x62,0xf1,0x6c,0x08,0xc2,0x8d,0x08,0x00,0x00,0x00,0x01]
17250; X86-NEXT: kshiftlw $4, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x04]
17251; X86-NEXT: korw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x45,0xc9]
17252; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
17253; X86-NEXT: vmovaps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x00]
17254; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
17255; X86-NEXT: popl %ebp # encoding: [0x5d]
17256; X86-NEXT: .cfi_def_cfa %esp, 4
17257; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
17258; X86-NEXT: retl # encoding: [0xc3]
17259;
17260; X64-LABEL: test_cmp_128:
17261; X64: # %bb.0: # %entry
17262; X64-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
17263; X64-NEXT: vcmpltps %xmm3, %xmm2, %k1 # encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xcb,0x01]
17264; X64-NEXT: kshiftlw $4, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x04]
17265; X64-NEXT: korw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x45,0xc9]
17266; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
17267; X64-NEXT: vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
17268; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
17269; X64-NEXT: retq # encoding: [0xc3]
17270 entry:
17271 %0 = tail call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 1)
17272 %1 = tail call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %c, <4 x float> %d, i32 1)
17273 %2 = bitcast float* %p to <8 x float>*
17274 %3 = shufflevector <4 x i1> %0, <4 x i1> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
17275 tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3)
17276 ret void
17277}
17278
17279define void @test_cmp_256(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) {
17280; X86-LABEL: test_cmp_256:
17281; X86: # %bb.0: # %entry
17282; X86-NEXT: pushl %ebp # encoding: [0x55]
17283; X86-NEXT: .cfi_def_cfa_offset 8
17284; X86-NEXT: .cfi_offset %ebp, -8
17285; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
17286; X86-NEXT: .cfi_def_cfa_register %ebp
17287; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
17288; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
17289; X86-NEXT: movl 40(%ebp), %eax # encoding: [0x8b,0x45,0x28]
17290; X86-NEXT: vcmpltps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x01]
17291; X86-NEXT: vcmpltps 8(%ebp), %ymm2, %k1 # encoding: [0x62,0xf1,0x6c,0x28,0xc2,0x8d,0x08,0x00,0x00,0x00,0x01]
17292; X86-NEXT: kunpckbw %k0, %k1, %k1 # encoding: [0xc5,0xf5,0x4b,0xc8]
17293; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
17294; X86-NEXT: vmovaps %zmm0, (%eax) {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x29,0x00]
17295; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
17296; X86-NEXT: popl %ebp # encoding: [0x5d]
17297; X86-NEXT: .cfi_def_cfa %esp, 4
17298; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
17299; X86-NEXT: retl # encoding: [0xc3]
17300;
17301; X64-LABEL: test_cmp_256:
17302; X64: # %bb.0: # %entry
17303; X64-NEXT: vcmpltps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x01]
17304; X64-NEXT: vcmpltps %ymm3, %ymm2, %k1 # encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xcb,0x01]
17305; X64-NEXT: kunpckbw %k0, %k1, %k1 # encoding: [0xc5,0xf5,0x4b,0xc8]
17306; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
17307; X64-NEXT: vmovaps %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
17308; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
17309; X64-NEXT: retq # encoding: [0xc3]
17310 entry:
17311 %0 = tail call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1)
17312 %1 = tail call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1)
17313 %2 = bitcast float* %p to <16 x float>*
17314 %3 = shufflevector <8 x i1> %0, <8 x i1> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17315 tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3)
17316 ret void
17317}
17318
17319declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
17320declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
17321declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
17322declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
17323