; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %data)
  ret <32 x i16> %3
}

define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> zeroinitializer)
  ret <32 x i16> %3
}

define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> %data)
  ret <32 x i16> %2
}

define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> %data)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> zeroinitializer)
  ret <64 x i8> %2
}

define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i8> %data)
  ret <64 x i8> %1
}

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1)
  ret void
}

define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64: # %bb.0:
; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <16 x i32> @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc1,0x17]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc1,0x17]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

define <8 x i64> @test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc1,0x17]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc1,0x17]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}

define <32 x i16> @test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc1,0x07]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc1,0x07]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

define <16 x i32> @test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc1,0x17]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc1,0x17]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

define <8 x i64> @test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc1,0x17]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc1,0x17]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}

define <32 x i16> @test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc1,0x07]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc1,0x07]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

define <16 x i32> @test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18]
; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f]
; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

define <8 x i64> @test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18]
; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f]
; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %res3 = add <8 x i64> %3, %6
  ret <8 x i64> %res3
}

define <32 x i16> @test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18]
; X86-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f]
; X64-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %res3 = add <32 x i16> %3, %6
  ret <32 x i16> %res3
}

define <16 x i32> @test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18]
; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f]
; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

define <8 x i64> @test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18]
; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f]
; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %res3 = add <8 x i64> %3, %6
  ret <8 x i64> %res3
}

define <32 x i16> @test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18]
; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f]
; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %res3 = add <32 x i16> %3, %6
  ret <32 x i16> %res3
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)