; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; AVX512BW intrinsic codegen tests. Every function below calls one
; @llvm.x86.avx512.* intrinsic; the assertions pin the assembly and MC encoding
; produced by the i686 run (X86 prefix) and the x86_64 run (X64 prefix). The
; CHECK prefix is shared by both runs and is used where the output is identical.

; kunpck.wd on two i32 mask operands; both runs expect a single kunpckwd.
declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)

define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
; X86-LABEL: test_int_x86_avx512_kunpck_wd:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_kunpck_wd:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
  ret i32 %res
}

; kunpck.dq on two i64 mask operands. The x86_64 run expects kunpckdq; the i686
; run expects only two 32-bit loads into edx/eax.
; NOTE(review): the function name spells "qd" while the intrinsic is "dq"; the
; name is left unchanged because the LABEL assertions reference it.
declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)

define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
; X86-LABEL: test_int_x86_avx512_kunpck_qd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_kunpck_qd:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    kunpckdq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4b,0xc1]
; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
  ret i64 %res
}

; Masked broadcast of a scalar byte. Three calls are made (all-ones mask,
; %mask with %x1 as passthrough, %mask with a zero passthrough) and their
; results are added together.
declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf]
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7]
; X64-NEXT:    vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
; X64-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
  %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
  %res3 = add <64 x i8> %res, %res1
  %res4 = add <64 x i8> %res2, %res3
  ret <64 x i8> %res4
}

; Same three-call pattern as above for the 16-bit element broadcast.
declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x4c,0x24,0x02]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqu16 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7]
; X64-NEXT:    vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res2, %res3
  ret <32 x i16> %res4
}

; Masked unaligned byte store: one call uses the variable mask %x2 (to %ptr1),
; the other an all-ones mask (to %ptr2).
declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)

define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu8 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
  ret void
}

; Masked unaligned word store, same two-call pattern as the byte variant.
declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)

define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu16 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu16 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
  ret void
}

; Masked unaligned word load: an all-ones load from %ptr becomes the
; passthrough of a masked load from %ptr2, which is then added to a
; zero-passthrough masked load from %ptr.
declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}

; Masked unaligned byte load, same chaining as the word variant.
declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1)
  %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask)
  %res2 = add <64 x i8> %res, %res1
  ret <64 x i8> %res2
}

; psll.dq with immediates 8 and 4 on a register operand, then with immediate 4
; on a value loaded from memory (expected to fold the load into vpslldq).
declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_psll_dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08]
; CHECK-NEXT:    # zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
; CHECK-NEXT:    vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04]
; CHECK-NEXT:    # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

define <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) {
; X86-LABEL: test_int_x86_avx512_psll_load_dq_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpslldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x38,0x04]
; X86-NEXT:    # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psll_load_dq_512:
; X64:       # %bb.0:
; X64-NEXT:    vpslldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x3f,0x04]
; X64-NEXT:    # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <8 x i64>, <8 x i64> *%p0
  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  ret <8 x i64> %res
}

; psrl.dq, mirroring the two psll.dq tests above.
declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08]
; CHECK-NEXT:    # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04]
; CHECK-NEXT:    # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

define <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) {
; X86-LABEL: test_int_x86_avx512_psrl_load_dq_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsrldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x18,0x04]
; X86-NEXT:    # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrl_load_dq_512:
; X64:       # %bb.0:
; X64-NEXT:    vpsrldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x1f,0x04]
; X64-NEXT:    # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <8 x i64>, <8 x i64> *%p0
  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  ret <8 x i64> %res
}

; mask.palignr with immediate 2 in three forms: all-ones mask, variable mask
; with %x3 as passthrough, and variable mask with a zero passthrough.
declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xc1,0x02]
; CHECK-NEXT:    # zmm0 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
; X86-NEXT:    # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_palignr_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
; X64-NEXT:    # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_maskz_palignr_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
; X86-NEXT:    # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_palignr_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
; X64-NEXT:    # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
  ret <64 x i8> %res
}

; mask.pshufh.w with immediate 3: all-ones mask, variable mask with %x2 as
; passthrough, and variable mask with a zero passthrough. The %x1 parameter of
; the first two tests is not used by their bodies.
declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw $3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xc0,0x03]
; CHECK-NEXT:    # zmm0 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
; X86-NEXT:    # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
; X64-NEXT:    # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_maskz_pshufh_w_512(<32 x i16> %x0, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
; X86-NEXT:    # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
; X64-NEXT:    # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
  ret <32 x i16> %res
}

; mask.pshufl.w with immediate 3, same three forms as the pshufh.w tests.
declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw $3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xc0,0x03]
; CHECK-NEXT:    # zmm0 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
; X86-NEXT:    # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
; X64-NEXT:    # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_maskz_pshufl_w_512(<32 x i16> %x0, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
; X86-NEXT:    # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
; X64-NEXT:    # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
  ret <32 x i16> %res
}

; Byte equality compare returning an i64 mask. The "mask" variant passes a
; variable mask, which the expected output applies with and instructions. On
; i686 the 64-bit result is returned in edx:eax via kshiftrq + two kmovd.
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; X86-LABEL: test_pcmpeq_b:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_pcmpeq_b:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; X86-LABEL: test_mask_pcmpeq_b:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_b:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT:    andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)

; Word equality compare returning an i32 mask, unmasked and masked.
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpeq_w:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_w:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)

; Byte greater-than compare returning an i64 mask, unmasked and masked.
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; X86-LABEL: test_pcmpgt_b:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_pcmpgt_b:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; X86-LABEL: test_mask_pcmpgt_b:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_b:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT:    andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)

; Word greater-than compare returning an i32 mask, unmasked and masked.
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpgt_w:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_w:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)

; mask.punpckhb.w with an all-ones mask; the %x2 passthrough is passed through
; to the call but the expected output is a plain vpunpckhbw.
declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xc1]
; CHECK-NEXT:    # zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  ret <64 x i8> %res
}

; Masked punpckhb.w: forwards %x2 and the i64 mask %x3 to the intrinsic. The
; expected code moves the mask into %k1 (from the stack on i686, from %rdi on
; x86_64), issues a {%k1}-masked vpunpckhbw that writes %zmm2, and then copies
; %zmm2 into %zmm0.
define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
; X86-NEXT:    # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
; X64-NEXT:    # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; punpcklb.w called with a constant i64 -1 mask: both run lines share one
; expectation, a single vpunpcklbw with no mask register.
define <64 x i8>@test_int_x86_avx512_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xc1]
; CHECK-NEXT:    # zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  ret <64 x i8> %res
}

; Masked punpcklb.w with a variable i64 mask %x3: expects the mask in %k1, a
; {%k1}-masked vpunpcklbw writing %zmm2, then a copy of %zmm2 into %zmm0.
define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
; X86-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
; X64-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ret <64 x i8> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; punpckhw.d called with a constant i32 -1 mask: both run lines share one
; expectation, a single vpunpckhwd with no mask register.
define <32 x i16>@test_int_x86_avx512_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xc1]
; CHECK-NEXT:    # zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}

; Masked punpckhw.d with a variable i32 mask %x3: expects a kmovd of the mask
; into %k1 (stack slot on i686, %edi on x86_64), a {%k1}-masked vpunpckhwd
; writing %zmm2, then a copy of %zmm2 into %zmm0.
define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
; X86-NEXT:    # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
; X64-NEXT:    # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; punpcklw.d called with a constant i32 -1 mask: both run lines share one
; expectation, a single vpunpcklwd with no mask register.
define <32 x i16>@test_int_x86_avx512_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xc1]
; CHECK-NEXT:    # zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}

; Masked punpcklw.d with a variable i32 mask %x3: expects the mask in %k1, a
; {%k1}-masked vpunpcklwd writing %zmm2, then a copy of %zmm2 into %zmm0.
define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
; X86-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
; X64-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  ret <32 x i16> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; pmaxs.b called with a constant i64 -1 mask: expects a single unmasked
; vpmaxsb on both run lines.
define <64 x i8>@test_int_x86_avx512_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  ret <64 x i8> %res
}

; Masked pmaxs.b with a variable i64 mask %x3: expects the mask in %k1, a
; {%k1}-masked vpmaxsb writing %zmm2, then a copy of %zmm2 into %zmm0.
define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ret <64 x i8> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; pmaxs.w called with a constant i32 -1 mask: expects a single unmasked
; vpmaxsw on both run lines.
define <32 x i16>@test_int_x86_avx512_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}

; Masked pmaxs.w with a variable i32 mask %x3: expects the mask in %k1, a
; {%k1}-masked vpmaxsw writing %zmm2, then a copy of %zmm2 into %zmm0.
define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  ret <32 x i16> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; pmaxu.b called with a constant i64 -1 mask: expects a single unmasked
; vpmaxub on both run lines.
define <64 x i8>@test_int_x86_avx512_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  ret <64 x i8> %res
}

; Masked pmaxu.b with a variable i64 mask %x3: expects the mask in %k1, a
; {%k1}-masked vpmaxub writing %zmm2, then a copy of %zmm2 into %zmm0.
define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ret <64 x i8> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

807define <32 x i16>@test_int_x86_avx512_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 808; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_512: 809; CHECK: # %bb.0: 810; CHECK-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xc1] 811; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 812 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 813 ret <32 x i16> %res 814} 815 816define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 817; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 818; X86: # %bb.0: 819; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 820; X86-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1] 821; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 822; X86-NEXT: retl # encoding: [0xc3] 823; 824; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 825; X64: # %bb.0: 826; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 827; X64-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1] 828; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 829; X64-NEXT: retq # encoding: [0xc3] 830 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 831 ret <32 x i16> %res 832} 833 834declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 835 836define <64 x i8>@test_int_x86_avx512_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) { 837; CHECK-LABEL: test_int_x86_avx512_pmins_b_512: 838; CHECK: # %bb.0: 839; CHECK-NEXT: vpminsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xc1] 840; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 841 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 842 ret <64 x 
i8> %res 843} 844 845define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 846; X86-LABEL: test_int_x86_avx512_mask_pmins_b_512: 847; X86: # %bb.0: 848; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 849; X86-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1] 850; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 851; X86-NEXT: retl # encoding: [0xc3] 852; 853; X64-LABEL: test_int_x86_avx512_mask_pmins_b_512: 854; X64: # %bb.0: 855; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 856; X64-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1] 857; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 858; X64-NEXT: retq # encoding: [0xc3] 859 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 860 ret <64 x i8> %res 861} 862 863declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 864 865define <32 x i16>@test_int_x86_avx512_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 866; CHECK-LABEL: test_int_x86_avx512_pmins_w_512: 867; CHECK: # %bb.0: 868; CHECK-NEXT: vpminsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xc1] 869; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 870 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 871 ret <32 x i16> %res 872} 873 874define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 875; X86-LABEL: test_int_x86_avx512_mask_pmins_w_512: 876; X86: # %bb.0: 877; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 878; X86-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1] 879; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # 
encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 880; X86-NEXT: retl # encoding: [0xc3] 881; 882; X64-LABEL: test_int_x86_avx512_mask_pmins_w_512: 883; X64: # %bb.0: 884; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 885; X64-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1] 886; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 887; X64-NEXT: retq # encoding: [0xc3] 888 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 889 ret <32 x i16> %res 890} 891 892declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 893 894define <64 x i8>@test_int_x86_avx512_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) { 895; CHECK-LABEL: test_int_x86_avx512_pminu_b_512: 896; CHECK: # %bb.0: 897; CHECK-NEXT: vpminub %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xc1] 898; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 899 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 900 ret <64 x i8> %res 901} 902 903define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 904; X86-LABEL: test_int_x86_avx512_mask_pminu_b_512: 905; X86: # %bb.0: 906; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 907; X86-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1] 908; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 909; X86-NEXT: retl # encoding: [0xc3] 910; 911; X64-LABEL: test_int_x86_avx512_mask_pminu_b_512: 912; X64: # %bb.0: 913; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 914; X64-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1] 915; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 916; X64-NEXT: retq # encoding: [0xc3] 917 
%res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 918 ret <64 x i8> %res 919} 920 921declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 922 923define <32 x i16>@test_int_x86_avx512_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 924; CHECK-LABEL: test_int_x86_avx512_pminu_w_512: 925; CHECK: # %bb.0: 926; CHECK-NEXT: vpminuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xc1] 927; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 928 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 929 ret <32 x i16> %res 930} 931 932define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 933; X86-LABEL: test_int_x86_avx512_mask_pminu_w_512: 934; X86: # %bb.0: 935; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 936; X86-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1] 937; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 938; X86-NEXT: retl # encoding: [0xc3] 939; 940; X64-LABEL: test_int_x86_avx512_mask_pminu_w_512: 941; X64: # %bb.0: 942; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 943; X64-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1] 944; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 945; X64-NEXT: retq # encoding: [0xc3] 946 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 947 ret <32 x i16> %res 948} 949 950declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32) 951 952define <32 x i16>@test_int_x86_avx512_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1) { 953; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_512: 954; CHECK: # %bb.0: 955; CHECK-NEXT: vpmovzxbw %ymm0, 
%zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xc0] 956; CHECK-NEXT: # zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 957; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 958 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) 959 ret <32 x i16> %res 960} 961 962define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { 963; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: 964; X86: # %bb.0: 965; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 966; X86-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8] 967; X86-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 968; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 969; X86-NEXT: retl # encoding: [0xc3] 970; 971; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: 972; X64: # %bb.0: 973; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 974; X64-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8] 975; X64-NEXT: # zmm1 {%k1} = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 976; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 977; X64-NEXT: retq # encoding: [0xc3] 978 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) 979 ret <32 x i16> %res 980} 981 982define <32 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_512(<32 x i8> %x0, i32 %x2) { 983; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_512: 984; X86: # %bb.0: 985; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 986; X86-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0] 987; X86-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 988; X86-NEXT: retl # encoding: [0xc3] 989; 990; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_512: 991; X64: # %bb.0: 992; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 993; X64-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0] 994; X64-NEXT: # zmm0 {%k1} {z} = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 995; X64-NEXT: retq # encoding: [0xc3] 996 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) 997 ret <32 x i16> %res 998} 999 1000declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32) 1001 1002define <32 x i16>@test_int_x86_avx512_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1) { 1003; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_512: 1004; CHECK: # %bb.0: 1005; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xc0] 1006; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1007 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) 1008 ret <32 x i16> %res 1009} 1010 1011define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { 1012; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: 1013; X86: # %bb.0: 1014; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1015; X86-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8] 1016; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1017; X86-NEXT: retl # encoding: [0xc3] 1018; 1019; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: 1020; X64: # %bb.0: 1021; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1022; X64-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8] 1023; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1024; X64-NEXT: retq # encoding: 
[0xc3] 1025 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) 1026 ret <32 x i16> %res 1027} 1028 1029define <32 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_512(<32 x i8> %x0, i32 %x2) { 1030; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_512: 1031; X86: # %bb.0: 1032; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1033; X86-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0] 1034; X86-NEXT: retl # encoding: [0xc3] 1035; 1036; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_512: 1037; X64: # %bb.0: 1038; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1039; X64-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0] 1040; X64-NEXT: retq # encoding: [0xc3] 1041 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) 1042 ret <32 x i16> %res 1043} 1044 1045declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 1046 1047define <32 x i16>@test_int_x86_avx512_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) { 1048; CHECK-LABEL: test_int_x86_avx512_psrl_w_512: 1049; CHECK: # %bb.0: 1050; CHECK-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1] 1051; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1052 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 1053 ret <32 x i16> %res 1054} 1055 1056define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1057; X86-LABEL: test_int_x86_avx512_mask_psrl_w_512: 1058; X86: # %bb.0: 1059; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1060; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1] 1061; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 
1062; X86-NEXT: retl # encoding: [0xc3] 1063; 1064; X64-LABEL: test_int_x86_avx512_mask_psrl_w_512: 1065; X64: # %bb.0: 1066; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1067; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1] 1068; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1069; X64-NEXT: retq # encoding: [0xc3] 1070 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 1071 ret <32 x i16> %res 1072} 1073 1074define <32 x i16>@test_int_x86_avx512_maskz_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) { 1075; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_512: 1076; X86: # %bb.0: 1077; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1078; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1] 1079; X86-NEXT: retl # encoding: [0xc3] 1080; 1081; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_512: 1082; X64: # %bb.0: 1083; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1084; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1] 1085; X64-NEXT: retq # encoding: [0xc3] 1086 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 1087 ret <32 x i16> %res 1088} 1089 1090declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32) 1091 1092define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 1093; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512: 1094; X86: # %bb.0: 1095; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1096; X86-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] 1097; X86-NEXT: vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04] 1098; X86-NEXT: vpsrlw $5, 
%zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05] 1099; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1100; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1101; X86-NEXT: retl # encoding: [0xc3] 1102; 1103; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512: 1104; X64: # %bb.0: 1105; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1106; X64-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] 1107; X64-NEXT: vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04] 1108; X64-NEXT: vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05] 1109; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1110; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1111; X64-NEXT: retq # encoding: [0xc3] 1112 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 1113 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1) 1114 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 5, <32 x i16> zeroinitializer, i32 %x3) 1115 %res3 = add <32 x i16> %res, %res1 1116 %res4 = add <32 x i16> %res3, %res2 1117 ret <32 x i16> %res4 1118} 1119 1120declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 1121 1122define <32 x i16>@test_int_x86_avx512_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) { 1123; CHECK-LABEL: test_int_x86_avx512_psra_w_512: 1124; CHECK: # %bb.0: 1125; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1] 1126; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1127 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 1128 ret <32 x i16> %res 1129} 1130 1131define <32 x 
i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1132; X86-LABEL: test_int_x86_avx512_mask_psra_w_512: 1133; X86: # %bb.0: 1134; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1135; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1] 1136; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1137; X86-NEXT: retl # encoding: [0xc3] 1138; 1139; X64-LABEL: test_int_x86_avx512_mask_psra_w_512: 1140; X64: # %bb.0: 1141; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1142; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1] 1143; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1144; X64-NEXT: retq # encoding: [0xc3] 1145 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 1146 ret <32 x i16> %res 1147} 1148 1149define <32 x i16>@test_int_x86_avx512_maskz_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) { 1150; X86-LABEL: test_int_x86_avx512_maskz_psra_w_512: 1151; X86: # %bb.0: 1152; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1153; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1] 1154; X86-NEXT: retl # encoding: [0xc3] 1155; 1156; X64-LABEL: test_int_x86_avx512_maskz_psra_w_512: 1157; X64: # %bb.0: 1158; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1159; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1] 1160; X64-NEXT: retq # encoding: [0xc3] 1161 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 1162 ret <32 x i16> %res 1163} 1164 1165declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32) 1166 1167define <32 x 
i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 1168; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512: 1169; X86: # %bb.0: 1170; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1171; X86-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] 1172; X86-NEXT: vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04] 1173; X86-NEXT: vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05] 1174; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1175; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1176; X86-NEXT: retl # encoding: [0xc3] 1177; 1178; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512: 1179; X64: # %bb.0: 1180; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1181; X64-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] 1182; X64-NEXT: vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04] 1183; X64-NEXT: vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05] 1184; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1185; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1186; X64-NEXT: retq # encoding: [0xc3] 1187 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 1188 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) 1189 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) 1190 %res3 = add <32 x i16> %res, %res1 1191 %res4 = add <32 x i16> %res3, %res2 1192 ret <32 x i16> %res4 1193} 1194 1195declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 1196 1197define <32 x 
i16>@test_int_x86_avx512_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) { 1198; CHECK-LABEL: test_int_x86_avx512_psll_w_512: 1199; CHECK: # %bb.0: 1200; CHECK-NEXT: vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1] 1201; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1202 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 1203 ret <32 x i16> %res 1204} 1205 1206define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1207; X86-LABEL: test_int_x86_avx512_mask_psll_w_512: 1208; X86: # %bb.0: 1209; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1210; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1] 1211; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1212; X86-NEXT: retl # encoding: [0xc3] 1213; 1214; X64-LABEL: test_int_x86_avx512_mask_psll_w_512: 1215; X64: # %bb.0: 1216; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1217; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1] 1218; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1219; X64-NEXT: retq # encoding: [0xc3] 1220 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 1221 ret <32 x i16> %res 1222} 1223 1224define <32 x i16>@test_int_x86_avx512_maskz_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) { 1225; X86-LABEL: test_int_x86_avx512_maskz_psll_w_512: 1226; X86: # %bb.0: 1227; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1228; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1] 1229; X86-NEXT: retl # encoding: [0xc3] 1230; 1231; X64-LABEL: test_int_x86_avx512_maskz_psll_w_512: 1232; X64: # %bb.0: 1233; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] 1234; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1] 1235; X64-NEXT: retq # encoding: [0xc3] 1236 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 1237 ret <32 x i16> %res 1238} 1239 1240declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32) 1241 1242define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 1243; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512: 1244; X86: # %bb.0: 1245; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1246; X86-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] 1247; X86-NEXT: vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04] 1248; X86-NEXT: vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05] 1249; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1250; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1251; X86-NEXT: retl # encoding: [0xc3] 1252; 1253; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512: 1254; X64: # %bb.0: 1255; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1256; X64-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] 1257; X64-NEXT: vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04] 1258; X64-NEXT: vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05] 1259; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1260; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1261; X64-NEXT: retq # encoding: [0xc3] 1262 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 1263 %res1 = call <32 x i16> 
@llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) 1264 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) 1265 %res3 = add <32 x i16> %res, %res1 1266 %res4 = add <32 x i16> %res3, %res2 1267 ret <32 x i16> %res4 1268} 1269 1270declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1271 1272define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) { 1273; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512: 1274; CHECK: # %bb.0: 1275; CHECK-NEXT: vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1] 1276; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1277 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1278 ret <64 x i8> %res 1279} 1280 1281define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1282; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_512: 1283; X86: # %bb.0: 1284; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1285; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] 1286; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1287; X86-NEXT: retl # encoding: [0xc3] 1288; 1289; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_512: 1290; X64: # %bb.0: 1291; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1292; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] 1293; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1294; X64-NEXT: retq # encoding: [0xc3] 1295 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1296 ret <64 x i8> %res 1297} 1298 1299 1300declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) 1301 1302define <64 x 
i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) { 1303; X86-LABEL: test_int_x86_avx512_cvtmask2b_512: 1304; X86: # %bb.0: 1305; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04] 1306; X86-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0] 1307; X86-NEXT: retl # encoding: [0xc3] 1308; 1309; X64-LABEL: test_int_x86_avx512_cvtmask2b_512: 1310; X64: # %bb.0: 1311; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7] 1312; X64-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0] 1313; X64-NEXT: retq # encoding: [0xc3] 1314 %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) 1315 ret <64 x i8> %res 1316} 1317 1318declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) 1319 1320define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) { 1321; X86-LABEL: test_int_x86_avx512_cvtmask2w_512: 1322; X86: # %bb.0: 1323; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] 1324; X86-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0] 1325; X86-NEXT: retl # encoding: [0xc3] 1326; 1327; X64-LABEL: test_int_x86_avx512_cvtmask2w_512: 1328; X64: # %bb.0: 1329; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 1330; X64-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0] 1331; X64-NEXT: retq # encoding: [0xc3] 1332 %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0) 1333 ret <32 x i16> %res 1334} 1335define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 1336; CHECK-LABEL: test_mask_packs_epi32_rr_512: 1337; CHECK: # %bb.0: 1338; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1] 1339; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1340 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1341 ret <32 x i16> %res 1342} 1343 1344define <32 x i16> 
@test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 1345; X86-LABEL: test_mask_packs_epi32_rrk_512: 1346; X86: # %bb.0: 1347; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1348; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] 1349; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1350; X86-NEXT: retl # encoding: [0xc3] 1351; 1352; X64-LABEL: test_mask_packs_epi32_rrk_512: 1353; X64: # %bb.0: 1354; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1355; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] 1356; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1357; X64-NEXT: retq # encoding: [0xc3] 1358 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1359 ret <32 x i16> %res 1360} 1361 1362define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 1363; X86-LABEL: test_mask_packs_epi32_rrkz_512: 1364; X86: # %bb.0: 1365; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1366; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] 1367; X86-NEXT: retl # encoding: [0xc3] 1368; 1369; X64-LABEL: test_mask_packs_epi32_rrkz_512: 1370; X64: # %bb.0: 1371; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1372; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] 1373; X64-NEXT: retq # encoding: [0xc3] 1374 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1375 ret <32 x i16> %res 1376} 1377 1378define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 1379; X86-LABEL: test_mask_packs_epi32_rm_512: 1380; X86: # 
%bb.0: 1381; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1382; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00] 1383; X86-NEXT: retl # encoding: [0xc3] 1384; 1385; X64-LABEL: test_mask_packs_epi32_rm_512: 1386; X64: # %bb.0: 1387; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07] 1388; X64-NEXT: retq # encoding: [0xc3] 1389 %b = load <16 x i32>, <16 x i32>* %ptr_b 1390 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1391 ret <32 x i16> %res 1392} 1393 1394define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1395; X86-LABEL: test_mask_packs_epi32_rmk_512: 1396; X86: # %bb.0: 1397; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1398; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1399; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08] 1400; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1401; X86-NEXT: retl # encoding: [0xc3] 1402; 1403; X64-LABEL: test_mask_packs_epi32_rmk_512: 1404; X64: # %bb.0: 1405; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1406; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f] 1407; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1408; X64-NEXT: retq # encoding: [0xc3] 1409 %b = load <16 x i32>, <16 x i32>* %ptr_b 1410 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1411 ret <32 x i16> %res 1412} 1413 1414define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1415; X86-LABEL: test_mask_packs_epi32_rmkz_512: 1416; X86: # %bb.0: 1417; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 1418; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1419; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00] 1420; X86-NEXT: retl # encoding: [0xc3] 1421; 1422; X64-LABEL: test_mask_packs_epi32_rmkz_512: 1423; X64: # %bb.0: 1424; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1425; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07] 1426; X64-NEXT: retq # encoding: [0xc3] 1427 %b = load <16 x i32>, <16 x i32>* %ptr_b 1428 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1429 ret <32 x i16> %res 1430} 1431 1432define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1433; X86-LABEL: test_mask_packs_epi32_rmb_512: 1434; X86: # %bb.0: 1435; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1436; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00] 1437; X86-NEXT: retl # encoding: [0xc3] 1438; 1439; X64-LABEL: test_mask_packs_epi32_rmb_512: 1440; X64: # %bb.0: 1441; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07] 1442; X64-NEXT: retq # encoding: [0xc3] 1443 %q = load i32, i32* %ptr_b 1444 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1445 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1446 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1447 ret <32 x i16> %res 1448} 1449 1450define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1451; X86-LABEL: test_mask_packs_epi32_rmbk_512: 1452; X86: # %bb.0: 1453; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1454; X86-NEXT: kmovd {{[0-9]+}}(%esp), 
%k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1455; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08] 1456; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1457; X86-NEXT: retl # encoding: [0xc3] 1458; 1459; X64-LABEL: test_mask_packs_epi32_rmbk_512: 1460; X64: # %bb.0: 1461; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1462; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f] 1463; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1464; X64-NEXT: retq # encoding: [0xc3] 1465 %q = load i32, i32* %ptr_b 1466 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1467 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1468 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1469 ret <32 x i16> %res 1470} 1471 1472define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1473; X86-LABEL: test_mask_packs_epi32_rmbkz_512: 1474; X86: # %bb.0: 1475; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1476; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1477; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00] 1478; X86-NEXT: retl # encoding: [0xc3] 1479; 1480; X64-LABEL: test_mask_packs_epi32_rmbkz_512: 1481; X64: # %bb.0: 1482; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1483; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07] 1484; X64-NEXT: retq # encoding: [0xc3] 1485 %q = load i32, i32* %ptr_b 1486 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1487 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1488 %res = call <32 x i16> 
@llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1489 ret <32 x i16> %res 1490} 1491 1492declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1493 1494define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1495; CHECK-LABEL: test_mask_packs_epi16_rr_512: 1496; CHECK: # %bb.0: 1497; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1] 1498; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1499 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1500 ret <64 x i8> %res 1501} 1502 1503define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1504; X86-LABEL: test_mask_packs_epi16_rrk_512: 1505; X86: # %bb.0: 1506; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1507; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1] 1508; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1509; X86-NEXT: retl # encoding: [0xc3] 1510; 1511; X64-LABEL: test_mask_packs_epi16_rrk_512: 1512; X64: # %bb.0: 1513; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1514; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1] 1515; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1516; X64-NEXT: retq # encoding: [0xc3] 1517 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1518 ret <64 x i8> %res 1519} 1520 1521define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1522; X86-LABEL: test_mask_packs_epi16_rrkz_512: 1523; X86: # %bb.0: 1524; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1525; X86-NEXT: 
vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1] 1526; X86-NEXT: retl # encoding: [0xc3] 1527; 1528; X64-LABEL: test_mask_packs_epi16_rrkz_512: 1529; X64: # %bb.0: 1530; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1531; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1] 1532; X64-NEXT: retq # encoding: [0xc3] 1533 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1534 ret <64 x i8> %res 1535} 1536 1537define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1538; X86-LABEL: test_mask_packs_epi16_rm_512: 1539; X86: # %bb.0: 1540; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1541; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00] 1542; X86-NEXT: retl # encoding: [0xc3] 1543; 1544; X64-LABEL: test_mask_packs_epi16_rm_512: 1545; X64: # %bb.0: 1546; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07] 1547; X64-NEXT: retq # encoding: [0xc3] 1548 %b = load <32 x i16>, <32 x i16>* %ptr_b 1549 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1550 ret <64 x i8> %res 1551} 1552 1553define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1554; X86-LABEL: test_mask_packs_epi16_rmk_512: 1555; X86: # %bb.0: 1556; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1557; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1558; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08] 1559; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1560; X86-NEXT: retl # encoding: [0xc3] 1561; 1562; X64-LABEL: test_mask_packs_epi16_rmk_512: 1563; X64: # 
%bb.0: 1564; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1565; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f] 1566; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1567; X64-NEXT: retq # encoding: [0xc3] 1568 %b = load <32 x i16>, <32 x i16>* %ptr_b 1569 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1570 ret <64 x i8> %res 1571} 1572 1573define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1574; X86-LABEL: test_mask_packs_epi16_rmkz_512: 1575; X86: # %bb.0: 1576; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1577; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1578; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00] 1579; X86-NEXT: retl # encoding: [0xc3] 1580; 1581; X64-LABEL: test_mask_packs_epi16_rmkz_512: 1582; X64: # %bb.0: 1583; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1584; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07] 1585; X64-NEXT: retq # encoding: [0xc3] 1586 %b = load <32 x i16>, <32 x i16>* %ptr_b 1587 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1588 ret <64 x i8> %res 1589} 1590 1591declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1592 1593 1594define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 1595; CHECK-LABEL: test_mask_packus_epi32_rr_512: 1596; CHECK: # %bb.0: 1597; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1] 1598; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1599 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> 
zeroinitializer, i32 -1) 1600 ret <32 x i16> %res 1601} 1602 1603define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 1604; X86-LABEL: test_mask_packus_epi32_rrk_512: 1605; X86: # %bb.0: 1606; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1607; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1] 1608; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1609; X86-NEXT: retl # encoding: [0xc3] 1610; 1611; X64-LABEL: test_mask_packus_epi32_rrk_512: 1612; X64: # %bb.0: 1613; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1614; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1] 1615; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1616; X64-NEXT: retq # encoding: [0xc3] 1617 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1618 ret <32 x i16> %res 1619} 1620 1621define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 1622; X86-LABEL: test_mask_packus_epi32_rrkz_512: 1623; X86: # %bb.0: 1624; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1625; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1] 1626; X86-NEXT: retl # encoding: [0xc3] 1627; 1628; X64-LABEL: test_mask_packus_epi32_rrkz_512: 1629; X64: # %bb.0: 1630; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1631; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1] 1632; X64-NEXT: retq # encoding: [0xc3] 1633 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1634 ret <32 x i16> %res 1635} 1636 1637define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> 
%a, <16 x i32>* %ptr_b) { 1638; X86-LABEL: test_mask_packus_epi32_rm_512: 1639; X86: # %bb.0: 1640; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1641; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00] 1642; X86-NEXT: retl # encoding: [0xc3] 1643; 1644; X64-LABEL: test_mask_packus_epi32_rm_512: 1645; X64: # %bb.0: 1646; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07] 1647; X64-NEXT: retq # encoding: [0xc3] 1648 %b = load <16 x i32>, <16 x i32>* %ptr_b 1649 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1650 ret <32 x i16> %res 1651} 1652 1653define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1654; X86-LABEL: test_mask_packus_epi32_rmk_512: 1655; X86: # %bb.0: 1656; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1657; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1658; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08] 1659; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1660; X86-NEXT: retl # encoding: [0xc3] 1661; 1662; X64-LABEL: test_mask_packus_epi32_rmk_512: 1663; X64: # %bb.0: 1664; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1665; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f] 1666; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1667; X64-NEXT: retq # encoding: [0xc3] 1668 %b = load <16 x i32>, <16 x i32>* %ptr_b 1669 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1670 ret <32 x i16> %res 1671} 1672 1673define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1674; X86-LABEL: 
test_mask_packus_epi32_rmkz_512: 1675; X86: # %bb.0: 1676; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1677; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1678; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00] 1679; X86-NEXT: retl # encoding: [0xc3] 1680; 1681; X64-LABEL: test_mask_packus_epi32_rmkz_512: 1682; X64: # %bb.0: 1683; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1684; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07] 1685; X64-NEXT: retq # encoding: [0xc3] 1686 %b = load <16 x i32>, <16 x i32>* %ptr_b 1687 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1688 ret <32 x i16> %res 1689} 1690 1691define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1692; X86-LABEL: test_mask_packus_epi32_rmb_512: 1693; X86: # %bb.0: 1694; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1695; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00] 1696; X86-NEXT: retl # encoding: [0xc3] 1697; 1698; X64-LABEL: test_mask_packus_epi32_rmb_512: 1699; X64: # %bb.0: 1700; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07] 1701; X64-NEXT: retq # encoding: [0xc3] 1702 %q = load i32, i32* %ptr_b 1703 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1704 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1705 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1706 ret <32 x i16> %res 1707} 1708 1709define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1710; X86-LABEL: test_mask_packus_epi32_rmbk_512: 1711; X86: # %bb.0: 1712; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1713; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1714; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08] 1715; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1716; X86-NEXT: retl # encoding: [0xc3] 1717; 1718; X64-LABEL: test_mask_packus_epi32_rmbk_512: 1719; X64: # %bb.0: 1720; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1721; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f] 1722; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1723; X64-NEXT: retq # encoding: [0xc3] 1724 %q = load i32, i32* %ptr_b 1725 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1726 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1727 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1728 ret <32 x i16> %res 1729} 1730 1731define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1732; X86-LABEL: test_mask_packus_epi32_rmbkz_512: 1733; X86: # %bb.0: 1734; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1735; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1736; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00] 1737; X86-NEXT: retl # encoding: [0xc3] 1738; 1739; X64-LABEL: test_mask_packus_epi32_rmbkz_512: 1740; X64: # %bb.0: 1741; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1742; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07] 1743; X64-NEXT: retq # encoding: [0xc3] 1744 %q = load i32, i32* %ptr_b 1745 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1746 %b = 
shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1747 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1748 ret <32 x i16> %res 1749} 1750 1751declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1752 1753define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1754; CHECK-LABEL: test_mask_packus_epi16_rr_512: 1755; CHECK: # %bb.0: 1756; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1] 1757; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1758 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1759 ret <64 x i8> %res 1760} 1761 1762define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1763; X86-LABEL: test_mask_packus_epi16_rrk_512: 1764; X86: # %bb.0: 1765; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1766; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1] 1767; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1768; X86-NEXT: retl # encoding: [0xc3] 1769; 1770; X64-LABEL: test_mask_packus_epi16_rrk_512: 1771; X64: # %bb.0: 1772; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1773; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1] 1774; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1775; X64-NEXT: retq # encoding: [0xc3] 1776 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1777 ret <64 x i8> %res 1778} 1779 1780define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1781; X86-LABEL: test_mask_packus_epi16_rrkz_512: 1782; X86: # 
%bb.0: 1783; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1784; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1] 1785; X86-NEXT: retl # encoding: [0xc3] 1786; 1787; X64-LABEL: test_mask_packus_epi16_rrkz_512: 1788; X64: # %bb.0: 1789; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1790; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1] 1791; X64-NEXT: retq # encoding: [0xc3] 1792 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1793 ret <64 x i8> %res 1794} 1795 1796define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1797; X86-LABEL: test_mask_packus_epi16_rm_512: 1798; X86: # %bb.0: 1799; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1800; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00] 1801; X86-NEXT: retl # encoding: [0xc3] 1802; 1803; X64-LABEL: test_mask_packus_epi16_rm_512: 1804; X64: # %bb.0: 1805; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07] 1806; X64-NEXT: retq # encoding: [0xc3] 1807 %b = load <32 x i16>, <32 x i16>* %ptr_b 1808 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1809 ret <64 x i8> %res 1810} 1811 1812define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1813; X86-LABEL: test_mask_packus_epi16_rmk_512: 1814; X86: # %bb.0: 1815; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1816; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1817; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08] 1818; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: 
[0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1819; X86-NEXT: retl # encoding: [0xc3] 1820; 1821; X64-LABEL: test_mask_packus_epi16_rmk_512: 1822; X64: # %bb.0: 1823; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1824; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f] 1825; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1826; X64-NEXT: retq # encoding: [0xc3] 1827 %b = load <32 x i16>, <32 x i16>* %ptr_b 1828 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1829 ret <64 x i8> %res 1830} 1831 1832define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1833; X86-LABEL: test_mask_packus_epi16_rmkz_512: 1834; X86: # %bb.0: 1835; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1836; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1837; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00] 1838; X86-NEXT: retl # encoding: [0xc3] 1839; 1840; X64-LABEL: test_mask_packus_epi16_rmkz_512: 1841; X64: # %bb.0: 1842; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1843; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07] 1844; X64-NEXT: retq # encoding: [0xc3] 1845 %b = load <32 x i16>, <32 x i16>* %ptr_b 1846 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1847 ret <64 x i8> %res 1848} 1849 1850declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1851 1852define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 1853; X86-LABEL: test_cmp_b_512: 1854; X86: # %bb.0: 1855; X86-NEXT: pushl %edi # encoding: [0x57] 1856; X86-NEXT: .cfi_def_cfa_offset 8 1857; X86-NEXT: pushl %esi # encoding: [0x56] 1858; X86-NEXT: 
.cfi_def_cfa_offset 12 1859; X86-NEXT: .cfi_offset %esi, -12 1860; X86-NEXT: .cfi_offset %edi, -8 1861; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 1862; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1863; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 1864; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 1865; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0] 1866; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1867; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1868; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 1869; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 1870; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 1871; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02] 1872; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1873; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 1874; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 1875; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1] 1876; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0] 1877; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 1878; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1879; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1880; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 1881; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 1882; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 1883; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05] 1884; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1885; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 1886; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 1887; X86-NEXT: addl %esi, %edi # 
encoding: [0x01,0xf7] 1888; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1] 1889; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 1890; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1891; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1892; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1893; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 1894; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 1895; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff] 1896; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff] 1897; X86-NEXT: popl %esi # encoding: [0x5e] 1898; X86-NEXT: .cfi_def_cfa_offset 8 1899; X86-NEXT: popl %edi # encoding: [0x5f] 1900; X86-NEXT: .cfi_def_cfa_offset 4 1901; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1902; X86-NEXT: retl # encoding: [0xc3] 1903; 1904; X64-LABEL: test_cmp_b_512: 1905; X64: # %bb.0: 1906; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 1907; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1908; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0] 1909; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1910; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1911; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02] 1912; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1913; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1914; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 1915; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1916; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1917; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05] 1918; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1919; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1920; X64-NEXT: 
vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 1921; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1922; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] 1923; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1924; X64-NEXT: retq # encoding: [0xc3] 1925 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 1926 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 1927 %ret1 = add i64 %res0, %res1 1928 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 1929 %ret2 = add i64 %ret1, %res2 1930 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) 1931 %ret3 = add i64 %ret2, %res3 1932 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 1933 %ret4 = add i64 %ret3, %res4 1934 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 1935 %ret5 = add i64 %ret4, %res5 1936 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 1937 %ret6 = add i64 %ret5, %res6 1938 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 1939 %ret7 = add i64 %ret6, %res7 1940 ret i64 %ret7 1941} 1942 1943define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 1944; X86-LABEL: test_mask_cmp_b_512: 1945; X86: # %bb.0: 1946; X86-NEXT: pushl %ebp # encoding: [0x55] 1947; X86-NEXT: .cfi_def_cfa_offset 8 1948; X86-NEXT: pushl %ebx # encoding: [0x53] 1949; X86-NEXT: .cfi_def_cfa_offset 12 1950; X86-NEXT: pushl %edi # encoding: [0x57] 1951; X86-NEXT: .cfi_def_cfa_offset 16 1952; X86-NEXT: pushl %esi # encoding: [0x56] 1953; X86-NEXT: .cfi_def_cfa_offset 20 1954; X86-NEXT: .cfi_offset %esi, -20 1955; X86-NEXT: .cfi_offset %edi, -16 1956; X86-NEXT: .cfi_offset %ebx, -12 1957; X86-NEXT: .cfi_offset 
%ebp, -8 1958; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14] 1959; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18] 1960; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1] 1961; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6] 1962; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1963; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1964; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1965; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 1966; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1967; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 1968; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0] 1969; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1970; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1971; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 1972; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1973; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 1974; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 1975; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 1976; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02] 1977; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1978; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1979; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 1980; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1981; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 1982; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda] 1983; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8] 1984; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04] 1985; X86-NEXT: kshiftrq 
$32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1986; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1987; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 1988; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1989; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 1990; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 1991; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 1992; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05] 1993; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1994; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1995; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb] 1996; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1997; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 1998; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9] 1999; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd] 2000; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1] 2001; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2002; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9] 2003; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2004; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0] 2005; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2006; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2007; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea] 2008; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2009; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18] 2010; X86-NEXT: popl %esi # encoding: [0x5e] 2011; X86-NEXT: .cfi_def_cfa_offset 16 2012; X86-NEXT: popl %edi # encoding: [0x5f] 2013; X86-NEXT: .cfi_def_cfa_offset 12 2014; X86-NEXT: popl %ebx # encoding: [0x5b] 2015; X86-NEXT: .cfi_def_cfa_offset 8 2016; X86-NEXT: popl %ebp # 
encoding: [0x5d] 2017; X86-NEXT: .cfi_def_cfa_offset 4 2018; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2019; X86-NEXT: retl # encoding: [0xc3] 2020; 2021; X64-LABEL: test_mask_cmp_b_512: 2022; X64: # %bb.0: 2023; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 2024; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] 2025; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2026; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] 2027; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2028; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2029; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] 2030; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2031; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 2032; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] 2033; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2034; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2035; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05] 2036; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] 2037; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] 2038; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1] 2039; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2040; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] 2041; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2042; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2043; X64-NEXT: retq # encoding: [0xc3] 2044 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 2045 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) 2046 %ret1 = add i64 %res0, %res1 2047 %res2 = 
call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) 2048 %ret2 = add i64 %ret1, %res2 2049 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) 2050 %ret3 = add i64 %ret2, %res3 2051 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) 2052 %ret4 = add i64 %ret3, %res4 2053 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) 2054 %ret5 = add i64 %ret4, %res5 2055 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) 2056 %ret6 = add i64 %ret5, %res6 2057 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) 2058 %ret7 = add i64 %ret6, %res7 2059 ret i64 %ret7 2060} 2061 2062declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone 2063 2064define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 2065; X86-LABEL: test_ucmp_b_512: 2066; X86: # %bb.0: 2067; X86-NEXT: pushl %edi # encoding: [0x57] 2068; X86-NEXT: .cfi_def_cfa_offset 8 2069; X86-NEXT: pushl %esi # encoding: [0x56] 2070; X86-NEXT: .cfi_def_cfa_offset 12 2071; X86-NEXT: .cfi_offset %esi, -12 2072; X86-NEXT: .cfi_offset %edi, -8 2073; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 2074; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2075; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 2076; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2077; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01] 2078; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2079; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2080; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2081; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 2082; X86-NEXT: adcl %eax, %edx # 
encoding: [0x11,0xc2] 2083; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02] 2084; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2085; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 2086; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2087; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1] 2088; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0] 2089; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 2090; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2091; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2092; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2093; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 2094; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 2095; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05] 2096; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2097; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 2098; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 2099; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7] 2100; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1] 2101; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06] 2102; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2103; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2104; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2105; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2106; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 2107; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff] 2108; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff] 2109; X86-NEXT: popl %esi # encoding: [0x5e] 2110; X86-NEXT: .cfi_def_cfa_offset 8 2111; X86-NEXT: popl %edi # encoding: [0x5f] 2112; X86-NEXT: .cfi_def_cfa_offset 4 2113; 
X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2114; X86-NEXT: retl # encoding: [0xc3] 2115; 2116; X64-LABEL: test_ucmp_b_512: 2117; X64: # %bb.0: 2118; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 2119; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2120; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01] 2121; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2122; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2123; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02] 2124; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2125; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 2126; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 2127; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2128; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2129; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05] 2130; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2131; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 2132; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06] 2133; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2134; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] 2135; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2136; X64-NEXT: retq # encoding: [0xc3] 2137 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 2138 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 2139 %ret1 = add i64 %res0, %res1 2140 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 2141 %ret2 = add i64 %ret1, %res2 2142 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> 
%a1, i32 3, i64 -1) 2143 %ret3 = add i64 %ret2, %res3 2144 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 2145 %ret4 = add i64 %ret3, %res4 2146 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 2147 %ret5 = add i64 %ret4, %res5 2148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 2149 %ret6 = add i64 %ret5, %res6 2150 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 2151 %ret7 = add i64 %ret6, %res7 2152 ret i64 %ret7 2153} 2154 2155define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 2156; X86-LABEL: test_mask_x86_avx512_ucmp_b_512: 2157; X86: # %bb.0: 2158; X86-NEXT: pushl %ebp # encoding: [0x55] 2159; X86-NEXT: .cfi_def_cfa_offset 8 2160; X86-NEXT: pushl %ebx # encoding: [0x53] 2161; X86-NEXT: .cfi_def_cfa_offset 12 2162; X86-NEXT: pushl %edi # encoding: [0x57] 2163; X86-NEXT: .cfi_def_cfa_offset 16 2164; X86-NEXT: pushl %esi # encoding: [0x56] 2165; X86-NEXT: .cfi_def_cfa_offset 20 2166; X86-NEXT: .cfi_offset %esi, -20 2167; X86-NEXT: .cfi_offset %edi, -16 2168; X86-NEXT: .cfi_offset %ebx, -12 2169; X86-NEXT: .cfi_offset %ebp, -8 2170; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14] 2171; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18] 2172; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1] 2173; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6] 2174; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2175; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2176; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2177; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 2178; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2179; X86-NEXT: kmovd %k2, %edx # encoding: 
[0xc5,0xfb,0x93,0xd2] 2180; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01] 2181; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2182; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2183; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 2184; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2185; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 2186; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 2187; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 2188; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02] 2189; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2190; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2191; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 2192; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2193; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 2194; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda] 2195; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8] 2196; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04] 2197; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2198; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2199; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 2200; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2201; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 2202; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 2203; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 2204; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05] 2205; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2206; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2207; X86-NEXT: 
kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb] 2208; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2209; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 2210; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9] 2211; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd] 2212; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06] 2213; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2214; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9] 2215; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2216; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0] 2217; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2218; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2219; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea] 2220; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2221; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18] 2222; X86-NEXT: popl %esi # encoding: [0x5e] 2223; X86-NEXT: .cfi_def_cfa_offset 16 2224; X86-NEXT: popl %edi # encoding: [0x5f] 2225; X86-NEXT: .cfi_def_cfa_offset 12 2226; X86-NEXT: popl %ebx # encoding: [0x5b] 2227; X86-NEXT: .cfi_def_cfa_offset 8 2228; X86-NEXT: popl %ebp # encoding: [0x5d] 2229; X86-NEXT: .cfi_def_cfa_offset 4 2230; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2231; X86-NEXT: retl # encoding: [0xc3] 2232; 2233; X64-LABEL: test_mask_x86_avx512_ucmp_b_512: 2234; X64: # %bb.0: 2235; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 2236; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] 2237; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2238; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] 2239; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2240; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2241; X64-NEXT: 
vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] 2242; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2243; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 2244; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] 2245; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2246; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2247; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05] 2248; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] 2249; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] 2250; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06] 2251; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2252; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] 2253; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2254; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2255; X64-NEXT: retq # encoding: [0xc3] 2256 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 2257 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) 2258 %ret1 = add i64 %res0, %res1 2259 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) 2260 %ret2 = add i64 %ret1, %res2 2261 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) 2262 %ret3 = add i64 %ret2, %res3 2263 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) 2264 %ret4 = add i64 %ret3, %res4 2265 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) 2266 %ret5 = add i64 %ret4, %res5 2267 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) 2268 %ret6 = add i64 %ret5, %res6 2269 
%res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) 2270 %ret7 = add i64 %ret6, %res7 2271 ret i64 %ret7 2272} 2273 2274declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone 2275 2276define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 2277; X86-LABEL: test_cmp_w_512: 2278; X86: # %bb.0: 2279; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2280; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2281; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] 2282; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2283; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2284; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] 2285; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2286; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2287; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2288; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2289; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2290; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] 2291; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2292; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2293; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 2294; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2295; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2296; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2297; X86-NEXT: retl # encoding: [0xc3] 2298; 2299; X64-LABEL: test_cmp_w_512: 2300; X64: # %bb.0: 2301; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2302; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2303; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: 
[0x62,0xf1,0x75,0x48,0x65,0xc0] 2304; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2305; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2306; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] 2307; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2308; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2309; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2310; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2311; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2312; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] 2313; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2314; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2315; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 2316; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2317; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2318; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2319; X64-NEXT: retq # encoding: [0xc3] 2320 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) 2321 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) 2322 %ret1 = add i32 %res0, %res1 2323 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) 2324 %ret2 = add i32 %ret1, %res2 2325 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) 2326 %ret3 = add i32 %ret2, %res3 2327 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) 2328 %ret4 = add i32 %ret3, %res4 2329 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) 2330 %ret5 = add i32 %ret4, %res5 2331 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) 
2332 %ret6 = add i32 %ret5, %res6 2333 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) 2334 %ret7 = add i32 %ret6, %res7 2335 ret i32 %ret7 2336} 2337 2338define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 2339; X86-LABEL: test_mask_cmp_w_512: 2340; X86: # %bb.0: 2341; X86-NEXT: pushl %esi # encoding: [0x56] 2342; X86-NEXT: .cfi_def_cfa_offset 8 2343; X86-NEXT: .cfi_offset %esi, -8 2344; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2345; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2346; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2347; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2348; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] 2349; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2350; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2351; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] 2352; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2353; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2354; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2355; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2356; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2357; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] 2358; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2359; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] 2360; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] 2361; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2362; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2363; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2364; X86-NEXT: popl %esi # encoding: [0x5e] 2365; X86-NEXT: .cfi_def_cfa_offset 4 2366; 
X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2367; X86-NEXT: retl # encoding: [0xc3] 2368; 2369; X64-LABEL: test_mask_cmp_w_512: 2370; X64: # %bb.0: 2371; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2372; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2373; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2374; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] 2375; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2376; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2377; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] 2378; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2379; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2380; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2381; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2382; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2383; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] 2384; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2385; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] 2386; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] 2387; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2388; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2389; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2390; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2391; X64-NEXT: retq # encoding: [0xc3] 2392 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) 2393 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) 2394 %ret1 = add i32 %res0, %res1 2395 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) 2396 %ret2 = add i32 %ret1, 
%res2 2397 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) 2398 %ret3 = add i32 %ret2, %res3 2399 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) 2400 %ret4 = add i32 %ret3, %res4 2401 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) 2402 %ret5 = add i32 %ret4, %res5 2403 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) 2404 %ret6 = add i32 %ret5, %res6 2405 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) 2406 %ret7 = add i32 %ret6, %res7 2407 ret i32 %ret7 2408} 2409 2410declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone 2411 2412define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 2413; X86-LABEL: test_ucmp_w_512: 2414; X86: # %bb.0: 2415; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2416; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2417; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] 2418; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2419; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2420; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] 2421; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2422; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2423; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2424; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2425; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2426; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] 2427; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2428; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2429; X86-NEXT: vpcmpnleuw %zmm1, 
%zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] 2430; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2431; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2432; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2433; X86-NEXT: retl # encoding: [0xc3] 2434; 2435; X64-LABEL: test_ucmp_w_512: 2436; X64: # %bb.0: 2437; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2438; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2439; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] 2440; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2441; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2442; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] 2443; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2444; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2445; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2446; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2447; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2448; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] 2449; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2450; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2451; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] 2452; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2453; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2454; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2455; X64-NEXT: retq # encoding: [0xc3] 2456 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) 2457 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) 2458 %ret1 = add i32 %res0, %res1 2459 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 
x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) 2460 %ret2 = add i32 %ret1, %res2 2461 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) 2462 %ret3 = add i32 %ret2, %res3 2463 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) 2464 %ret4 = add i32 %ret3, %res4 2465 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) 2466 %ret5 = add i32 %ret4, %res5 2467 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) 2468 %ret6 = add i32 %ret5, %res6 2469 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) 2470 %ret7 = add i32 %ret6, %res7 2471 ret i32 %ret7 2472} 2473 2474define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 2475; X86-LABEL: test_mask_ucmp_w_512: 2476; X86: # %bb.0: 2477; X86-NEXT: pushl %esi # encoding: [0x56] 2478; X86-NEXT: .cfi_def_cfa_offset 8 2479; X86-NEXT: .cfi_offset %esi, -8 2480; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2481; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2482; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2483; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2484; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] 2485; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2486; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2487; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] 2488; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2489; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2490; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2491; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2492; X86-NEXT: addl %eax, %edx # 
encoding: [0x01,0xc2] 2493; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] 2494; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2495; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] 2496; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] 2497; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2498; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2499; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2500; X86-NEXT: popl %esi # encoding: [0x5e] 2501; X86-NEXT: .cfi_def_cfa_offset 4 2502; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2503; X86-NEXT: retl # encoding: [0xc3] 2504; 2505; X64-LABEL: test_mask_ucmp_w_512: 2506; X64: # %bb.0: 2507; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2508; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2509; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2510; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] 2511; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2512; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2513; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] 2514; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2515; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2516; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2517; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2518; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2519; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] 2520; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2521; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] 2522; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] 2523; X64-NEXT: kmovd 
%k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2524; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2525; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2526; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2527; X64-NEXT: retq # encoding: [0xc3] 2528 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) 2529 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) 2530 %ret1 = add i32 %res0, %res1 2531 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) 2532 %ret2 = add i32 %ret1, %res2 2533 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) 2534 %ret3 = add i32 %ret2, %res3 2535 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) 2536 %ret4 = add i32 %ret3, %res4 2537 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) 2538 %ret5 = add i32 %ret4, %res5 2539 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) 2540 %ret6 = add i32 %ret5, %res6 2541 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) 2542 %ret7 = add i32 %ret6, %res7 2543 ret i32 %ret7 2544} 2545 2546declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone 2547 2548 2549declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 2550 2551define <64 x i8>@mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 2552; CHECK-LABEL: mm512_avg_epu8: 2553; CHECK: # %bb.0: 2554; CHECK-NEXT: vpavgb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xc1] 2555; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2556 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 2557 ret <64 x i8> %res 2558} 
; Masked byte average. Calls mask.pavg.b.512 with %x0/%x1 as sources, %x2 as
; the third vector operand and the 64-bit %x3 as the mask. The expected code
; loads the mask into %k1 (from the stack on i686, from %rdi on x86-64),
; issues vpavgb into %zmm2 under {%k1} with no {z}, then copies %zmm2 to %zmm0.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <64 x i8>@mm512_mask_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: mm512_mask_avg_epu8:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: mm512_mask_avg_epu8:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ret <64 x i8> %res
}

; Word-element counterpart of the pavg intrinsic: two <32 x i16> sources, a
; third <32 x i16> operand (presumably the pass-through value; the masked test
; below expects the result to be written into its register) and an i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Word average with a constant all-ones mask (i32 -1); %x3 is not referenced.
; Both targets share one set of assertions: a single unmasked vpavgw into
; %zmm0, with no mask-register setup and no extra move.
define <32 x i16>@mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: mm512_avg_epu16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpavgw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}

; Masked word average with the run-time 32-bit mask %x3. The expected code
; loads the mask into %k1 with kmovd (from the stack on i686, from %edi on
; x86-64), issues vpavgw into %zmm2 under {%k1}, then copies %zmm2 to %zmm0.
define <32 x i16>@mm512_mask_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: mm512_mask_avg_epu16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: mm512_mask_avg_epu16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  ret <32 x i16> %res
}

; Word-element pabs intrinsic: one <32 x i16> source, a second <32 x i16>
; operand (presumably the pass-through value; the masked test below expects
; the result to be written into its register) and an i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)

; Word absolute value with a constant all-ones mask (i32 -1). Both targets
; share one set of assertions: a single unmasked vpabsw of %zmm0 into %zmm0.
define <32 x i16>@test_int_x86_avx512_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpabsw %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
  ret <32 x i16> %res
}

; Masked word absolute value with the run-time 32-bit mask %x2. The expected
; code loads the mask into %k1 with kmovd (from the stack on i686, from %edi
; on x86-64), issues vpabsw of %zmm0 into %zmm1 under {%k1}, then copies
; %zmm1 to %zmm0.
define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
  ret <32 x i16> %res
}

declare <64 x i8>
@llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) 2637 2638define <64 x i8>@test_int_x86_avx512_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1) { 2639; CHECK-LABEL: test_int_x86_avx512_pabs_b_512: 2640; CHECK: # %bb.0: 2641; CHECK-NEXT: vpabsb %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xc0] 2642; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2643 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) 2644 ret <64 x i8> %res 2645} 2646 2647define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2648; X86-LABEL: test_int_x86_avx512_mask_pabs_b_512: 2649; X86: # %bb.0: 2650; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 2651; X86-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8] 2652; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2653; X86-NEXT: retl # encoding: [0xc3] 2654; 2655; X64-LABEL: test_int_x86_avx512_mask_pabs_b_512: 2656; X64: # %bb.0: 2657; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 2658; X64-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8] 2659; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2660; X64-NEXT: retq # encoding: [0xc3] 2661 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2662 ret <64 x i8> %res 2663} 2664 2665declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) 2666 2667define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2668; X86-LABEL: test_int_x86_avx512_ptestm_b_512: 2669; X86: # %bb.0: 2670; X86-NEXT: pushl %esi # encoding: [0x56] 2671; X86-NEXT: .cfi_def_cfa_offset 8 2672; X86-NEXT: .cfi_offset %esi, -8 2673; X86-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1] 2674; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2675; X86-NEXT: 
kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 2676; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2677; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca] 2678; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2679; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2680; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0] 2681; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2682; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 2683; X86-NEXT: popl %esi # encoding: [0x5e] 2684; X86-NEXT: .cfi_def_cfa_offset 4 2685; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2686; X86-NEXT: retl # encoding: [0xc3] 2687; 2688; X64-LABEL: test_int_x86_avx512_ptestm_b_512: 2689; X64: # %bb.0: 2690; X64-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1] 2691; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2692; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7] 2693; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2694; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2695; X64-NEXT: retq # encoding: [0xc3] 2696 %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2697 %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) 2698 %res2 = add i64 %res, %res1 2699 ret i64 %res2 2700} 2701 2702declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) 2703 2704define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2705; X86-LABEL: test_int_x86_avx512_ptestm_w_512: 2706; X86: # %bb.0: 2707; X86-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1] 2708; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2709; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2710; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 2711; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2712; X86-NEXT: vzeroupper # encoding: 
[0xc5,0xf8,0x77] 2713; X86-NEXT: retl # encoding: [0xc3] 2714; 2715; X64-LABEL: test_int_x86_avx512_ptestm_w_512: 2716; X64: # %bb.0: 2717; X64-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1] 2718; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2719; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 2720; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2721; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2722; X64-NEXT: retq # encoding: [0xc3] 2723 %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2724 %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) 2725 %res2 = add i32 %res, %res1 2726 ret i32 %res2 2727} 2728 2729declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) 2730 2731define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2732; X86-LABEL: test_int_x86_avx512_ptestnm_b_512: 2733; X86: # %bb.0: 2734; X86-NEXT: pushl %esi # encoding: [0x56] 2735; X86-NEXT: .cfi_def_cfa_offset 8 2736; X86-NEXT: .cfi_offset %esi, -8 2737; X86-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1] 2738; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2739; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 2740; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2741; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca] 2742; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2743; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2744; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0] 2745; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2746; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 2747; X86-NEXT: popl %esi # encoding: [0x5e] 2748; X86-NEXT: .cfi_def_cfa_offset 4 2749; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2750; X86-NEXT: retl # encoding: [0xc3] 2751; 2752; X64-LABEL: 
test_int_x86_avx512_ptestnm_b_512: 2753; X64: # %bb.0: 2754; X64-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1] 2755; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2756; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7] 2757; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2758; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2759; X64-NEXT: retq # encoding: [0xc3] 2760 %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2761 %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) 2762 %res2 = add i64 %res, %res1 2763 ret i64 %res2 2764} 2765 2766declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) 2767 2768define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2769; X86-LABEL: test_int_x86_avx512_ptestnm_w_512: 2770; X86: # %bb.0: 2771; X86-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1] 2772; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2773; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2774; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 2775; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2776; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2777; X86-NEXT: retl # encoding: [0xc3] 2778; 2779; X64-LABEL: test_int_x86_avx512_ptestnm_w_512: 2780; X64: # %bb.0: 2781; X64-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1] 2782; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2783; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 2784; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2785; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2786; X64-NEXT: retq # encoding: [0xc3] 2787 %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2788 %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) 2789 
%res2 = add i32 %res, %res1 2790 ret i32 %res2 2791} 2792 2793declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) 2794 2795define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { 2796; X86-LABEL: test_int_x86_avx512_cvtb2mask_512: 2797; X86: # %bb.0: 2798; X86-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0] 2799; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2800; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2801; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2802; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2803; X86-NEXT: retl # encoding: [0xc3] 2804; 2805; X64-LABEL: test_int_x86_avx512_cvtb2mask_512: 2806; X64: # %bb.0: 2807; X64-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0] 2808; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2809; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2810; X64-NEXT: retq # encoding: [0xc3] 2811 %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) 2812 ret i64 %res 2813} 2814 2815declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) 2816 2817define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { 2818; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_512: 2819; CHECK: # %bb.0: 2820; CHECK-NEXT: vpmovw2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x29,0xc0] 2821; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2822; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2823; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2824 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) 2825 ret i32 %res 2826} 2827 2828declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2829 2830define <32 x i16>@test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 2831; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_512: 2832; CHECK: # %bb.0: 2833; CHECK-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 # encoding: 
[0x62,0xf1,0x7d,0x48,0xe4,0xc1] 2834; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2835 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2836 ret <32 x i16> %res 2837} 2838 2839define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2840; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 2841; X86: # %bb.0: 2842; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2843; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] 2844; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2845; X86-NEXT: retl # encoding: [0xc3] 2846; 2847; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 2848; X64: # %bb.0: 2849; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2850; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] 2851; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2852; X64-NEXT: retq # encoding: [0xc3] 2853 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2854 ret <32 x i16> %res 2855} 2856 2857declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2858 2859define <32 x i16>@test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 2860; CHECK-LABEL: test_int_x86_avx512_pmulh_w_512: 2861; CHECK: # %bb.0: 2862; CHECK-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xc1] 2863; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2864 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2865 ret <32 x i16> %res 2866} 2867 2868define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2869; X86-LABEL: 
test_int_x86_avx512_mask_pmulh_w_512: 2870; X86: # %bb.0: 2871; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2872; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] 2873; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2874; X86-NEXT: retl # encoding: [0xc3] 2875; 2876; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 2877; X64: # %bb.0: 2878; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2879; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] 2880; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2881; X64-NEXT: retq # encoding: [0xc3] 2882 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2883 ret <32 x i16> %res 2884} 2885 2886declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2887 2888define <32 x i16>@test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 2889; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_512: 2890; CHECK: # %bb.0: 2891; CHECK-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xc1] 2892; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2893 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2894 ret <32 x i16> %res 2895} 2896 2897define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2898; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 2899; X86: # %bb.0: 2900; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2901; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] 2902; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2903; X86-NEXT: retl # encoding: [0xc3] 2904; 2905; 
X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 2906; X64: # %bb.0: 2907; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2908; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] 2909; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2910; X64-NEXT: retq # encoding: [0xc3] 2911 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2912 ret <32 x i16> %res 2913} 2914 2915declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) 2916 2917define <32 x i16>@test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2) { 2918; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_512: 2919; CHECK: # %bb.0: 2920; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xc1] 2921; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2922 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) 2923 ret <32 x i16> %res 2924} 2925 2926define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { 2927; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2928; X86: # %bb.0: 2929; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2930; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] 2931; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2932; X86-NEXT: retl # encoding: [0xc3] 2933; 2934; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2935; X64: # %bb.0: 2936; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2937; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] 2938; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2939; X64-NEXT: retq # encoding: [0xc3] 2940 %res = call <32 
x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) 2941 ret <32 x i16> %res 2942} 2943 2944declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) 2945 2946define <16 x i32>@test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2) { 2947; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_512: 2948; CHECK: # %bb.0: 2949; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xc1] 2950; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2951 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) 2952 ret <16 x i32> %res 2953} 2954 2955define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { 2956; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2957; X86: # %bb.0: 2958; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2959; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1] 2960; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2961; X86-NEXT: retl # encoding: [0xc3] 2962; 2963; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2964; X64: # %bb.0: 2965; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2966; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1] 2967; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2968; X64-NEXT: retq # encoding: [0xc3] 2969 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) 2970 ret <16 x i32> %res 2971} 2972 2973declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2974 2975define <32 x i16>@test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 2976; CHECK-LABEL: 
test_int_x86_avx512_permvar_hi_512: 2977; CHECK: # %bb.0: 2978; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xc0] 2979; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2980 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2981 ret <32 x i16> %res 2982} 2983 2984define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2985; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512: 2986; X86: # %bb.0: 2987; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2988; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0] 2989; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2990; X86-NEXT: retl # encoding: [0xc3] 2991; 2992; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512: 2993; X64: # %bb.0: 2994; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2995; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0] 2996; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2997; X64-NEXT: retq # encoding: [0xc3] 2998 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2999 ret <32 x i16> %res 3000} 3001 3002define <32 x i16>@test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) { 3003; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_512: 3004; X86: # %bb.0: 3005; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3006; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0] 3007; X86-NEXT: retl # encoding: [0xc3] 3008; 3009; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_512: 3010; X64: # %bb.0: 3011; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3012; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 
{%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0] 3013; X64-NEXT: retq # encoding: [0xc3] 3014 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 3015 ret <32 x i16> %res 3016} 3017 3018declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 3019 3020define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 3021; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_512: 3022; CHECK: # %bb.0: 3023; CHECK-NEXT: vpermi2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x75,0xc2] 3024; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3025 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 3026 ret <32 x i16> %res 3027} 3028 3029define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 3030; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 3031; X86: # %bb.0: 3032; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3033; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca] 3034; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3035; X86-NEXT: retl # encoding: [0xc3] 3036; 3037; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 3038; X64: # %bb.0: 3039; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3040; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca] 3041; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3042; X64-NEXT: retq # encoding: [0xc3] 3043 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 3044 ret <32 x i16> %res 3045} 3046 3047declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, 
<32 x i16>, i32) 3048 3049define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 3050; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 3051; X86: # %bb.0: 3052; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3053; X86-NEXT: vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2] 3054; X86-NEXT: retl # encoding: [0xc3] 3055; 3056; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 3057; X64: # %bb.0: 3058; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3059; X64-NEXT: vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2] 3060; X64-NEXT: retq # encoding: [0xc3] 3061 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 3062 ret <32 x i16> %res 3063} 3064 3065declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 3066 3067define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 3068; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_512: 3069; CHECK: # %bb.0: 3070; CHECK-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xc2] 3071; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3072 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 3073 ret <32 x i16> %res 3074} 3075 3076define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 3077; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 3078; X86: # %bb.0: 3079; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3080; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] 3081; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3082; 
X86-NEXT: retl # encoding: [0xc3] 3083; 3084; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 3085; X64: # %bb.0: 3086; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3087; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] 3088; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3089; X64-NEXT: retq # encoding: [0xc3] 3090 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 3091 ret <32 x i16> %res 3092} 3093 3094declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) 3095 3096define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { 3097; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 3098; X86: # %bb.0: 3099; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3100; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] 3101; X86-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] 3102; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] 3103; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] 3104; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 3105; X86-NEXT: retl # encoding: [0xc3] 3106; 3107; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 3108; X64: # %bb.0: 3109; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3110; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] 3111; X64-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] 3112; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] 3113; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # 
encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] 3114; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 3115; X64-NEXT: retq # encoding: [0xc3] 3116 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) 3117 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3, <32 x i16> zeroinitializer, i32 %x4) 3118 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4, <32 x i16> %x3, i32 -1) 3119 %res3 = add <32 x i16> %res, %res1 3120 %res4 = add <32 x i16> %res3, %res2 3121 ret <32 x i16> %res4 3122} 3123 3124define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { 3125; CHECK-LABEL: test_mask_adds_epu16_rr_512: 3126; CHECK: # %bb.0: 3127; CHECK-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1] 3128; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3129 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3130 ret <32 x i16> %res 3131} 3132 3133define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 3134; X86-LABEL: test_mask_adds_epu16_rrk_512: 3135; X86: # %bb.0: 3136; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3137; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1] 3138; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3139; X86-NEXT: retl # encoding: [0xc3] 3140; 3141; X64-LABEL: test_mask_adds_epu16_rrk_512: 3142; X64: # %bb.0: 3143; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3144; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1] 3145; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3146; X64-NEXT: retq # encoding: [0xc3] 3147 %res = 
call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3148 ret <32 x i16> %res 3149} 3150 3151define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 3152; X86-LABEL: test_mask_adds_epu16_rrkz_512: 3153; X86: # %bb.0: 3154; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3155; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1] 3156; X86-NEXT: retl # encoding: [0xc3] 3157; 3158; X64-LABEL: test_mask_adds_epu16_rrkz_512: 3159; X64: # %bb.0: 3160; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3161; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1] 3162; X64-NEXT: retq # encoding: [0xc3] 3163 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3164 ret <32 x i16> %res 3165} 3166 3167define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 3168; X86-LABEL: test_mask_adds_epu16_rm_512: 3169; X86: # %bb.0: 3170; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3171; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00] 3172; X86-NEXT: retl # encoding: [0xc3] 3173; 3174; X64-LABEL: test_mask_adds_epu16_rm_512: 3175; X64: # %bb.0: 3176; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07] 3177; X64-NEXT: retq # encoding: [0xc3] 3178 %b = load <32 x i16>, <32 x i16>* %ptr_b 3179 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3180 ret <32 x i16> %res 3181} 3182 3183define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 3184; X86-LABEL: test_mask_adds_epu16_rmk_512: 3185; X86: # %bb.0: 3186; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax # encoding: [0x8b,0x44,0x24,0x04] 3187; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3188; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08] 3189; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3190; X86-NEXT: retl # encoding: [0xc3] 3191; 3192; X64-LABEL: test_mask_adds_epu16_rmk_512: 3193; X64: # %bb.0: 3194; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3195; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f] 3196; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3197; X64-NEXT: retq # encoding: [0xc3] 3198 %b = load <32 x i16>, <32 x i16>* %ptr_b 3199 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3200 ret <32 x i16> %res 3201} 3202 3203define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 3204; X86-LABEL: test_mask_adds_epu16_rmkz_512: 3205; X86: # %bb.0: 3206; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3207; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3208; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00] 3209; X86-NEXT: retl # encoding: [0xc3] 3210; 3211; X64-LABEL: test_mask_adds_epu16_rmkz_512: 3212; X64: # %bb.0: 3213; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3214; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07] 3215; X64-NEXT: retq # encoding: [0xc3] 3216 %b = load <32 x i16>, <32 x i16>* %ptr_b 3217 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3218 ret <32 x i16> %res 3219} 3220 3221declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 3222 
3223define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { 3224; CHECK-LABEL: test_mask_subs_epu16_rr_512: 3225; CHECK: # %bb.0: 3226; CHECK-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1] 3227; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3228 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3229 ret <32 x i16> %res 3230} 3231 3232define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 3233; X86-LABEL: test_mask_subs_epu16_rrk_512: 3234; X86: # %bb.0: 3235; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3236; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1] 3237; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3238; X86-NEXT: retl # encoding: [0xc3] 3239; 3240; X64-LABEL: test_mask_subs_epu16_rrk_512: 3241; X64: # %bb.0: 3242; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3243; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1] 3244; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3245; X64-NEXT: retq # encoding: [0xc3] 3246 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3247 ret <32 x i16> %res 3248} 3249 3250define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 3251; X86-LABEL: test_mask_subs_epu16_rrkz_512: 3252; X86: # %bb.0: 3253; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3254; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1] 3255; X86-NEXT: retl # encoding: [0xc3] 3256; 3257; X64-LABEL: test_mask_subs_epu16_rrkz_512: 3258; X64: # %bb.0: 3259; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] 3260; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1] 3261; X64-NEXT: retq # encoding: [0xc3] 3262 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3263 ret <32 x i16> %res 3264} 3265 3266define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 3267; X86-LABEL: test_mask_subs_epu16_rm_512: 3268; X86: # %bb.0: 3269; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3270; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00] 3271; X86-NEXT: retl # encoding: [0xc3] 3272; 3273; X64-LABEL: test_mask_subs_epu16_rm_512: 3274; X64: # %bb.0: 3275; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07] 3276; X64-NEXT: retq # encoding: [0xc3] 3277 %b = load <32 x i16>, <32 x i16>* %ptr_b 3278 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3279 ret <32 x i16> %res 3280} 3281 3282define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 3283; X86-LABEL: test_mask_subs_epu16_rmk_512: 3284; X86: # %bb.0: 3285; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3286; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3287; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08] 3288; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3289; X86-NEXT: retl # encoding: [0xc3] 3290; 3291; X64-LABEL: test_mask_subs_epu16_rmk_512: 3292; X64: # %bb.0: 3293; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3294; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f] 3295; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: 
[0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3296; X64-NEXT: retq # encoding: [0xc3] 3297 %b = load <32 x i16>, <32 x i16>* %ptr_b 3298 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3299 ret <32 x i16> %res 3300} 3301 3302define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 3303; X86-LABEL: test_mask_subs_epu16_rmkz_512: 3304; X86: # %bb.0: 3305; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3306; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3307; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00] 3308; X86-NEXT: retl # encoding: [0xc3] 3309; 3310; X64-LABEL: test_mask_subs_epu16_rmkz_512: 3311; X64: # %bb.0: 3312; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3313; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07] 3314; X64-NEXT: retq # encoding: [0xc3] 3315 %b = load <32 x i16>, <32 x i16>* %ptr_b 3316 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3317 ret <32 x i16> %res 3318} 3319 3320declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 3321 3322define <64 x i8> @test_mask_adds_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) { 3323; CHECK-LABEL: test_mask_adds_epu8_rr_512: 3324; CHECK: # %bb.0: 3325; CHECK-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0xc1] 3326; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3327 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 3328 ret <64 x i8> %res 3329} 3330 3331define <64 x i8> @test_mask_adds_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) { 3332; X86-LABEL: test_mask_adds_epu8_rrk_512: 3333; X86: # %bb.0: 3334; 
X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 3335; X86-NEXT: vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1] 3336; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3337; X86-NEXT: retl # encoding: [0xc3] 3338; 3339; X64-LABEL: test_mask_adds_epu8_rrk_512: 3340; X64: # %bb.0: 3341; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 3342; X64-NEXT: vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1] 3343; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3344; X64-NEXT: retq # encoding: [0xc3] 3345 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 3346 ret <64 x i8> %res 3347} 3348 3349define <64 x i8> @test_mask_adds_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 3350; X86-LABEL: test_mask_adds_epu8_rrkz_512: 3351; X86: # %bb.0: 3352; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 3353; X86-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1] 3354; X86-NEXT: retl # encoding: [0xc3] 3355; 3356; X64-LABEL: test_mask_adds_epu8_rrkz_512: 3357; X64: # %bb.0: 3358; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 3359; X64-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1] 3360; X64-NEXT: retq # encoding: [0xc3] 3361 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 3362 ret <64 x i8> %res 3363} 3364 3365define <64 x i8> @test_mask_adds_epu8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) { 3366; X86-LABEL: test_mask_adds_epu8_rm_512: 3367; X86: # %bb.0: 3368; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3369; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x00] 3370; X86-NEXT: retl # 
encoding: [0xc3] 3371; 3372; X64-LABEL: test_mask_adds_epu8_rm_512: 3373; X64: # %bb.0: 3374; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x07] 3375; X64-NEXT: retq # encoding: [0xc3] 3376 %b = load <64 x i8>, <64 x i8>* %ptr_b 3377 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 3378 ret <64 x i8> %res 3379} 3380 3381define <64 x i8> @test_mask_adds_epu8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 3382; X86-LABEL: test_mask_adds_epu8_rmk_512: 3383; X86: # %bb.0: 3384; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3385; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 3386; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x08] 3387; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3388; X86-NEXT: retl # encoding: [0xc3] 3389; 3390; X64-LABEL: test_mask_adds_epu8_rmk_512: 3391; X64: # %bb.0: 3392; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 3393; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x0f] 3394; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3395; X64-NEXT: retq # encoding: [0xc3] 3396 %b = load <64 x i8>, <64 x i8>* %ptr_b 3397 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 3398 ret <64 x i8> %res 3399} 3400 3401define <64 x i8> @test_mask_adds_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) { 3402; X86-LABEL: test_mask_adds_epu8_rmkz_512: 3403; X86: # %bb.0: 3404; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3405; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 3406; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x00] 3407; 
X86-NEXT: retl # encoding: [0xc3] 3408; 3409; X64-LABEL: test_mask_adds_epu8_rmkz_512: 3410; X64: # %bb.0: 3411; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 3412; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x07] 3413; X64-NEXT: retq # encoding: [0xc3] 3414 %b = load <64 x i8>, <64 x i8>* %ptr_b 3415 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 3416 ret <64 x i8> %res 3417} 3418 3419declare <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 3420 3421define <64 x i8> @test_mask_subs_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) { 3422; CHECK-LABEL: test_mask_subs_epu8_rr_512: 3423; CHECK: # %bb.0: 3424; CHECK-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0xc1] 3425; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3426 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 3427 ret <64 x i8> %res 3428} 3429 3430define <64 x i8> @test_mask_subs_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) { 3431; X86-LABEL: test_mask_subs_epu8_rrk_512: 3432; X86: # %bb.0: 3433; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 3434; X86-NEXT: vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1] 3435; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3436; X86-NEXT: retl # encoding: [0xc3] 3437; 3438; X64-LABEL: test_mask_subs_epu8_rrk_512: 3439; X64: # %bb.0: 3440; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 3441; X64-NEXT: vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1] 3442; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3443; X64-NEXT: retq # encoding: [0xc3] 3444 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, 
<64 x i8> %b, <64 x i8> %passThru, i64 %mask) 3445 ret <64 x i8> %res 3446} 3447 3448define <64 x i8> @test_mask_subs_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 3449; X86-LABEL: test_mask_subs_epu8_rrkz_512: 3450; X86: # %bb.0: 3451; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 3452; X86-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1] 3453; X86-NEXT: retl # encoding: [0xc3] 3454; 3455; X64-LABEL: test_mask_subs_epu8_rrkz_512: 3456; X64: # %bb.0: 3457; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 3458; X64-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1] 3459; X64-NEXT: retq # encoding: [0xc3] 3460 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 3461 ret <64 x i8> %res 3462} 3463 3464define <64 x i8> @test_mask_subs_epu8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) { 3465; X86-LABEL: test_mask_subs_epu8_rm_512: 3466; X86: # %bb.0: 3467; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3468; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x00] 3469; X86-NEXT: retl # encoding: [0xc3] 3470; 3471; X64-LABEL: test_mask_subs_epu8_rm_512: 3472; X64: # %bb.0: 3473; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x07] 3474; X64-NEXT: retq # encoding: [0xc3] 3475 %b = load <64 x i8>, <64 x i8>* %ptr_b 3476 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 3477 ret <64 x i8> %res 3478} 3479 3480define <64 x i8> @test_mask_subs_epu8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 3481; X86-LABEL: test_mask_subs_epu8_rmk_512: 3482; X86: # %bb.0: 3483; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3484; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 3485; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x08] 3486; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3487; X86-NEXT: retl # encoding: [0xc3] 3488; 3489; X64-LABEL: test_mask_subs_epu8_rmk_512: 3490; X64: # %bb.0: 3491; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 3492; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x0f] 3493; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3494; X64-NEXT: retq # encoding: [0xc3] 3495 %b = load <64 x i8>, <64 x i8>* %ptr_b 3496 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 3497 ret <64 x i8> %res 3498} 3499 3500define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) { 3501; X86-LABEL: test_mask_subs_epu8_rmkz_512: 3502; X86: # %bb.0: 3503; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3504; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 3505; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x00] 3506; X86-NEXT: retl # encoding: [0xc3] 3507; 3508; X64-LABEL: test_mask_subs_epu8_rmkz_512: 3509; X64: # %bb.0: 3510; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 3511; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x07] 3512; X64-NEXT: retq # encoding: [0xc3] 3513 %b = load <64 x i8>, <64 x i8>* %ptr_b 3514 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 3515 ret <64 x i8> %res 3516} 3517 3518declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 3519 3520define <32 x i16> @test_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 3521; CHECK-LABEL: 
test_adds_epi16_rr_512: 3522; CHECK: # %bb.0: 3523; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1] 3524; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3525 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) 3526 ret <32 x i16> %1 3527} 3528 3529define <32 x i16> @test_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 3530; X86-LABEL: test_adds_epi16_rrk_512: 3531; X86: # %bb.0: 3532; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3533; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1] 3534; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3535; X86-NEXT: retl # encoding: [0xc3] 3536; 3537; X64-LABEL: test_adds_epi16_rrk_512: 3538; X64: # %bb.0: 3539; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3540; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1] 3541; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3542; X64-NEXT: retq # encoding: [0xc3] 3543 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) 3544 %2 = bitcast i32 %mask to <32 x i1> 3545 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru 3546 ret <32 x i16> %3 3547} 3548 3549define <32 x i16> @test_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 3550; X86-LABEL: test_adds_epi16_rrkz_512: 3551; X86: # %bb.0: 3552; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3553; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1] 3554; X86-NEXT: retl # encoding: [0xc3] 3555; 3556; X64-LABEL: test_adds_epi16_rrkz_512: 3557; X64: # %bb.0: 3558; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3559; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1] 3560; 
X64-NEXT: retq # encoding: [0xc3] 3561 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) 3562 %2 = bitcast i32 %mask to <32 x i1> 3563 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer 3564 ret <32 x i16> %3 3565} 3566 3567define <32 x i16> @test_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 3568; X86-LABEL: test_adds_epi16_rm_512: 3569; X86: # %bb.0: 3570; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3571; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00] 3572; X86-NEXT: retl # encoding: [0xc3] 3573; 3574; X64-LABEL: test_adds_epi16_rm_512: 3575; X64: # %bb.0: 3576; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07] 3577; X64-NEXT: retq # encoding: [0xc3] 3578 %b = load <32 x i16>, <32 x i16>* %ptr_b 3579 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) 3580 ret <32 x i16> %1 3581} 3582 3583define <32 x i16> @test_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 3584; X86-LABEL: test_adds_epi16_rmk_512: 3585; X86: # %bb.0: 3586; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3587; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3588; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08] 3589; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3590; X86-NEXT: retl # encoding: [0xc3] 3591; 3592; X64-LABEL: test_adds_epi16_rmk_512: 3593; X64: # %bb.0: 3594; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3595; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f] 3596; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3597; X64-NEXT: retq # encoding: [0xc3] 3598 %b = load <32 x i16>, <32 x i16>* %ptr_b 3599 %1 = call <32 x i16> 
@llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) 3600 %2 = bitcast i32 %mask to <32 x i1> 3601 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru 3602 ret <32 x i16> %3 3603} 3604 3605define <32 x i16> @test_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 3606; X86-LABEL: test_adds_epi16_rmkz_512: 3607; X86: # %bb.0: 3608; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3609; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3610; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00] 3611; X86-NEXT: retl # encoding: [0xc3] 3612; 3613; X64-LABEL: test_adds_epi16_rmkz_512: 3614; X64: # %bb.0: 3615; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3616; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07] 3617; X64-NEXT: retq # encoding: [0xc3] 3618 %b = load <32 x i16>, <32 x i16>* %ptr_b 3619 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) 3620 %2 = bitcast i32 %mask to <32 x i1> 3621 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer 3622 ret <32 x i16> %3 3623} 3624 3625declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>) 3626 3627define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 3628; CHECK-LABEL: test_mask_adds_epi16_rr_512: 3629; CHECK: # %bb.0: 3630; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1] 3631; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3632 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3633 ret <32 x i16> %res 3634} 3635 3636define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 3637; X86-LABEL: test_mask_adds_epi16_rrk_512: 3638; X86: # %bb.0: 3639; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # 
encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3640; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1] 3641; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3642; X86-NEXT: retl # encoding: [0xc3] 3643; 3644; X64-LABEL: test_mask_adds_epi16_rrk_512: 3645; X64: # %bb.0: 3646; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3647; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1] 3648; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3649; X64-NEXT: retq # encoding: [0xc3] 3650 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3651 ret <32 x i16> %res 3652} 3653 3654define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 3655; X86-LABEL: test_mask_adds_epi16_rrkz_512: 3656; X86: # %bb.0: 3657; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3658; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1] 3659; X86-NEXT: retl # encoding: [0xc3] 3660; 3661; X64-LABEL: test_mask_adds_epi16_rrkz_512: 3662; X64: # %bb.0: 3663; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3664; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1] 3665; X64-NEXT: retq # encoding: [0xc3] 3666 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3667 ret <32 x i16> %res 3668} 3669 3670define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 3671; X86-LABEL: test_mask_adds_epi16_rm_512: 3672; X86: # %bb.0: 3673; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3674; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00] 3675; X86-NEXT: retl # encoding: [0xc3] 3676; 3677; 
X64-LABEL: test_mask_adds_epi16_rm_512: 3678; X64: # %bb.0: 3679; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07] 3680; X64-NEXT: retq # encoding: [0xc3] 3681 %b = load <32 x i16>, <32 x i16>* %ptr_b 3682 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3683 ret <32 x i16> %res 3684} 3685 3686define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 3687; X86-LABEL: test_mask_adds_epi16_rmk_512: 3688; X86: # %bb.0: 3689; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3690; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3691; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08] 3692; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3693; X86-NEXT: retl # encoding: [0xc3] 3694; 3695; X64-LABEL: test_mask_adds_epi16_rmk_512: 3696; X64: # %bb.0: 3697; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3698; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f] 3699; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3700; X64-NEXT: retq # encoding: [0xc3] 3701 %b = load <32 x i16>, <32 x i16>* %ptr_b 3702 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3703 ret <32 x i16> %res 3704} 3705 3706define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 3707; X86-LABEL: test_mask_adds_epi16_rmkz_512: 3708; X86: # %bb.0: 3709; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3710; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3711; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00] 3712; X86-NEXT: 
retl # encoding: [0xc3] 3713; 3714; X64-LABEL: test_mask_adds_epi16_rmkz_512: 3715; X64: # %bb.0: 3716; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3717; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07] 3718; X64-NEXT: retq # encoding: [0xc3] 3719 %b = load <32 x i16>, <32 x i16>* %ptr_b 3720 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3721 ret <32 x i16> %res 3722} 3723 3724declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 3725 3726define <32 x i16> @test_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 3727; CHECK-LABEL: test_subs_epi16_rr_512: 3728; CHECK: # %bb.0: 3729; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1] 3730; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3731 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) 3732 ret <32 x i16> %1 3733} 3734 3735define <32 x i16> @test_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 3736; X86-LABEL: test_subs_epi16_rrk_512: 3737; X86: # %bb.0: 3738; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3739; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1] 3740; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3741; X86-NEXT: retl # encoding: [0xc3] 3742; 3743; X64-LABEL: test_subs_epi16_rrk_512: 3744; X64: # %bb.0: 3745; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3746; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1] 3747; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3748; X64-NEXT: retq # encoding: [0xc3] 3749 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) 3750 %2 = bitcast i32 %mask to <32 x i1> 3751 %3 = select 
<32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru 3752 ret <32 x i16> %3 3753} 3754 3755define <32 x i16> @test_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 3756; X86-LABEL: test_subs_epi16_rrkz_512: 3757; X86: # %bb.0: 3758; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3759; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1] 3760; X86-NEXT: retl # encoding: [0xc3] 3761; 3762; X64-LABEL: test_subs_epi16_rrkz_512: 3763; X64: # %bb.0: 3764; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3765; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1] 3766; X64-NEXT: retq # encoding: [0xc3] 3767 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) 3768 %2 = bitcast i32 %mask to <32 x i1> 3769 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer 3770 ret <32 x i16> %3 3771} 3772 3773define <32 x i16> @test_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 3774; X86-LABEL: test_subs_epi16_rm_512: 3775; X86: # %bb.0: 3776; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3777; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00] 3778; X86-NEXT: retl # encoding: [0xc3] 3779; 3780; X64-LABEL: test_subs_epi16_rm_512: 3781; X64: # %bb.0: 3782; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07] 3783; X64-NEXT: retq # encoding: [0xc3] 3784 %b = load <32 x i16>, <32 x i16>* %ptr_b 3785 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) 3786 ret <32 x i16> %1 3787} 3788 3789define <32 x i16> @test_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 3790; X86-LABEL: test_subs_epi16_rmk_512: 3791; X86: # %bb.0: 3792; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3793; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3794; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08] 3795; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3796; X86-NEXT: retl # encoding: [0xc3] 3797; 3798; X64-LABEL: test_subs_epi16_rmk_512: 3799; X64: # %bb.0: 3800; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3801; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f] 3802; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3803; X64-NEXT: retq # encoding: [0xc3] 3804 %b = load <32 x i16>, <32 x i16>* %ptr_b 3805 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) 3806 %2 = bitcast i32 %mask to <32 x i1> 3807 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru 3808 ret <32 x i16> %3 3809} 3810 3811define <32 x i16> @test_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 3812; X86-LABEL: test_subs_epi16_rmkz_512: 3813; X86: # %bb.0: 3814; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3815; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3816; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00] 3817; X86-NEXT: retl # encoding: [0xc3] 3818; 3819; X64-LABEL: test_subs_epi16_rmkz_512: 3820; X64: # %bb.0: 3821; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3822; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07] 3823; X64-NEXT: retq # encoding: [0xc3] 3824 %b = load <32 x i16>, <32 x i16>* %ptr_b 3825 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) 3826 %2 = bitcast i32 %mask to <32 x i1> 3827 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer 3828 ret <32 x i16> %3 3829} 3830 3831declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>) 3832 3833define <32 x 
i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 3834; CHECK-LABEL: test_mask_subs_epi16_rr_512: 3835; CHECK: # %bb.0: 3836; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1] 3837; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3838 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3839 ret <32 x i16> %res 3840} 3841 3842define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 3843; X86-LABEL: test_mask_subs_epi16_rrk_512: 3844; X86: # %bb.0: 3845; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3846; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1] 3847; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3848; X86-NEXT: retl # encoding: [0xc3] 3849; 3850; X64-LABEL: test_mask_subs_epi16_rrk_512: 3851; X64: # %bb.0: 3852; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3853; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1] 3854; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3855; X64-NEXT: retq # encoding: [0xc3] 3856 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3857 ret <32 x i16> %res 3858} 3859 3860define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 3861; X86-LABEL: test_mask_subs_epi16_rrkz_512: 3862; X86: # %bb.0: 3863; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3864; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1] 3865; X86-NEXT: retl # encoding: [0xc3] 3866; 3867; X64-LABEL: test_mask_subs_epi16_rrkz_512: 3868; X64: # %bb.0: 3869; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3870; 
X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1] 3871; X64-NEXT: retq # encoding: [0xc3] 3872 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3873 ret <32 x i16> %res 3874} 3875 3876define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 3877; X86-LABEL: test_mask_subs_epi16_rm_512: 3878; X86: # %bb.0: 3879; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3880; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00] 3881; X86-NEXT: retl # encoding: [0xc3] 3882; 3883; X64-LABEL: test_mask_subs_epi16_rm_512: 3884; X64: # %bb.0: 3885; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07] 3886; X64-NEXT: retq # encoding: [0xc3] 3887 %b = load <32 x i16>, <32 x i16>* %ptr_b 3888 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 3889 ret <32 x i16> %res 3890} 3891 3892define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 3893; X86-LABEL: test_mask_subs_epi16_rmk_512: 3894; X86: # %bb.0: 3895; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3896; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3897; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08] 3898; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3899; X86-NEXT: retl # encoding: [0xc3] 3900; 3901; X64-LABEL: test_mask_subs_epi16_rmk_512: 3902; X64: # %bb.0: 3903; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3904; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f] 3905; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3906; X64-NEXT: retq # encoding: 
[0xc3] 3907 %b = load <32 x i16>, <32 x i16>* %ptr_b 3908 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 3909 ret <32 x i16> %res 3910} 3911 3912define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 3913; X86-LABEL: test_mask_subs_epi16_rmkz_512: 3914; X86: # %bb.0: 3915; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3916; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3917; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00] 3918; X86-NEXT: retl # encoding: [0xc3] 3919; 3920; X64-LABEL: test_mask_subs_epi16_rmkz_512: 3921; X64: # %bb.0: 3922; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3923; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07] 3924; X64-NEXT: retq # encoding: [0xc3] 3925 %b = load <32 x i16>, <32 x i16>* %ptr_b 3926 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 3927 ret <32 x i16> %res 3928} 3929 3930declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 3931 3932define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) { 3933; CHECK-LABEL: test_mask_adds_epi8_rr_512: 3934; CHECK: # %bb.0: 3935; CHECK-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0xc1] 3936; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3937 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 3938 ret <64 x i8> %res 3939} 3940 3941define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) { 3942; X86-LABEL: test_mask_adds_epi8_rrk_512: 3943; X86: # %bb.0: 3944; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 3945; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1] 3946; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3947; X86-NEXT: retl # encoding: [0xc3] 3948; 3949; X64-LABEL: test_mask_adds_epi8_rrk_512: 3950; X64: # %bb.0: 3951; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 3952; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1] 3953; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 3954; X64-NEXT: retq # encoding: [0xc3] 3955 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 3956 ret <64 x i8> %res 3957} 3958 3959define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 3960; X86-LABEL: test_mask_adds_epi8_rrkz_512: 3961; X86: # %bb.0: 3962; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 3963; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1] 3964; X86-NEXT: retl # encoding: [0xc3] 3965; 3966; X64-LABEL: test_mask_adds_epi8_rrkz_512: 3967; X64: # %bb.0: 3968; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 3969; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1] 3970; X64-NEXT: retq # encoding: [0xc3] 3971 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 3972 ret <64 x i8> %res 3973} 3974 3975define <64 x i8> @test_mask_adds_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) { 3976; X86-LABEL: test_mask_adds_epi8_rm_512: 3977; X86: # %bb.0: 3978; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3979; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x00] 3980; X86-NEXT: retl # encoding: [0xc3] 3981; 3982; X64-LABEL: 
test_mask_adds_epi8_rm_512: 3983; X64: # %bb.0: 3984; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x07] 3985; X64-NEXT: retq # encoding: [0xc3] 3986 %b = load <64 x i8>, <64 x i8>* %ptr_b 3987 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 3988 ret <64 x i8> %res 3989} 3990 3991define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 3992; X86-LABEL: test_mask_adds_epi8_rmk_512: 3993; X86: # %bb.0: 3994; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3995; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 3996; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x08] 3997; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 3998; X86-NEXT: retl # encoding: [0xc3] 3999; 4000; X64-LABEL: test_mask_adds_epi8_rmk_512: 4001; X64: # %bb.0: 4002; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 4003; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x0f] 4004; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 4005; X64-NEXT: retq # encoding: [0xc3] 4006 %b = load <64 x i8>, <64 x i8>* %ptr_b 4007 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 4008 ret <64 x i8> %res 4009} 4010 4011define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) { 4012; X86-LABEL: test_mask_adds_epi8_rmkz_512: 4013; X86: # %bb.0: 4014; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4015; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 4016; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x00] 4017; X86-NEXT: retl # encoding: [0xc3] 4018; 4019; 
X64-LABEL: test_mask_adds_epi8_rmkz_512: 4020; X64: # %bb.0: 4021; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 4022; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x07] 4023; X64-NEXT: retq # encoding: [0xc3] 4024 %b = load <64 x i8>, <64 x i8>* %ptr_b 4025 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 4026 ret <64 x i8> %res 4027} 4028 4029declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 4030; Tests for @llvm.x86.avx512.mask.psubs.b.512, expected to select vpsubsb. Name suffixes: rr = both operands in registers, rm = %b loaded from %ptr_b; no further letter = mask -1 (unmasked), k = merge into %passThru under %mask, kz = zeroinitializer passthru with {z} zero-masking. 4031define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) { 4032; CHECK-LABEL: test_mask_subs_epi8_rr_512: 4033; CHECK: # %bb.0: 4034; CHECK-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0xc1] 4035; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4036 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 4037 ret <64 x i8> %res 4038} 4039; Merge-masked register form - the mask is moved into %k1 (from the stack on i686, from %rdi on x86_64), vpsubsb writes zmm2 under %k1, and zmm2 is then copied to zmm0. 4040define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) { 4041; X86-LABEL: test_mask_subs_epi8_rrk_512: 4042; X86: # %bb.0: 4043; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 4044; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1] 4045; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4046; X86-NEXT: retl # encoding: [0xc3] 4047; 4048; X64-LABEL: test_mask_subs_epi8_rrk_512: 4049; X64: # %bb.0: 4050; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 4051; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1] 4052; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4053; X64-NEXT: retq # encoding: [0xc3] 4054 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 4055 
ret <64 x i8> %res 4056} 4057 4058define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 4059; X86-LABEL: test_mask_subs_epi8_rrkz_512: 4060; X86: # %bb.0: 4061; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 4062; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1] 4063; X86-NEXT: retl # encoding: [0xc3] 4064; 4065; X64-LABEL: test_mask_subs_epi8_rrkz_512: 4066; X64: # %bb.0: 4067; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 4068; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1] 4069; X64-NEXT: retq # encoding: [0xc3] 4070 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 4071 ret <64 x i8> %res 4072} 4073; Memory-operand forms - %b is loaded from %ptr_b and the checks expect that load to be folded into vpsubsb as a memory operand. 4074define <64 x i8> @test_mask_subs_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) { 4075; X86-LABEL: test_mask_subs_epi8_rm_512: 4076; X86: # %bb.0: 4077; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4078; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x00] 4079; X86-NEXT: retl # encoding: [0xc3] 4080; 4081; X64-LABEL: test_mask_subs_epi8_rm_512: 4082; X64: # %bb.0: 4083; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x07] 4084; X64-NEXT: retq # encoding: [0xc3] 4085 %b = load <64 x i8>, <64 x i8>* %ptr_b 4086 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) 4087 ret <64 x i8> %res 4088} 4089 4090define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 4091; X86-LABEL: test_mask_subs_epi8_rmk_512: 4092; X86: # %bb.0: 4093; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4094; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 4095; X86-NEXT: 
vpsubsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x08] 4096; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 4097; X86-NEXT: retl # encoding: [0xc3] 4098; 4099; X64-LABEL: test_mask_subs_epi8_rmk_512: 4100; X64: # %bb.0: 4101; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 4102; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x0f] 4103; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 4104; X64-NEXT: retq # encoding: [0xc3] 4105 %b = load <64 x i8>, <64 x i8>* %ptr_b 4106 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) 4107 ret <64 x i8> %res 4108} 4109 4110define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) { 4111; X86-LABEL: test_mask_subs_epi8_rmkz_512: 4112; X86: # %bb.0: 4113; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4114; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 4115; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x00] 4116; X86-NEXT: retl # encoding: [0xc3] 4117; 4118; X64-LABEL: test_mask_subs_epi8_rmkz_512: 4119; X64: # %bb.0: 4120; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 4121; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x07] 4122; X64-NEXT: retq # encoding: [0xc3] 4123 %b = load <64 x i8>, <64 x i8>* %ptr_b 4124 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) 4125 ret <64 x i8> %res 4126} 4127 4128declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 4129; Tests for @llvm.x86.avx512.mask.psrlv32hi, expected to select vpsrlvw - unmasked (mask -1), merge-masked with passthru %x2 (masked write to zmm2, then a copy to zmm0) and zero-masked (zeroinitializer passthru, {z}). 4130declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) 4131 4132define <32 x i16>@test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x 
i16> %x1, <32 x i16> %x2) { 4133; CHECK-LABEL: test_int_x86_avx512_psrlv32hi: 4134; CHECK: # %bb.0: 4135; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xc1] 4136; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4137 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 4138 ret <32 x i16> %res 4139} 4140 4141define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 4142; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi: 4143; X86: # %bb.0: 4144; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4145; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1] 4146; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4147; X86-NEXT: retl # encoding: [0xc3] 4148; 4149; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi: 4150; X64: # %bb.0: 4151; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4152; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1] 4153; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4154; X64-NEXT: retq # encoding: [0xc3] 4155 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 4156 ret <32 x i16> %res 4157} 4158 4159define <32 x i16>@test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) { 4160; X86-LABEL: test_int_x86_avx512_maskz_psrlv32hi: 4161; X86: # %bb.0: 4162; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4163; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1] 4164; X86-NEXT: retl # encoding: [0xc3] 4165; 4166; X64-LABEL: test_int_x86_avx512_maskz_psrlv32hi: 4167; X64: # %bb.0: 4168; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4169; X64-NEXT: vpsrlvw %zmm1, %zmm0, 
%zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1] 4170; X64-NEXT: retq # encoding: [0xc3] 4171 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 4172 ret <32 x i16> %res 4173} 4174; Tests for @llvm.x86.avx512.mask.psrav32.hi, expected to select vpsravw - unmasked (mask -1), merge-masked with passthru %x2 (masked write to zmm2, then a copy to zmm0) and zero-masked (zeroinitializer passthru, {z}). 4175declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) 4176 4177define <32 x i16>@test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 4178; CHECK-LABEL: test_int_x86_avx512_psrav32_hi: 4179; CHECK: # %bb.0: 4180; CHECK-NEXT: vpsravw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xc1] 4181; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4182 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 4183 ret <32 x i16> %res 4184} 4185 4186define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 4187; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi: 4188; X86: # %bb.0: 4189; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4190; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1] 4191; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4192; X86-NEXT: retl # encoding: [0xc3] 4193; 4194; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi: 4195; X64: # %bb.0: 4196; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4197; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1] 4198; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4199; X64-NEXT: retq # encoding: [0xc3] 4200 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 4201 ret <32 x i16> %res 4202} 4203 4204define <32 x i16>@test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) { 4205; X86-LABEL: 
test_int_x86_avx512_maskz_psrav32_hi: 4206; X86: # %bb.0: 4207; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4208; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1] 4209; X86-NEXT: retl # encoding: [0xc3] 4210; 4211; X64-LABEL: test_int_x86_avx512_maskz_psrav32_hi: 4212; X64: # %bb.0: 4213; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4214; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1] 4215; X64-NEXT: retq # encoding: [0xc3] 4216 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 4217 ret <32 x i16> %res 4218} 4219; Tests for @llvm.x86.avx512.mask.psllv32hi, expected to select vpsllvw - unmasked (mask -1), merge-masked with passthru %x2 (masked write to zmm2, then a copy to zmm0) and zero-masked (zeroinitializer passthru, {z}). 4220declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) 4221 4222define <32 x i16>@test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 4223; CHECK-LABEL: test_int_x86_avx512_psllv32hi: 4224; CHECK: # %bb.0: 4225; CHECK-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xc1] 4226; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4227 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 4228 ret <32 x i16> %res 4229} 4230 4231define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 4232; X86-LABEL: test_int_x86_avx512_mask_psllv32hi: 4233; X86: # %bb.0: 4234; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4235; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1] 4236; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4237; X86-NEXT: retl # encoding: [0xc3] 4238; 4239; X64-LABEL: test_int_x86_avx512_mask_psllv32hi: 4240; X64: # %bb.0: 4241; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4242; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 
{%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1] 4243; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 4244; X64-NEXT: retq # encoding: [0xc3] 4245 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 4246 ret <32 x i16> %res 4247} 4248 4249define <32 x i16>@test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) { 4250; X86-LABEL: test_int_x86_avx512_maskz_psllv32hi: 4251; X86: # %bb.0: 4252; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4253; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1] 4254; X86-NEXT: retl # encoding: [0xc3] 4255; 4256; X64-LABEL: test_int_x86_avx512_maskz_psllv32hi: 4257; X64: # %bb.0: 4258; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4259; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1] 4260; X64-NEXT: retq # encoding: [0xc3] 4261 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 4262 ret <32 x i16> %res 4263} 4264; Tests for @llvm.x86.avx512.mask.pmov.wb.512 (<32 x i16> source, <32 x i8> result), expected to select vpmovwb from zmm0 into a ymm register - unmasked (mask -1), merge-masked with passthru %x1 (masked write to ymm1, then a copy to ymm0) and zero-masked (zeroinitializer passthru, {z}). 4265declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) 4266 4267define <32 x i8>@test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1) { 4268; CHECK-LABEL: test_int_x86_avx512_pmov_wb_512: 4269; CHECK: # %bb.0: 4270; CHECK-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0] 4271; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4272 %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 4273 ret <32 x i8> %res 4274} 4275 4276define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 4277; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 4278; X86: # %bb.0: 4279; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4280; X86-NEXT: vpmovwb %zmm0, 
%ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1] 4281; X86-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 4282; X86-NEXT: retl # encoding: [0xc3] 4283; 4284; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 4285; X64: # %bb.0: 4286; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4287; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1] 4288; X64-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 4289; X64-NEXT: retq # encoding: [0xc3] 4290 %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 4291 ret <32 x i8> %res 4292} 4293 4294define <32 x i8>@test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) { 4295; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_512: 4296; X86: # %bb.0: 4297; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4298; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0] 4299; X86-NEXT: retl # encoding: [0xc3] 4300; 4301; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_512: 4302; X64: # %bb.0: 4303; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4304; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0] 4305; X64-NEXT: retq # encoding: [0xc3] 4306 %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 4307 ret <32 x i8> %res 4308} 4309