; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen tests for the 512-bit AVX512-VNNI intrinsics (vpdpbusd, vpdpbusds,
; vpdpwssd, vpdpwssds), run for an i686 and an x86_64 triple.
;
; Each intrinsic has two tests:
;   * an unmasked test whose expected output is shared by both targets, and
;   * a masked test whose expected output differs per target because the
;     pointer and the i16 mask are passed differently (stack vs. registers).

declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpbusd: all three operands are vector arguments and a single
; register-register instruction is expected.
; NOTE(review): the "ask" in this test name looks like a leftover fragment of
; "mask"; the vpdpbusds/vpdpwssd unmasked tests omit it. Kept as-is so the
; label stays stable -- confirm before renaming.
define <16 x i32>@test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_ask_vpdpbusd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpbusd. The third operand of the first call is loaded from %x2p and
; is expected to appear as a memory operand of the instruction. The i16 %x3 is
; bitcast to <16 x i1> and used twice: once to select between the result and
; %x0 (expected as {%k1}) and once to select between the result and zero
; (expected as {%k1} {z}). The two selected vectors are summed and returned.
define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18]
; X86-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x1f]
; X64-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpbusds: single register-register instruction expected on both
; targets.
define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpbusds: same structure as the masked vpdpbusd test -- a select
; against %x0 on a loaded operand, a select against zero on a register
; operand, and an add of the two results.
define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18]
; X86-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x1f]
; X64-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

declare <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpwssd: single register-register instruction expected on both
; targets.
define <16 x i32>@test_int_x86_avx512_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpwssd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpwssd: a select against %x0 on a loaded operand, a select against
; zero on a register operand, and an add of the two results.
define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18]
; X86-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x1f]
; X64-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

declare <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpwssds: single register-register instruction expected on both
; targets.
; NOTE(review): as with the vpdpbusd unmasked test, the "ask" in this name
; looks unintentional; kept unchanged so the label stays stable.
define <16 x i32>@test_int_x86_avx512_ask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_ask_vpdpwssds_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpwssds: a select against %x0 on a loaded operand, a select against
; zero on a register operand, and an add of the two results.
define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18]
; X86-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x1f]
; X64-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}