; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
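;
; VPMADD52LUQ/VPMADD52HUQ multiply the unsigned low 52 bits of each 64-bit
; element of the two multiplicand operands and add the low (LUQ) or high (HUQ)
; 52 bits of the 104-bit product to the 64-bit accumulator operand.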

declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

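; Merge masking: lanes whose bit in %x3 is clear keep the accumulator value
; from %x0, modeled in IR by selecting between the intrinsic result and %x0.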
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

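; Zero masking ({%k1} {z}): lanes whose bit in %x3 is clear are zeroed,
; modeled by selecting against zeroinitializer.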
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

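; The same pattern for the low-half form, which accumulates the low 52 bits of
; the product. Only the opcode byte changes (0xb4 vs. 0xb5).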
declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

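; The remaining tests check that a full-width load of a multiplicand is folded
; into the instruction's memory operand.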
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

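; A splatted scalar load should fold as a {1to8} broadcast memory operand
; (EVEX.b set in the encoding).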
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

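; The multiplicands commute, so a load of %x1 should fold just like a load of
; %x2.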
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

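; Load and broadcast folding combined with merge masking.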
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

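; Load and broadcast folding combined with zero masking.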
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}