1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
16// Group template arguments that can be derived from the vector type (EltNum x
17// EltVT).  These are things like the register class for the writemask, etc.
18// The idea is to pass one of these as the template argument rather than the
19// individual arguments.
20// The template is also used for scalar types, in this case numelts is 1.
// Template parameters:
//   numelts - number of vector elements (1 for the scalar instantiations).
//   eltvt   - element value type.
//   rc      - register class stored in RC.
//   suffix  - mnemonic suffix stored in Suffix (defaults to "").
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The GPR register class that can hold the write mask.  Use GR8 for fewer
  // than 8 elements.  Use shift-right and equal to work around the lack of
  // !lt in tablegen.  (NumElts == 8 fails the shift test but still resolves
  // to "GR8" through the "GR" # NumElts branch.)
  RegisterClass MRC =
    !cast<RegisterClass>("GR" #
                         !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  // (more precisely: 4 elements for a 32-bit EltVT, 2 elements for a 64-bit
  // EltVT, and NumElts unchanged for any other element size).
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  // EltSizeName is the string form (EltTypeName with "i"/"f" stripped);
  // EltSize is the integer form.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, looked up as EltVT # "mem".
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load patterns
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                            VTName)), VTName));

  // Aligned counterpart of LdFrag.  Unlike LdFrag, 512-bit integer types are
  // also remapped: to v8i64 when EltSize is 64, and to v16i32 otherwise.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                          !if (!eq (TypeVariantName, "i"),
                                !if (!eq (Size, 128), "v2i64",
                                !if (!eq (Size, 256), "v4i64",
                                !if (!eq (Size, 512),
                                    !if (!eq (EltSize, 64), "v8i64", "v16i32"),
                                    VTName))), VTName));

  // Load fragment for a single element, looked up as "load" # EltVT.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // The corresponding float type, e.g. v16f32 for v16i32
  // Note: For EltSize < 32, FloatVT is illegal and TableGen
  //       fails to compile, so we choose FloatVT = VT
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  // The shift test below actually admits any NumElts < 16; for larger values
  // the field is left unset (?).
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index: sub_xmm for 128-bit types, sub_ymm for 256-bit types,
  // unset (?) for every other size.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain: SSEPackedSingle for f32 elements, SSEPackedDouble for
  // f64 elements, SSEPackedInt for everything else.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // FR32X for f32 elements; every other element type (including the integer
  // ones) gets FR64X.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i32.  This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // "Z128" / "Z256" for 128- / 256-bit types, "Z" for any other size.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
123
// 512-bit vector types (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types; here the "x" suffix means RC = VR128X.
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows the same masking logic to
// be used.  Note that the integer scalars use GR32/GR64 as RC, while the
// floating-point scalars use VR128X.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
152
// Groups three X86VectorVTInfo records into one record; going by the field
// names, one each for the 512-, 256- and 128-bit vector widths.
class AVX512VLVectorVTInfo<X86VectorVTInfo Info512,
                           X86VectorVTInfo Info256,
                           X86VectorVTInfo Info128> {
  X86VectorVTInfo info512 = Info512;
  X86VectorVTInfo info256 = Info256;
  X86VectorVTInfo info128 = Info128;
}
159
// One AVX512VLVectorVTInfo per element type, listing the 512-bit, 256-bit and
// 128-bit X86VectorVTInfo records in that order.
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
172
173// This multiclass generates the masking variants from the non-masking
174// variant.  It only provides the assembly pieces for the masking variants.
175// It assumes custom ISel patterns for masking which can be provided as
176// template arguments.
// Emits three records: NAME (unmasked), NAME#k (merge-masking, EVEX_K) and
// NAME#kz (zero-masking, EVEX_KZ).  Each takes its own ins dag and pattern
// list; the AT&T and Intel operand strings are shared, with the mask
// decoration appended to $dst for the masked forms.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  InstrItinClass itin = NoItinerary,
                                  bit IsCommutable = 0> {
  // Note: isCommutable is applied to the unmasked record only.
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst , "#IntelSrcAsm#"}",
                       Pattern, itin>;

  // Prefer over VMOV*rrk Pat<>
  let AddedComplexity = 20 in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern, itin>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
  }
  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern,
                       itin>,
              EVEX_KZ;
}
212
213
214// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked:     (set _.RC:$dst, RHS)
//   merge-masked: (set _.RC:$dst, MaskingRHS)
//   zero-masked:  (set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))
// Select chooses the select node used for the zero-masking pattern; the
// caller is responsible for using a matching node inside MaskingRHS.
// MaskingConstraint, itin and IsCommutable are forwarded unchanged.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  InstrItinClass itin = NoItinerary,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         // Forward the caller's itinerary rather than
                         // silently replacing it with NoItinerary.
                         MaskingConstraint, itin, IsCommutable>;
232
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// The merge-masking ins are ($src0, $mask) prepended to Ins, the zero-masking
// ins are ($mask) prepended to Ins, and vselect is used for both masked
// patterns.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           InstrItinClass itin = NoItinerary,
                           bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect,
                          "$src0 = $dst", itin, IsCommutable>;
248
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Identical to AVX512_maskable except that X86select replaces vselect in both
// the merge-masking and the zero-masking pattern.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           InstrItinClass itin = NoItinerary,
                           bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select,
                          "$src0 = $dst", itin, IsCommutable>;
263
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
// No masking constraint is passed here (it defaults to ""), and both masked
// variants receive the same ins dag: ($src1, $mask) followed by NonTiedIns.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
278
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE(review): the merge-masking pattern and the ins dags use InVT.KRCWM for
// $mask, while the zero-masking pattern built by AVX512_maskable_common uses
// OutVT.KRCWM -- confirm callers only pair types with equal element counts.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1))>;
294
// Scalar counterpart of AVX512_maskable_3src: $src1 supplies the preserved
// elements, and NonTiedIns must not repeat it.  X86select is used for the
// merge-masking pattern and is also passed as the Select node, so that the
// zero-masking pattern generated by AVX512_maskable_common uses X86select too
// (matching AVX512_maskable_scalar) instead of falling back to vselect.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (X86select _.KRCWM:$mask, RHS, _.RC:$src1),
                          X86select>;
305
// Generates the unmasked, merge-masking and zero-masking records with the
// same operand layout as AVX512_maskable, but only the unmasked record gets
// an ISel pattern (Pattern); both masked records are given empty pattern
// lists.  The template parameter _ only supplies the register classes for
// the extra $src0/$mask operands.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;
316
317
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Emits two records: NAME (unmasked) and NAME#k (masked, EVEX_K).  There is
// no zero-masking variant.  Round is an extra string spliced into the
// assembly strings.
// NOTE(review): Round is placed differently in the two records -- after $dst
// in both syntaxes for NAME, but before AttSrcAsm / after IntelSrcAsm for
// NAME#k.  Confirm this difference is intended.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  string Round = "",
                                  InstrItinClass itin = NoItinerary> {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
                                     "$dst "#Round#", "#IntelSrcAsm#"}",
                       Pattern, itin>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#Round#"}",
                       MaskingPattern, itin>, EVEX_K;
}
339
// Builds the two pattern lists for AVX512_maskable_custom_cmp from dags.  The
// result is written to the mask register class _.KRC:
//   unmasked: (set _.KRC:$dst, RHS)
//   masked:   (set _.KRC:$dst, MaskingRHS)
// Round and itin are forwarded unchanged.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  string Round = "",
                                  InstrItinClass itin = NoItinerary> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)],
                         // Forward the caller's itinerary rather than
                         // silently replacing it with NoItinerary.
                         Round, itin>;
353
// Generates the unmasked and masked variants of an instruction that writes a
// mask register.  The masked ins are ($mask) prepended to Ins, and the masked
// pattern is the bitwise 'and' of $mask with RHS.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, string Round = "",
                           InstrItinClass itin = NoItinerary> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS),
                          Round, itin>;
364
// Pattern-less variant of AVX512_maskable_cmp: emits the unmasked and masked
// records with empty pattern lists, an empty Round string and NoItinerary.
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [],[],"", NoItinerary>;
372
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
  def : Pat<(v8f64  (bitconvert (v8i64 VR512:$src))),  (v8f64 VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v64i8 VR512:$src))),  (v8f64 VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))),  (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))),  (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))),  (v16f32 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v64i8 VR512:$src))),  (v8i64 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v8f64 VR512:$src))),  (v8i64 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))),  (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))),  (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))),  (v16i32 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))),  (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))),  (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))),  (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8i64 VR512:$src))),  (v64i8 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8f64 VR512:$src))),  (v64i8 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
  def : Pat<(v4f64  (bitconvert (v8f32 VR256X:$src))),  (v4f64 VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32 VR256X:$src))),  (v4f64 VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64 VR256X:$src))),  (v4f64 VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8 VR256X:$src))),  (v4f64 VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32 VR256X:$src))),  (v8f32 VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64 VR256X:$src))),  (v8f32 VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64 VR256X:$src))),  (v8f32 VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8 VR256X:$src))),  (v8f32 VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32 VR256X:$src))),  (v4i64 VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32 VR256X:$src))),  (v4i64 VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64 VR256X:$src))),  (v4i64 VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8 VR256X:$src))),  (v4i64 VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64 VR256X:$src))),  (v32i8 VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64 VR256X:$src))),  (v32i8 VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32 VR256X:$src))),  (v32i8 VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32 VR256X:$src))),  (v32i8 VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8 VR256X:$src))),  (v8i32 VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32 VR256X:$src))),  (v8i32 VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64 VR256X:$src))),  (v8i32 VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64 VR256X:$src))),  (v8i32 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))),  (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))),  (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))),  (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))),  (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))),  (v16i16 VR256X:$src)>;
}
472
//
// AVX-512: The VPXOR instruction writes zero to its upper part, so it is safe
// to use it to build zeros.
//

// Pseudo instruction (empty asm string, no input operands) that produces an
// all-zeros 512-bit register; its pattern matches the v16f32 zero vector.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16f32 immAllZerosV))]>;
}
482
// Select AVX512_512_SET0 for the remaining 512-bit zero vectors listed here
// (v16f32 is handled by the pattern on the pseudo itself).
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
488
489//===----------------------------------------------------------------------===//
490// AVX-512 - VECTOR INSERT
491//
// Defines the register (rr) and memory (rm) forms of a subvector insert of
// type From into type To, each with unmasked/masked/zero-masked variants via
// AVX512_maskable.  The mnemonic is built as
// "vinsert" # From.EltTypeName # "x" # From.NumElts (e.g. "vinsertf32x4"),
// and vinsert_insert is the PatFrag matched by the ISel patterns.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
                                                       PatFrag vinsert_insert> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm))>, AVX512AIi8Base, EVEX_4V;

  // Memory form: the inserted subvector is loaded through From.LdFrag, and
  // the compressed displacement is derived from the From type.
  let mayLoad = 1 in
    defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>;
  }
}
514
// Adds ISel patterns that lower a subvector insert of type From into type To
// onto an already-defined instruction.  InstrStr is the instruction's base
// name; "rr" / "rm" is appended to select the register / memory form.
// GetInsertImm converts the matched insert node into the instruction's
// immediate operand, and Preds lists the predicates guarding both patterns.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To,
                                     PatFrag vinsert_insert,
                                     SDNodeXForm GetInsertImm,
                                     list<Predicate> Preds> {
  let Predicates = Preds in {
    // Register source.
    def : Pat<(vinsert_insert:$insert (To.VT To.RC:$src1),
                                      (From.VT From.RC:$src2),
                                      (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                        To.RC:$src1, From.RC:$src2,
                        (GetInsertImm To.RC:$insert)))>;

    // Memory source, loaded through From.LdFrag.
    def : Pat<(vinsert_insert:$insert (To.VT To.RC:$src1),
                                      (From.VT (bitconvert
                                                 (From.LdFrag addr:$src2))),
                                      (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                        To.RC:$src1, addr:$src2,
                        (GetInsertImm To.RC:$insert)))>;
  }
}
534
// Instantiates vinsert_for_size for every supported subvector/vector size
// combination of one element-type family.  EltVT32/EltVT64 are the 32-bit and
// 64-bit element types; Opcode128/Opcode256 are the opcodes used when
// inserting a 128-bit / 256-bit subvector.  The 64-bit element forms add
// VEX_W.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256> {

  // 32x4 into 256 bits: requires VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert>, EVEX_V256;

  // 32x4 and 64x4 into 512 bits: no extra predicate is set here.
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert>, VEX_W, EVEX_V512;

  // 64x2 into 256 bits: requires both VLX and DQI.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vinsert128_insert>, VEX_W, EVEX_V256;

  // 64x2 and 32x8 into 512 bits: require DQI.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert128_insert>, VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   vinsert256_insert>, EVEX_V512;
  }
}
572
// Floating-point (f32/f64) and integer (i32/i64) subvector-insert families.
// In each instantiation the first opcode is used by the vinsert128_insert
// forms and the second by the vinsert256_insert forms.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
575
// Codegen patterns with the alternative types.
// Only add these if 64x2 and its friends are not supported natively via
// AVX512DQ (hence the NoDQI predicate): 64-bit-element 128-bit inserts are
// selected to the 32x4 instructions, and 32-bit-element 256-bit inserts to the
// 64x4 instructions.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
// The i16 and i8 element vectors reuse the 32x4 integer instruction.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
// The i16 and i8 element vectors reuse the 64x4 integer instruction.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
608
// vinsertps - insert f32 to XMM
// Register form: both vector operands are VR128X registers and $src3 is the
// 8-bit immediate handed to the X86insertps node.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V;
// Memory form: the second operand is an f32 loaded from memory and wrapped in
// a v4f32 scalar_to_vector before being passed to X86insertps.
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
621
622//===----------------------------------------------------------------------===//
623// AVX-512 VECTOR EXTRACT
624//---
625
// Lowers an extract of the first subvector without emitting an instruction.
//   SrcInfo  - type info of the vector being read.
//   DestInfo - type info of the extracted subvector.
multiclass vextract_for_size_first_position_lowering<X86VectorVTInfo SrcInfo,
                                                     X86VectorVTInfo DestInfo> {
  // Extracting at index 0 is a subregister copy: the result is simply the
  // DestInfo.SubRegIdx subregister of $src.
  def NAME # DestInfo.NumElts
    : Pat<(DestInfo.VT (extract_subvector (SrcInfo.VT SrcInfo.RC:$src),
                                          (iPTR 0))),
          (DestInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src),
                                       DestInfo.SubRegIdx))>;
}
634
// Defines one subvector-extract instruction shape together with its
// intrinsic selection patterns.
//   Opcode           - opcode shared by the register and store forms.
//   From             - type info of the source vector.
//   To               - type info of the extracted subvector.
//   vextract_extract - PatFrag matched by the unmasked register form.
// Also inherits the index-0 subregister-copy pattern for <From, To>.
multiclass vextract_for_size<int Opcode,
                                    X86VectorVTInfo From, X86VectorVTInfo To,
                                    PatFrag vextract_extract> :
  vextract_for_size_first_position_lowering<From, To> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
    // vextract_extract): only the unmasked form gets a pattern here; the
    // masked forms are matched by the intrinsic patterns generated below.
    defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, i32u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                [(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
                                                         (iPTR imm)))]>,
              AVX512AIi8Base, EVEX;
    let mayStore = 1 in {
      // Store form; declared without a selection pattern.
      def rm  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                      (ins To.MemOp:$dst, From.RC:$src1, i32u8imm:$src2),
                      "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>, EVEX;

      // Write-masked store form; declared without a selection pattern.
      def rmk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                      (ins To.MemOp:$dst, To.KRCWM:$mask,
                                          From.RC:$src1, i32u8imm:$src2),
                       "vextract" # To.EltTypeName # "x" # To.NumElts #
                            "\t{$src2, $src1, $dst {${mask}}|"
                            "$dst {${mask}}, $src1, $src2}",
                      []>, EVEX_K, EVEX;
    }//mayStore = 1
  }

  // The three patterns below look up the intrinsic named
  // int_x86_avx512_mask_vextract<EltTypeName>x<NumElts>_<From.Size> and the
  // instruction named NAME<EltSize>x<NumElts><From.ZSuffix><form>.

  // Intrinsic call with masking: $src0 is the pass-through value and the GPR
  // mask is copied into the write-mask register class.
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
                              "x" # To.NumElts # "_" # From.Size)
                From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
                                From.ZSuffix # "rrk")
                To.RC:$src0,
                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
                From.RC:$src1, imm:$idx)>;

  // Intrinsic call with zero-masking: the pass-through operand is all zeros.
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
                              "x" # To.NumElts # "_" # From.Size)
                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
                                From.ZSuffix # "rrkz")
                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
                From.RC:$src1, imm:$idx)>;

  // Intrinsic call without masking: the mask operand is the constant (i8 -1).
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
                              "x" # To.NumElts # "_" # From.Size)
                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
                                From.ZSuffix # "rr")
                From.RC:$src1, imm:$idx)>;
}
695
// Codegen pattern for the alternative types: selects a subvector extract on
// <SrcInfo, DestInfo> to the "rr" form of an already-defined instruction.
//   InstName    - instruction name without the "rr" suffix.
//   ExtractFrag - PatFrag that matches the extract.
//   ImmXForm    - transform applied to the matched node to get the immediate.
//   Preds       - predicates guarding the pattern.
// The index-0 subregister-copy pattern for <SrcInfo, DestInfo> is inherited.
multiclass vextract_for_size_lowering<string InstName,
                                      X86VectorVTInfo SrcInfo,
                                      X86VectorVTInfo DestInfo,
                                      PatFrag ExtractFrag,
                                      SDNodeXForm ImmXForm,
                                      list<Predicate> Preds>
  : vextract_for_size_first_position_lowering<SrcInfo, DestInfo> {

  let Predicates = Preds in {
    def : Pat<(ExtractFrag:$ext (SrcInfo.VT SrcInfo.RC:$src1), (iPTR imm)),
              (DestInfo.VT (!cast<Instruction>(InstName # "rr")
                              SrcInfo.RC:$src1,
                              (ImmXForm DestInfo.RC:$ext)))>;
  }
}
708
// Instantiates the EVEX subvector-extract variants for one element family.
//   Elt32 / Elt64 - element types used for the 32-bit and 64-bit shapes.
//   Opc128        - opcode for the forms that use vextract128_extract.
//   Opc256        - opcode for the forms that use vextract256_extract.
// Each record is named NAME followed by a "<bits>x<count>Z[256]" suffix.
multiclass vextract_for_type<ValueType Elt32, int Opc128,
                             ValueType Elt64, int Opc256> {
  // 4 x Elt32 (VR128X) out of 16 x Elt32 (VR512).
  defm NAME # "32x4Z"
    : vextract_for_size<Opc128,
                        X86VectorVTInfo<16, Elt32, VR512>,
                        X86VectorVTInfo<4, Elt32, VR128X>,
                        vextract128_extract>,
      EVEX_V512, EVEX_CD8<32, CD8VT4>;

  // 4 x Elt64 (VR256X) out of 8 x Elt64 (VR512).
  defm NAME # "64x4Z"
    : vextract_for_size<Opc256,
                        X86VectorVTInfo<8, Elt64, VR512>,
                        X86VectorVTInfo<4, Elt64, VR256X>,
                        vextract256_extract>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;

  // 4 x Elt32 (VR128X) out of 8 x Elt32 (VR256X); requires VLX.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
      : vextract_for_size<Opc128,
                          X86VectorVTInfo<8, Elt32, VR256X>,
                          X86VectorVTInfo<4, Elt32, VR128X>,
                          vextract128_extract>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // 2 x Elt64 (VR128X) out of 4 x Elt64 (VR256X); requires VLX and DQI.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
      : vextract_for_size<Opc128,
                          X86VectorVTInfo<4, Elt64, VR256X>,
                          X86VectorVTInfo<2, Elt64, VR128X>,
                          vextract128_extract>,
        VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // The remaining 512-bit shapes are only defined under DQI.
  let Predicates = [HasDQI] in {
    // 2 x Elt64 (VR128X) out of 8 x Elt64 (VR512).
    defm NAME # "64x2Z"
      : vextract_for_size<Opc128,
                          X86VectorVTInfo<8, Elt64, VR512>,
                          X86VectorVTInfo<2, Elt64, VR128X>,
                          vextract128_extract>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    // 8 x Elt32 (VR256X) out of 16 x Elt32 (VR512).
    defm NAME # "32x8Z"
      : vextract_for_size<Opc256,
                          X86VectorVTInfo<16, Elt32, VR512>,
                          X86VectorVTInfo<8, Elt32, VR256X>,
                          vextract256_extract>,
        EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
746
// Floating-point (f32/f64) and integer (i32/i64) subvector-extract families.
// In each instantiation the first opcode is used by the vextract128_extract
// forms and the second by the vextract256_extract forms.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
749
// extract_subvector codegen patterns with the alternative types.
// Only add these if 64x2 and its friends are not supported natively via
// AVX512DQ (hence the NoDQI predicate): 64-bit-element 128-bit extracts are
// selected to the 32x4 instructions, and 32-bit-element 256-bit extracts to
// the 64x4 instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;

// Same fallback for extracting VEC128 from VEC256 (VLX without DQI).
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
// The i16 and i8 element vectors reuse the 32x4 integer instruction.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
// The i16 and i8 element vectors reuse the 64x4 integer instruction.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
777
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
// The XMM value is first placed into an undefined 256-bit value via sub_xmm,
// and that value is then placed into an undefined 512-bit value via sub_ymm.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
          sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
          sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
          sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
          sub_ymm)>;

// Likewise, a 256-bit subvector insert to the first 512-bit vector position
// is a single sub_ymm subregister insert into an undefined 512-bit value.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
809
// vextractps - extract 32 bits from XMM
// Register form: the v4f32 source is reinterpreted as v4i32 and the element
// selected by the immediate is written to a GR32 register.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX;

// Memory form: the same extracted element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
822
823//===---------------------------------------------------------------------===//
824// AVX-512 BROADCAST
825//---
826
// Defines the register ("r") and memory ("m") forms of a maskable broadcast.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   DstVTI    - type info of the broadcast result.
//   SrcVTI    - type info of the source operand.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               X86VectorVTInfo DstVTI,
                               X86VectorVTInfo SrcVTI> {
  // Source is a register of SrcVTI's register class.
  defm r : AVX512_maskable<opc, MRMSrcReg, DstVTI,
                           (outs DstVTI.RC:$dst),
                           (ins SrcVTI.RC:$src),
                           OpcodeStr, "$src", "$src",
                           (DstVTI.VT
                             (X86VBroadcast (SrcVTI.VT SrcVTI.RC:$src)))>,
           T8PD, EVEX;

  // Source is a scalar loaded through SrcVTI.ScalarLdFrag.
  let mayLoad = 1 in {
    defm m : AVX512_maskable<opc, MRMSrcMem, DstVTI,
                             (outs DstVTI.RC:$dst),
                             (ins SrcVTI.ScalarMemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (DstVTI.VT
                               (X86VBroadcast
                                 (SrcVTI.ScalarLdFrag addr:$src)))>,
             T8PD, EVEX, EVEX_CD8<SrcVTI.EltSize, CD8VT1>;
  }
}
841
// Instantiates avx512_broadcast_rm for the 512-bit (Z) and 256-bit (Z256)
// destination types of VTI. The source is always the 128-bit info. The Z256
// variant is additionally gated on VLX.
multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo VTI> {
  defm Z : avx512_broadcast_rm<opc, OpcodeStr, VTI.info512, VTI.info128>,
           EVEX_V512;

  let Predicates = [HasVLX] in
  defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, VTI.info256, VTI.info128>,
              EVEX_V256;
}
852
// Single-precision broadcast: the Z and Z256 variants come from
// avx512_fp_broadcast_vl; the 128-bit destination variant is defined
// separately and requires VLX.
let ExeDomain = SSEPackedSingle in {
  defm VBROADCASTSS  : avx512_fp_broadcast_vl<0x18, "vbroadcastss",
                                         avx512vl_f32_info>;
   let Predicates = [HasVLX] in {
     defm VBROADCASTSSZ128  : avx512_broadcast_rm<0x18, "vbroadcastss",
                                         v4f32x_info, v4f32x_info>, EVEX_V128;
   }
}

// Double-precision broadcast: only the Z and Z256 variants are defined here.
let ExeDomain = SSEPackedDouble in {
  defm VBROADCASTSD  : avx512_fp_broadcast_vl<0x19, "vbroadcastsd",
                                         avx512vl_f64_info>, VEX_W;
}
866
// avx512_broadcast_pat introduces patterns for broadcast with a scalar
// argument. Later, we can canonicalize broadcast instructions before the ISel
// phase and eliminate the additional patterns on ISel.
//   InstName - instruction name without the "r"/"rk"/"rkz" suffix.
//   OpNode   - broadcast node to match.
//   VTI      - type info of the broadcast result.
//   SrcRC_v, SrcRC_s - RegisterClasses for the vector and scalar
//                      representations of the source.
multiclass avx512_broadcast_pat<string InstName, SDNode OpNode,
                                X86VectorVTInfo VTI, RegisterClass SrcRC_v,
                                RegisterClass SrcRC_s> {
  // Unmasked: copy the scalar register into the vector register class and
  // use the plain register form.
  def : Pat<(VTI.VT (OpNode (VTI.EltVT SrcRC_s:$src))),
            (!cast<Instruction>(InstName # "r")
               (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>;

  let AddedComplexity = 30 in {
    // Merge-masked: a vselect between the broadcast and $src0.
    def : Pat<(VTI.VT (vselect VTI.KRCWM:$mask,
                               (OpNode (VTI.EltVT SrcRC_s:$src)),
                               VTI.RC:$src0)),
              (!cast<Instruction>(InstName # "rk")
                 VTI.RC:$src0, VTI.KRCWM:$mask,
                 (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>;

    // Zero-masked: a vselect between the broadcast and an all-zeros vector.
    def : Pat<(VTI.VT (vselect VTI.KRCWM:$mask,
                               (OpNode (VTI.EltVT SrcRC_s:$src)),
                               VTI.ImmAllZerosV)),
              (!cast<Instruction>(InstName # "rkz")
                 VTI.KRCWM:$mask,
                 (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>;
  }
}
891
// Scalar-register broadcast patterns for the 512-bit FP broadcasts.
defm : avx512_broadcast_pat<"VBROADCASTSSZ", X86VBroadcast, v16f32_info,
                            VR128X, FR32X>;
defm : avx512_broadcast_pat<"VBROADCASTSDZ", X86VBroadcast, v8f64_info,
                            VR128X, FR64X>;

// The same patterns for the 256-bit and 128-bit destinations, gated on VLX.
let Predicates = [HasVLX] in {
  defm : avx512_broadcast_pat<"VBROADCASTSSZ256", X86VBroadcast,
                              v8f32x_info, VR128X, FR32X>;
  defm : avx512_broadcast_pat<"VBROADCASTSSZ128", X86VBroadcast,
                              v4f32x_info, VR128X, FR32X>;
  defm : avx512_broadcast_pat<"VBROADCASTSDZ256", X86VBroadcast,
                              v4f64x_info, VR128X, FR64X>;
}

// Broadcast of a scalar load selects the memory form directly.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZm addr:$src)>;

// The 512-bit broadcast-from-address intrinsics also select the memory form.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZm addr:$src)>;
915
// Integer broadcast whose source is a register of class SrcRC. The mnemonic
// is "vpbroadcast" followed by VTI.Suffix. The instruction carries no
// selection pattern of its own (the pattern list is empty).
multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo VTI,
                                    RegisterClass SrcRC> {
  defm r : AVX512_maskable_in_asm<opc, MRMSrcReg, VTI,
                                  (outs VTI.RC:$dst), (ins SrcRC:$src),
                                  "vpbroadcast" # VTI.Suffix,
                                  "$src", "$src", []>,
           T8PD, EVEX;
}
922
// Instantiates avx512_int_broadcast_reg for the 512-, 256- and 128-bit types
// of VTI. BasePred gates every variant; the 256- and 128-bit variants also
// require VLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo VTI,
                                       RegisterClass SrcRC,
                                       Predicate BasePred> {
  let Predicates = [BasePred] in {
    defm Z : avx512_int_broadcast_reg<opc, VTI.info512, SrcRC>, EVEX_V512;
  }

  let Predicates = [BasePred, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, VTI.info256, SrcRC>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, VTI.info128, SrcRC>,
                EVEX_V128;
  }
}
932
// Integer broadcasts from a general-purpose register. The byte and word
// variants take a GR32 source and are gated on BWI; the dword and qword
// variants share opcode 0x7C and the qword one adds VEX_W.
defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR32,
                                                 HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR32,
                                                 HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32,
                                                 HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64,
                                                 HasAVX512>, VEX_W;

// Zero-extending a mask to a vector: zero-masked broadcast of the constant 1.
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
           (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
           (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Broadcast of a GPR value selects the unmasked register form.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
        (VPBROADCASTDrZr GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
        (VPBROADCASTQrZr GR64:$src)>;

// Unmasked 512-bit GPR broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
        (VPBROADCASTDrZr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
        (VPBROADCASTQrZr GR64:$src)>;

// Masked GPR broadcast intrinsics with an all-zeros pass-through: the GPR
// mask is copied into the write-mask class and the zero-masked form is used.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
                   (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZrkz (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
964
// Provide aliases for broadcast from the same register class that
// automatically do the extract: a broadcast whose source is a SrcVTI vector
// register is selected to the register form named NAME # ZSuffix # "r",
// fed with the sub_xmm subregister of that source.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DstVTI,
                                            X86VectorVTInfo SrcVTI> {
  def : Pat<(DstVTI.VT (X86VBroadcast (SrcVTI.VT SrcVTI.RC:$src))),
            (!cast<Instruction>(NAME # DstVTI.ZSuffix # "r")
               (EXTRACT_SUBREG (SrcVTI.VT SrcVTI.RC:$src), sub_xmm))>;
}
973
// Instantiates the integer broadcast (register/memory forms) for the 512-,
// 256- and 128-bit types of VTI, plus lowering patterns that accept wider
// source vectors. BasePred gates every variant; the 256- and 128-bit
// variants also require VLX.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo VTI,
                                      Predicate BasePred> {
  let Predicates = [BasePred] in {
    // 512-bit result; also accepts a 256-bit source vector.
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, VTI.info512, VTI.info128>,
             avx512_int_broadcast_rm_lowering<VTI.info512, VTI.info256>,
             EVEX_V512;

    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<VTI.info512, VTI.info512>;
  }

  let Predicates = [BasePred, HasVLX] in {
    // 256-bit result; also accepts a 256-bit source vector.
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, VTI.info256, VTI.info128>,
                avx512_int_broadcast_rm_lowering<VTI.info256, VTI.info256>,
                EVEX_V256;

    // 128-bit result.
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, VTI.info128, VTI.info128>,
                EVEX_V128;
  }
}
991
// Integer element broadcasts with a vector-register or memory source. The
// byte and word variants are gated on BWI; the qword variant adds VEX_W.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W;
1000
// Subvector broadcast from memory: defines the unmasked ("rm"), merge-masked
// ("rmk") and zero-masked ("rmkz") load forms. Only the unmasked form has a
// selection pattern.
//   DstVTI - type info of the broadcast result.
//   SrcVTI - type info of the subvector loaded from memory.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo DstVTI,
                                      X86VectorVTInfo SrcVTI> {
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs DstVTI.RC:$dst),
                      (ins SrcVTI.MemOp:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set DstVTI.RC:$dst,
                         (DstVTI.VT
                           (X86SubVBroadcast
                             (SrcVTI.VT
                               (bitconvert (SrcVTI.LdFrag addr:$src))))))]>,
             EVEX;

    def rmk : AVX5128I<opc, MRMSrcMem,
                       (outs DstVTI.RC:$dst),
                       (ins DstVTI.KRCWM:$mask, SrcVTI.MemOp:$src),
                       OpcodeStr #
                         "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                       []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem,
                        (outs DstVTI.RC:$dst),
                        (ins DstVTI.KRCWM:$mask, SrcVTI.MemOp:$src),
                        OpcodeStr #
                          "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                        []>,
               EVEX, EVEX_KZ;
  }
}
1021
// Subvector broadcasts into 512-bit vectors: 32x4 and 64x4 shapes.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

// 32x4 subvector broadcasts into 256-bit vectors; gated on VLX.
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
}
// 64x2 subvector broadcasts into 256-bit vectors; gated on VLX and DQI.
// Note that these records carry a Z128 suffix although the destination type
// is 256 bits wide and the encoding is EVEX_V256.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
}
// 64x2 and 32x8 subvector broadcasts into 512-bit vectors; gated on DQI.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
1065
// Defines the register ("r") and memory ("m") forms of a maskable 32x2
// broadcast.
//   DstVTI - type info of the broadcast result.
//   SrcVTI - type info of the source vector.
//   OpNode - node to match; defaults to X86SubVBroadcast.
multiclass avx512_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                 X86VectorVTInfo DstVTI,
                                 X86VectorVTInfo SrcVTI,
                                 SDNode OpNode = X86SubVBroadcast> {
  // Source is a register of SrcVTI's register class.
  defm r : AVX512_maskable<opc, MRMSrcReg, DstVTI,
                           (outs DstVTI.RC:$dst),
                           (ins SrcVTI.RC:$src),
                           OpcodeStr, "$src", "$src",
                           (DstVTI.VT (OpNode (SrcVTI.VT SrcVTI.RC:$src)))>,
           T8PD, EVEX;

  // Source is an i64 load wrapped in scalar_to_vector.
  let mayLoad = 1 in {
    defm m : AVX512_maskable<opc, MRMSrcMem, DstVTI,
                             (outs DstVTI.RC:$dst),
                             (ins SrcVTI.ScalarMemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (DstVTI.VT
                               (OpNode
                                 (SrcVTI.VT
                                   (scalar_to_vector
                                     (loadi64 addr:$src)))))>,
             T8PD, EVEX, EVEX_CD8<SrcVTI.EltSize, CD8VT2>;
  }
}
1081
// Instantiates avx512_broadcast_32x2 for the 512-bit (Z) and 256-bit (Z256)
// destination types of VTI. Both require DQI; Z256 also requires VLX.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_broadcast_32x2<opc, OpcodeStr, VTI.info512, VTI.info128>,
             EVEX_V512;
  }

  let Predicates = [HasDQI, HasVLX] in {
    defm Z256 : avx512_broadcast_32x2<opc, OpcodeStr, VTI.info256,
                                      VTI.info128>,
                EVEX_V256;
  }
}
1091
// Integer flavour of the 32x2 broadcast: inherits the Z and Z256 variants and
// adds a 128-bit (Z128) variant that matches X86SubV32x2Broadcast instead of
// the default node. Z128 requires DQI and VLX.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTI>
  : avx512_common_broadcast_32x2<opc, OpcodeStr, VTI> {

  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_broadcast_32x2<opc, OpcodeStr, VTI.info128,
                                      VTI.info128, X86SubV32x2Broadcast>,
                EVEX_V128;
  }
}
1100
// 32x2 broadcasts. The integer flavour also gets a 128-bit variant through
// avx512_common_broadcast_i32x2; the FP flavour only has Z and Z256.
defm VPBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                           avx512vl_i32_info>;
defm VPBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                           avx512vl_f32_info>;

// Broadcast whose source is a 512-bit or 256-bit vector register: use the
// register form on the sub_xmm subregister of the source.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;

// 512-bit broadcast intrinsics taking a 128-bit vector operand.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZr VR128X:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The scalar FP register is copied into VR128X and the register form is used.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
1127
1128
1129//===----------------------------------------------------------------------===//
1130// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1131//---
// Broadcast of a mask register (class KRC) into a vector register of type
// VTI.VT, selected from the X86VBroadcastm node.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo VTI, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs VTI.RC:$dst), (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set VTI.RC:$dst,
                            (VTI.VT (X86VBroadcastm KRC:$src)))]>,
           EVEX;
}
1138
// Instantiates avx512_mask_broadcastm for the 512-, 256- and 128-bit types of
// VTI. All variants require CDI; the 256- and 128-bit ones also require VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTI,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in {
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTI.info512, KRC>,
             EVEX_V512;
  }

  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTI.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTI.info128, KRC>,
                EVEX_V128;
  }
}
1148
// Mask-to-vector broadcasts: a VK16 mask into i32-element vectors and a VK8
// mask into i64-element vectors (the latter adds VEX_W).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               avx512vl_i64_info, VK8>, VEX_W;
1153
1154//===----------------------------------------------------------------------===//
1155// -- VPERMI2 - 3 source operands form --
// VPERMI2 register (rr) and full-vector memory (rm) forms, each expanded
// through AVX512_maskable_3src_cast.
//   _     - vector info for the result and for $src2/$src3.
//   IdxVT - vector info whose register class types $src1 in the patterns.
// $src1 is tied to $dst by the Constraints string.  It is used in the
// patterns but is not listed in the (ins ...) dags here (presumably
// AVX512_maskable_3src_cast supplies it - confirm against that multiclass).
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst" in {
  // Register form: both remaining sources are _.RC registers.
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
         AVX5128IBase;

  // Memory form: $src3 is loaded with _.LdFrag and bitconverted to _.VT.
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
                   (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
            EVEX_4V, AVX5128IBase;
  }
}
// VPERMI2 broadcast-memory form (rmb): $src3 is a scalar memory operand
// (_.ScalarMemOp) loaded with _.ScalarLdFrag and expanded with X86VBroadcast.
// The asm operand strings append _.BroadcastStr to ${src3}; the def is tagged
// EVEX_B.  As in avx512_perm_i, $src1 is tied to $dst.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let mayLoad = 1, Constraints = "$src1 = $dst" in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermi2X IdxVT.RC:$src1,
               _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
              AVX5128IBase, EVEX_4V, EVEX_B;
}
1185
// Instantiates avx512_perm_i together with avx512_perm_i_mb for each vector
// width.  The 512-bit records use NAME unchanged and get no Predicates from
// this multiclass; the NAME#128 and NAME#256 records require HasVLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME:
      avx512_perm_i<opc, OpcodeStr, VTInfo.info512, ShuffleMask.info512>,
      avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512, ShuffleMask.info512>,
      EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128:
        avx512_perm_i<opc, OpcodeStr, VTInfo.info128, ShuffleMask.info128>,
        avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128, ShuffleMask.info128>,
        EVEX_V128;
    defm NAME#256:
        avx512_perm_i<opc, OpcodeStr, VTInfo.info256, ShuffleMask.info256>,
        avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256, ShuffleMask.info256>,
        EVEX_V256;
  }
}
1204
// Per-width instantiation of avx512_perm_i only (no avx512_perm_i_mb form is
// added here).  The 512-bit records require HasBWI; the NAME#128 and NAME#256
// records require HasBWI and HasVLX.
multiclass avx512_perm_i_sizes_w<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 AVX512VLVectorVTInfo Idx> {
  let Predicates = [HasBWI] in {
    defm NAME:
        avx512_perm_i<opc, OpcodeStr, VTInfo.info512, Idx.info512>,
        EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#128:
        avx512_perm_i<opc, OpcodeStr, VTInfo.info128, Idx.info128>,
        EVEX_V128;
    defm NAME#256:
        avx512_perm_i<opc, OpcodeStr, VTInfo.info256, Idx.info256>,
        EVEX_V256;
  }
}
1218
// VPERMI2 instantiations.  The second info argument is the one passed on as
// IdxVT; the floating-point variants pair an f32/f64 data info with the
// i32/i64 info of the same element width.
defm VPERMI2D
    : avx512_perm_i_sizes<0x76, "vpermi2d",
                          avx512vl_i32_info, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMI2Q
    : avx512_perm_i_sizes<0x76, "vpermi2q",
                          avx512vl_i64_info, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W
    : avx512_perm_i_sizes_w<0x75, "vpermi2w",
                            avx512vl_i16_info, avx512vl_i16_info>,
      VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2PS
    : avx512_perm_i_sizes<0x77, "vpermi2ps",
                          avx512vl_f32_info, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMI2PD
    : avx512_perm_i_sizes<0x77, "vpermi2pd",
                          avx512vl_f64_info, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
1229
1230// VPERMT2
// VPERMT2 register (rr) and full-vector memory (rm) forms, each expanded
// through AVX512_maskable_3src.
//   _     - vector info for the result, $src1 and $src3.
//   IdxVT - vector info whose register class is used for $src2.
// $src1 is tied to $dst by the Constraints string.  It is used in the
// patterns but is not listed in the (ins ...) dags here (presumably
// AVX512_maskable_3src supplies it - confirm against that multiclass).
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst" in {
  // Register form.
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3))>, EVEX_4V,
         AVX5128IBase;

  // Memory form: $src3 is loaded with _.LdFrag and bitconverted.
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (bitconvert (_.LdFrag addr:$src3))))>,
            EVEX_4V, AVX5128IBase;
  }
}
// VPERMT2 broadcast-memory form (rmb): $src3 is a scalar memory operand
// (_.ScalarMemOp) loaded with _.ScalarLdFrag and expanded with X86VBroadcast.
// The asm operand strings append _.BroadcastStr to ${src3}; the def is tagged
// EVEX_B.  As in avx512_perm_t, $src1 is tied to $dst.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let mayLoad = 1, Constraints = "$src1 = $dst" in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
              AVX5128IBase, EVEX_4V, EVEX_B;
}
1260
// Instantiates avx512_perm_t together with avx512_perm_t_mb for each vector
// width.  The 512-bit records use NAME unchanged and get no Predicates from
// this multiclass; the NAME#128 and NAME#256 records require HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME:
      avx512_perm_t<opc, OpcodeStr, VTInfo.info512, ShuffleMask.info512>,
      avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info512, ShuffleMask.info512>,
      EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128:
        avx512_perm_t<opc, OpcodeStr, VTInfo.info128, ShuffleMask.info128>,
        avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info128, ShuffleMask.info128>,
        EVEX_V128;
    defm NAME#256:
        avx512_perm_t<opc, OpcodeStr, VTInfo.info256, ShuffleMask.info256>,
        avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info256, ShuffleMask.info256>,
        EVEX_V256;
  }
}
1279
// Per-width instantiation of avx512_perm_t only (no avx512_perm_t_mb form is
// added here).  The 512-bit records require HasBWI; the NAME#128 and NAME#256
// records require HasBWI and HasVLX.
multiclass avx512_perm_t_sizes_w<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 AVX512VLVectorVTInfo Idx> {
  let Predicates = [HasBWI] in {
    defm NAME:
        avx512_perm_t<opc, OpcodeStr, VTInfo.info512, Idx.info512>,
        EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#128:
        avx512_perm_t<opc, OpcodeStr, VTInfo.info128, Idx.info128>,
        EVEX_V128;
    defm NAME#256:
        avx512_perm_t<opc, OpcodeStr, VTInfo.info256, Idx.info256>,
        EVEX_V256;
  }
}
1293
// VPERMT2 instantiations.  The second info argument is the one passed on as
// IdxVT; the floating-point variants pair an f32/f64 data info with the
// i32/i64 info of the same element width.
defm VPERMT2D
    : avx512_perm_t_sizes<0x7E, "vpermt2d",
                          avx512vl_i32_info, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMT2Q
    : avx512_perm_t_sizes<0x7E, "vpermt2q",
                          avx512vl_i64_info, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W
    : avx512_perm_t_sizes_w<0x7D, "vpermt2w",
                            avx512vl_i16_info, avx512vl_i16_info>,
      VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2PS
    : avx512_perm_t_sizes<0x7F, "vpermt2ps",
                          avx512vl_f32_info, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMT2PD
    : avx512_perm_t_sizes<0x7F, "vpermt2pd",
                          avx512vl_f64_info, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
1304
1305//===----------------------------------------------------------------------===//
1306// AVX-512 - BLEND using mask
1307//
// Mask-controlled blend: register (rr*) and full-vector memory (rm*) forms,
// all defined under the vector info's ExeDomain.
//   rr / rm     - unmasked encodings, no selection pattern.
//   rrk / rmk   - merge-masked (EVEX_K); selected from
//                 (X86select mask, src1, src2).  The memory form loads $src2
//                 with _.LdFrag and bitconverts it to _.VT.
//   rrkz / rmkz - zero-masked (EVEX_KZ) encodings, no selection pattern.
// The unmasked asm strings used to carry a stray space before the '|' variant
// separator (and a doubled space in the Intel variant of rm), which ended up
// in the printed assembly; they now match the spacing of the masked forms.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
                 (_.VT _.RC:$src2)))]>, EVEX_4V, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ;
  // Memory forms.  rm and rmkz have no pattern, so mayLoad is set explicitly.
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
              (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
              EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
  }
  }
}
// Broadcast-memory forms of the mask blend.  $src2 is a scalar memory operand
// (_.ScalarMemOp) and the asm strings append _.BroadcastStr to it; both defs
// are tagged EVEX_B.
//   rmbk - merge-masked (EVEX_K); selected from X86select with the second
//          vector produced by X86VBroadcast of a _.ScalarLdFrag load.
//   rmb  - unmasked encoding, no selection pattern.
// Both defs read memory.  rmb has an empty pattern list, so nothing in the
// def itself marks it as a load; mayLoad is therefore set explicitly for the
// pair, the same way avx512_icmp_packed_rmb wraps its broadcast forms.
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {

  let mayLoad = 1 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
      [(set _.RC:$dst,(X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
      []>,  EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
  }

}
1366
// Per-width blend instantiation with broadcast forms: every width gets both
// avx512_blendmask and avx512_blendmask_rmb.  Z gets no Predicates from this
// multiclass; Z256 and Z128 require HasVLX.
multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_blendmask<opc, OpcodeStr, VTInfo.info512>,
           avx512_blendmask_rmb<opc, OpcodeStr, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
                avx512_blendmask_rmb<opc, OpcodeStr, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
                avx512_blendmask_rmb<opc, OpcodeStr, VTInfo.info128>,
                EVEX_V128;
  }
}
1379
// Per-width blend instantiation without broadcast forms (avx512_blendmask
// only).  Z requires HasBWI; Z256 and Z128 require HasBWI and HasVLX.
multiclass blendmask_bw <bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_blendmask<opc, OpcodeStr, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
                EVEX_V128;
  }
}
1390
1391
// Blend instantiations.  32/64-bit element types go through blendmask_dq,
// 8/16-bit element types through blendmask_bw; the PD, Q and W variants add
// VEX_W.
defm VBLENDMPS
    : blendmask_dq<0x65, "vblendmps", avx512vl_f32_info>;
defm VBLENDMPD
    : blendmask_dq<0x65, "vblendmpd", avx512vl_f64_info>, VEX_W;
defm VPBLENDMD
    : blendmask_dq<0x64, "vpblendmd", avx512vl_i32_info>;
defm VPBLENDMQ
    : blendmask_dq<0x64, "vpblendmq", avx512vl_i64_info>, VEX_W;
defm VPBLENDMB
    : blendmask_bw<0x66, "vpblendmb", avx512vl_i8_info>;
defm VPBLENDMW
    : blendmask_bw<0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
1398
1399
// 256-bit vselect (v8f32 / v8i32) implemented with the 512-bit masked blend:
// both inputs are placed in zmm registers with SUBREG_TO_REG, the VK8WM mask
// is copied to VK16WM, and the low ymm half of the result is extracted.
// Note that the blend receives the two vector operands in the opposite order
// from the vselect.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$k),
                            (v8f32 VR256X:$lhs),
                            (v8f32 VR256X:$rhs))),
            (EXTRACT_SUBREG
              (v16f32
                (VBLENDMPSZrrk
                  (COPY_TO_REGCLASS VK8WM:$k, VK16WM),
                  (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$rhs, sub_ymm)),
                  (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$lhs, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$k),
                            (v8i32 VR256X:$lhs),
                            (v8i32 VR256X:$rhs))),
            (EXTRACT_SUBREG
              (v16i32
                (VPBLENDMDZrrk
                  (COPY_TO_REGCLASS VK8WM:$k, VK16WM),
                  (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$rhs, sub_ymm)),
                  (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$lhs, sub_ymm)))),
              sub_ymm)>;
}
1415//===----------------------------------------------------------------------===//
1416// Compare Instructions
1417//===----------------------------------------------------------------------===//
1418
1419// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
1420
// Scalar floating-point compares (opcode 0xC2) that write a mask register of
// class _.KRC.
//   _         - vector info describing the scalar type (register classes,
//               memory operands, mnemonic suffix).
//   OpNode    - node selected by rr_Int / rm_Int and by the codegen-only
//               rr / rm defs.
//   OpNodeRnd - node selected by the {sae} form rrb_Int; its pattern carries
//               an extra (i32 FROUND_NO_EXC) operand.
// Three groups of defs are produced:
//   *_Int - maskable forms on _.RC operands, condition code as AVXCC.
//   *_alt - assembler-only forms that take the condition as a raw u8imm
//           and have no selection pattern.
//   rr/rm - isCodeGenOnly forms on _.FRC operands.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{

  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc)>, EVEX_4V;
  let mayLoad = 1 in
    defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
                          imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;

  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2,{sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // The destination uses _.KRC like every other def in this multiclass
    // (it used to be spelled VK1 here only).
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
    // No pattern is attached to this memory form, so mayLoad has to be
    // stated explicitly.
    let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">,
                       EVEX_4V, EVEX_B;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  // Codegen-only forms operating on the scalar register class _.FRC; the
  // memory form loads $src2 with _.ScalarLdFrag.
  let isCodeGenOnly = 1 in {
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))],
                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
    let mayLoad = 1 in
      def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs _.KRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          (_.ScalarLdFrag addr:$src2),
                                          imm:$cc))],
                IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
  }
}
1493
// Scalar compare instantiations for the f32 and f64 scalar infos, both
// predicated on HasAVX512.  The f64 variant adds VEX_W.
let Predicates = [HasAVX512] in {
  defm VCMPSSZ
      : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
        AVX512XSIi8Base;
  defm VCMPSDZ
      : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
        AVX512XDIi8Base, VEX_W;
}
1500
// Packed integer compare with a fixed predicate (the node is given by
// OpNode); the result is written to a mask register of class _.KRC.
//   rr  - register/register.
//   rm  - register/memory; $src2 is loaded with _.LdFrag and bitconverted
//         to _.VT.
//   rrk - as rr, with the pattern AND-ing the compare result with $mask.
//   rmk - as rm, with the pattern AND-ing the compare result with $mask.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
              X86VectorVTInfo _> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
  let mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                     (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  // Masked variants (EVEX_K): the write mask is an extra leading input.
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
  let mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))))))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
1533
// Extends avx512_icmp_packed (inherited, so rr/rm/rrk/rmk are also defined)
// with broadcast-memory forms tagged EVEX_B:
//   rmb  - $src2 is a scalar memory operand loaded with _.ScalarLdFrag and
//          expanded with X86VBroadcast.
//   rmbk - as rmb, with the pattern AND-ing the compare result with $mask.
// The asm strings append _.BroadcastStr to ${src2}.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
              X86VectorVTInfo _> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
  let mayLoad = 1 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)))))],
               IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }
}
1558
// Per-width instantiation of avx512_icmp_packed.  Z requires the caller's
// predicate `prd`; Z256 and Z128 require `prd` and HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256
        : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
          EVEX_V256;
    defm Z128
        : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
          EVEX_V128;
  }
}
1572
// Per-width instantiation of avx512_icmp_packed_rmb (the variant that also
// defines broadcast-memory forms).  Z requires the caller's predicate `prd`;
// Z256 and Z128 require `prd` and HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z
        : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
          EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256
        : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
          EVEX_V256;
    defm Z128
        : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
          EVEX_V128;
  }
}
1587
// Fixed-predicate integer compares.  Byte/word variants are predicated on
// HasBWI and have no broadcast forms; dword/qword variants are predicated on
// HasAVX512 and go through the *_rmb_vl multiclass.  The qword variants add
// T8PD and VEX_W.

// Equality (X86pcmpeqm).
defm VPCMPEQB
    : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                            avx512vl_i8_info, HasBWI>,
      EVEX_CD8<8, CD8VF>;

defm VPCMPEQW
    : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                            avx512vl_i16_info, HasBWI>,
      EVEX_CD8<16, CD8VF>;

defm VPCMPEQD
    : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                avx512vl_i32_info, HasAVX512>,
      EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ
    : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                avx512vl_i64_info, HasAVX512>,
      T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Greater-than (X86pcmpgtm).
defm VPCMPGTB
    : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                            avx512vl_i8_info, HasBWI>,
      EVEX_CD8<8, CD8VF>;

defm VPCMPGTW
    : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                            avx512vl_i16_info, HasBWI>,
      EVEX_CD8<16, CD8VF>;

defm VPCMPGTD
    : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                avx512vl_i32_info, HasAVX512>,
      EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ
    : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                avx512vl_i64_info, HasAVX512>,
      T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1619
// v8i32 compares producing v8i1, implemented with the 512-bit dword compares:
// both inputs are placed in zmm registers with SUBREG_TO_REG and the
// resulting mask is copied into the VK8 register class.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$lhs), (v8i32 VR256X:$rhs))),
          (COPY_TO_REGCLASS
            (VPCMPGTDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$lhs, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$rhs, sub_ymm))),
            VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$lhs), (v8i32 VR256X:$rhs))),
          (COPY_TO_REGCLASS
            (VPCMPEQDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$lhs, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$rhs, sub_ymm))),
            VK8)>;
1629
// Packed integer compare with a condition-code operand; the mnemonic is
// built as "vpcmp" + ${cc} + Suffix and the result is written to a mask
// register of class _.KRC.
//   rri  / rmi  - register and memory forms; the condition code is an
//                 AVX512ICC operand matched as imm:$cc in the pattern.  The
//                 memory form loads $src2 with _.LdFrag and bitconverts it.
//   rrik / rmik - masked (EVEX_K) forms; the pattern ANDs the compare result
//                 with $mask.
//   *_alt       - assembler-only forms that take the condition as a raw
//                 u8imm and have no selection pattern.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          X86VectorVTInfo _> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc))],
             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
  let mayLoad = 1 in
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                          imm:$cc)))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
  let mayLoad = 1 in
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                      (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                      imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;

  // Accept explicit immediate argument form instead of comparison code.
  // These defs have empty pattern lists, so hasSideEffects is cleared for the
  // group and mayLoad is set by hand on the memory forms.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
  }
}
1701
// Extends avx512_icmp_cc (inherited, so all of its defs are also produced)
// with broadcast-memory forms tagged EVEX_B:
//   rmib  / rmibk   - $src2 is a scalar memory operand loaded with
//                     _.ScalarLdFrag and expanded with X86VBroadcast; the
//                     masked form ANDs the compare result with $mask.
//   rmib_alt / rmibk_alt - assembler-only equivalents that take the condition
//                     as a raw u8imm and have no selection pattern.
// The asm strings append _.BroadcastStr to ${src2}.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              X86VectorVTInfo _> :
           avx512_icmp_cc<opc, Suffix, OpNode, _> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                               imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1),
                                    (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                    imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;

  // Accept explicit immediate argument form instead of comparison code.
  // Both defs are memory forms with empty pattern lists, hence the explicit
  // mayLoad on the whole group.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                   "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }
}
1745
// Per-width instantiation of avx512_icmp_cc.  Z requires the caller's
// predicate `prd`; Z256 and Z128 require `prd` and HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1756
// Same shape as avx512_icmp_cc_vl, but instantiates avx512_icmp_cc_rmb for
// each vector length. Z needs only prd; Z256 and Z128 also need HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
  }
}
1770
// Integer compare-into-mask instantiations.
// Opcode 0x3F / 0x1F pairs with X86cmpm, 0x3E / 0x1E with X86cmpmu.
// Within a pair sharing an opcode (b/w and d/q), the wider element type is
// distinguished by VEX_W.

// Byte elements: HasBWI, instantiated through avx512_icmp_cc_vl.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
                                HasBWI>, EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
                                 HasBWI>, EVEX_CD8<8, CD8VF>;

// Word elements: HasBWI, instantiated through avx512_icmp_cc_vl.
defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
                                HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
                                 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;

// Dword elements: HasAVX512, instantiated through avx512_icmp_cc_rmb_vl.
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

// Qword elements: HasAVX512, instantiated through avx512_icmp_cc_rmb_vl.
defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1790
// Floating-point compare-into-mask forms shared by every vector length.
// All forms use opcode 0xC2 and write a mask register (_.KRC).
//   rri  - register/register, condition given as an AVXCC operand.
//   rmi  - register/full-vector memory operand.
//   rmbi - register/scalar memory operand matched as an X86VBroadcast
//          (EVEX_B).
// The *_alt forms take the condition as a raw u8imm; they are marked
// isAsmParserOnly and are built with AVX512_maskable_cmp_alt, which is given
// no selection pattern here.
multiclass avx512_vcmp_common<X86VectorVTInfo _> {

  // Register/register compare.
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "$src2, $src1", "$src1, $src2",
                   (X86cmpm (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                           imm:$cc)>;

  let mayLoad = 1 in {
    // Second operand loaded as a whole vector.
    defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                  (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                  "vcmp${cc}"#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (X86cmpm (_.VT _.RC:$src1),
                          (_.VT (bitconvert (_.LdFrag addr:$src2))),
                          imm:$cc)>;

    // Second operand is a scalar load broadcast to all elements; the asm
    // string appends _.BroadcastStr to the memory operand.
    defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                  "vcmp${cc}"#_.Suffix,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (X86cmpm (_.VT _.RC:$src1),
                          (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                          imm:$cc)>,EVEX_B;
  }
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                             "vcmp"#_.Suffix,
                             "$cc, $src2, $src1", "$src1, $src2, $cc">;

      defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, ${src2}"##_.BroadcastStr##", $src1",
                         "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
    }
 }
}
1844
// Register/register floating-point compare with the {sae} modifier.
// The pattern matches X86cmpmRnd with an (i32 FROUND_NO_EXC) operand, and
// both forms carry EVEX_B.
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2,{sae}",
                     (X86cmpmRnd (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    imm:$cc,
                                (i32 FROUND_NO_EXC))>, EVEX_B;

  // Explicit-immediate form; assembler-parser only, no selection pattern.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc,{sae}, $src2, $src1",
                         "$src1, $src2,{sae}, $cc">, EVEX_B;
   }
}
1865
// Floating-point compare for all vector lengths.
// Only the 512-bit variant (Z) also pulls in the {sae} forms from
// avx512_vcmp_sae; Z128 and Z256 get the common forms only and need HasVLX.
multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcmp_common<_.info512>,
             avx512_vcmp_sae<_.info512>,
             EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
  }
}
1877
// Packed double / packed single compare-into-mask instructions.
// The PD variant differs by its f64 type info, AVX512PDIi8Base, a 64-bit
// EVEX_CD8 element size and VEX_W.
defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
1882
// 256-bit compares selected onto the 512-bit instructions: each YMM operand
// is placed in a ZMM register with SUBREG_TO_REG (sub_ymm), the Z-form
// compare is emitted, and the resulting mask is retagged as VK8.
// NOTE(review): these patterns carry no Predicates list, so they are not
// restricted to non-VLX targets here -- confirm how they are prioritised
// against the Z256 forms.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VCMPPSZrri
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VPCMPDZrri
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VPCMPUDZrri
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
1898
1899// ----------------------------------------------------------------
1900// FPClass
// Handle scalar fpclass instructions: mask = op(reg_scalar, imm)
//                                            op(mem_scalar, imm)
// Scalar FPCLASS: classify one floating-point value and write the result to
// a mask register (_.KRC). All forms are guarded by prd.
//   rr  / rm  - unmasked, register / memory source.
//   rrk / rmk - same sources under write-mask $mask (EVEX_K).
// $src2 is the class-selection immediate (i32u8imm), passed to OpNode as i32.
//
// The masked patterns AND the classification result with $mask, matching the
// masked vpcmp patterns in this file: a write-mask on a mask-producing
// instruction can only clear destination bits, never set them. Matching
// `or` here would select the masked instruction for a DAG it does not
// implement.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)))], NoItinerary>;
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))], NoItinerary>, EVEX_K;
    // Memory forms; AddedComplexity = 20 raises their selection priority.
    let mayLoad = 1, AddedComplexity = 20 in {
      def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##
                                "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
                      [(set _.KRC:$dst,
                            (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i32 imm:$src2)))], NoItinerary>;
      def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##
                      "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                          (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2))))], NoItinerary>, EVEX_K;
    }
  }
}
1936
// Handle vector fpclass instructions: mask = fpclass(reg_vec, imm)
//                                            fpclass(mem_vec, imm)
//                                            fpclass(broadcast(eltVt), imm)
// Vector FPCLASS: classify each element of a vector and write the per-element
// result to a mask register (_.KRC).
//   rr   / rrk  - register source, unmasked / under write-mask $mask.
//   rm   / rmk  - full-vector memory source; `mem` is appended to the
//                 mnemonic of these forms.
//   rmb  / rmbk - scalar memory source matched as X86VBroadcast (EVEX_B);
//                 `broadcast` is appended to the mnemonic of these forms.
// $src2 is the class-selection immediate (i32u8imm), passed to OpNode as i32.
//
// The masked patterns AND the classification result with $mask, matching the
// masked vpcmp patterns in this file: a write-mask on a mask-producing
// instruction can only clear destination bits, never set them. Matching
// `or` here would select the masked instruction for a DAG it does not
// implement.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string mem, string broadcast>{
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))], NoItinerary>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))], NoItinerary>, EVEX_K;
  let mayLoad = 1 in {
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##mem#
                      "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode
                                       (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                       (i32 imm:$src2)))], NoItinerary>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##mem#
                      "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                    (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i32 imm:$src2))))], NoItinerary>, EVEX_K;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                        _.BroadcastStr##", $dst | $dst, ${src1}"
                                                    ##_.BroadcastStr##", $src2}",
                      [(set _.KRC:$dst,(OpNode
                                       (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src1))),
                                       (i32 imm:$src2)))], NoItinerary>,EVEX_B;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                            _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"##
                                                     _.BroadcastStr##", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                       (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src1))),
                                       (i32 imm:$src2))))], NoItinerary>,
                                                            EVEX_B, EVEX_K;
  }
}
1990
// Instantiates avx512_vector_fpclass for each vector length of `_`.
// The `mem` mnemonic suffix is "{z}", "{x}" and "{y}" for the 512-, 128- and
// 256-bit variants respectively; `broadcast` is forwarded unchanged.
// Z is guarded by prd; Z128 and Z256 also require HasVLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr,
                                     AVX512VLVectorVTInfo _, bits<8> opc,
                                     SDNode OpNode, Predicate prd,
                                     string broadcast> {
  let Predicates = [prd] in
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info512,
                                   "{z}", broadcast>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info128,
                                      "{x}", broadcast>,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info256,
                                      "{y}", broadcast>,
                EVEX_V256;
  }
}
2005
// Builds the full FPCLASS family: packed single/double (PS, PD) through
// avx512_vector_fpclass_all with opcVec/VecOpNode, and scalar single/double
// (SS, SD) through avx512_scalar_fpclass with opcScalar/ScalarOpNode.
// The double-precision variants add VEX_W.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, Predicate prd> {
  // Packed forms; the broadcast mnemonic suffix is "{l}" / "{q}".
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, prd, "{l}">,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, prd, "{q}">,
            EVEX_CD8<64, CD8VF>, VEX_W;

  // Scalar forms.
  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  f32x_info, prd>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  f64x_info, prd>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
2017
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, guarded by
// HasDQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
2020
2021//-----------------------------------------------------------------
2022// Mask register copy, including
2023// - copy between mask registers
2024// - load/store mask registers
2025// - copy from GPR to mask register and vice versa
2026//
// Mask-register move forms for one mask width:
//   kk - mask to mask (no selection pattern).
//   km - load from x86memop into a mask register; matches a `vvt` load.
//   mk - store a mask register to x86memop.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", []>;

    let mayLoad = 1 in {
      def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                 OpcodeStr#"\t{$src, $dst|$dst, $src}",
                 [(set KRC:$dst, (vvt (load addr:$src)))]>;
    }

    let mayStore = 1 in {
      def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                 OpcodeStr#"\t{$src, $dst|$dst, $src}",
                 [(store KRC:$src, addr:$dst)]>;
    }
  }
}
2043
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr - GPR to mask.
//   rk - mask to GPR.
// Neither form has a selection pattern attached.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", []>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", []>;
  }
}
2054
// KMOV{B,W,D,Q}. Each width instantiates the mask/memory forms
// (avx512_mask_mov) and the GPR forms (avx512_mask_mov_gpr).
// The B, W and D GPR forms use GR32; the Q GPR forms use GR64.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// For D and Q the GPR forms carry a different prefix (XD) than the
// mask/memory forms, so the two multiclasses are instantiated separately.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2078
2079// GR from/to mask register
// 8-bit: the GR8 value is placed in a wider register with SUBREG_TO_REG
// before KMOVBkr; the reverse direction extracts sub_8bit from the KMOVBrk
// result.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

// 16-bit: same scheme using sub_16bit and KMOVW.
let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

// 32- and 64-bit: the GPR class is used directly by KMOVD / KMOVQ.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
            (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
            (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
            (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
            (KMOVQrk VK64:$src)>;
}
2100
2101// Load/store kreg
// With DQI, 8-bit masks are stored and loaded with KMOVB directly. VK4 and
// VK2 stores are retagged as VK8 and also go through KMOVBmk.
let Predicates = [HasDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (KMOVBkm addr:$src)>;

  def : Pat<(store VK4:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
}

// Without DQI, 8-bit masks are retagged as VK16 and moved with KMOVW.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;

  // i1 load: byte load (MOV8rm), masked with 1 by AND16ri, then retagged
  // as VK1.
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS
              (AND16ri (i16 (SUBREG_TO_REG (i32 0),
                                           (MOV8rm addr:$src), sub_8bit)),
                       (i16 1)),
              VK1)>;

  def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
            (KMOVWkm addr:$src)>;
}

// 32- and 64-bit masks use KMOVD / KMOVQ.
let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;
  def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
            (KMOVDkm addr:$src)>;

  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
  def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
            (KMOVQkm addr:$src)>;
}
2141
// Conversions between integer GPR values and i1 held in VK1.
let Predicates = [HasAVX512] in {
  // trunc to i1: bring the value to 32 bits (EXTRACT_SUBREG for i64,
  // SUBREG_TO_REG for i8/i16), keep only bit 0 with AND32ri, move it into a
  // mask register with KMOVWkr and retag the result as VK1.
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
                                        (i32 1))), VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
       (COPY_TO_REGCLASS
        (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
       VK1)>;
  def : Pat<(i1 (trunc (i16 GR16:$src))),
       (COPY_TO_REGCLASS
        (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
       VK1)>;

  // zext from i1: retag VK1 as VK16, move to a GPR with KMOVWrk, then mask
  // with 1. anyext uses the same move but skips the masking.
  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
  def : Pat<(i32 (anyext VK1:$src)),
            (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;

  // i8 results take sub_8bit of the 32-bit value.
  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
             (AND32ri (KMOVWrk
                       (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
  def : Pat<(i8 (anyext VK1:$src)),
              (EXTRACT_SUBREG
                (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;

  // i64 result: the 32-bit move result is placed in a 64-bit register with
  // SUBREG_TO_REG and masked with AND64ri8.
  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8 (SUBREG_TO_REG (i64 0),
             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
  // i16 result takes sub_16bit of the masked 32-bit value.
  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
             (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;
}
// scalar_to_vector from an i1 in VK1 to any mask vector type is selected as
// a plain register-class copy to the matching VK<N> class. These patterns
// carry no Predicates list.
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK16)>;
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK8)>;
def : Pat<(v4i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK4)>;
def : Pat<(v2i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK2)>;
def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK32)>;
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK64)>;
2192
2193
2194// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512, NoDQI] in {
  // i8 -> v8i1 without KMOVB: zero-extend the byte to 32 bits (MOVZX32rr8),
  // move it with KMOVWkr and retag the result as VK8.
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rr8 GR8:$src)), VK8)>;

  // v8i1 -> i8 without KMOVB: retag as VK16, move with KMOVWrk and take the
  // low byte of the result.
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
                            sub_8bit)>;
}
2205
// Extracting element 0 of a mask vector as an i1 is selected as a
// register-class copy to VK1. The 16- and 8-element forms need HasAVX512;
// the 32- and 64-element forms need HasBWI.
let Predicates = [HasAVX512] in {
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
2218
2219// Mask unary operation
2220// - KNOT
// One register-to-register unary mask instruction (`rr`) on register class
// KRC, selected for (OpNode KRC:$src) and guarded by prd.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
2229
// Instantiates a unary mask instruction for all four mask widths.
//   B: VK8,  HasDQI,    PD          W: VK16, HasAVX512, PS
//   D: VK32, HasBWI,    PD + VEX_W  Q: VK64, HasBWI,    PS + VEX_W
// The width letter is appended to OpcodeStr to form the mnemonic.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode, HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
           VEX, PS, VEX_W;
}
2241
// KNOT{B,W,D,Q}: opcode 0x44, selected for the `not` operator.
defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
2243
// Maps the 16-bit mask intrinsic int_x86_avx512_<IntName>_w, which takes an
// i16 in GR16, onto the <InstName>Wrr instruction: the operand is retagged as
// VK16, the instruction is applied, and the result is retagged as GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
              (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
}
// int_x86_avx512_knot_w -> KNOTWrr.
defm : avx512_mask_unop_int<"knot", "KNOT">;
2252
// xor with an all-ones mask vector is selected as KNOT of the native width.
let Predicates = [HasDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (KNOTBrr VK8:$src1)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)),
            (KNOTWrr VK16:$src1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)),
            (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)),
            (KNOTQrr VK64:$src1)>;
}

// Without DQI there is no 8-bit form here: the 8-bit mask is retagged as
// VK16, inverted with KNOTW and retagged back to VK8.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}

// 4- and 2-element masks always go through KNOTW. These two patterns carry no
// Predicates list.
def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>;
def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>;
2274
2275// Mask binary operation
2276// - KAND, KANDN, KOR, KXNOR, KXOR
// One register/register binary mask instruction (`rr`) on register class KRC,
// selected for (OpNode KRC:$src1, KRC:$src2). prd guards the definition and
// IsCommutable is forwarded to the instruction's isCommutable flag.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           Predicate prd, bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
2286
// Instantiates a binary mask instruction for all four mask widths.
//   B: VK8,  HasDQI, PD          W: VK16, prdW,   PS
//   D: VK32, HasBWI, PD + VEX_W  Q: VK64, HasBWI, PS + VEX_W
// prdW defaults to HasAVX512 and lets the caller change the predicate of the
// 16-bit form only. The width letter is appended to OpcodeStr.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode, bit IsCommutable,
                               Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8, OpNode,
                             HasDQI, IsCommutable>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode,
                             prdW, IsCommutable>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode,
                             HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode,
                             HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PS;
}
2299
// andn(i0, i1) = (not i0) and i1 -- the first operand is the inverted one.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
// xnor(i0, i1) = not (i0 xor i1).
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2302
// Binary mask instructions. The fourth argument is IsCommutable; only KANDN
// is declared non-commutable. KADD overrides the predicate of its 16-bit
// form with HasDQI.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,  1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,   1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,  1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,  1, HasDQI>;
2309
// Maps the 16-bit mask intrinsic int_x86_avx512_<IntName>_w, which takes two
// i16 operands in GR16, onto the <InstName>Wrr instruction: both operands are
// retagged as VK16, the instruction is applied, and the result is retagged as
// GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
              (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
              (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
}

// Intrinsic mappings for the five logical mask operations.
defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
2324
// Selects a logical mask operation on mask types narrower than 16 bits by
// retagging both operands as VK16, applying the 16-bit instruction Inst, and
// retagging the result back to the operands' own register class.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(OpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  // The result class must match the operand class: a VK2 or VK4 operation
  // yields a VK2 or VK4 value, not VK1.
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(OpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(OpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2348
// Narrow-mask selection patterns for each logical operation, all routed
// through the 16-bit (W) instruction.
defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
2354
// xor(xor(a, b), all-ones) is selected as KXNOR.
// Native widths; the 16-bit pattern carries no Predicates list.
def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
          (KXNORWrr VK16:$src1, VK16:$src2)>;
let Predicates = [HasDQI] in
  def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
            (KXNORBrr VK8:$src1, VK8:$src2)>;
let Predicates = [HasBWI] in {
  def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
            (KXNORDrr VK32:$src1, VK32:$src2)>;
  def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
            (KXNORQrr VK64:$src1, VK64:$src2)>;
}

// Narrower masks are retagged as VK16, combined with KXNORW and retagged
// back. The 8-bit variant applies only without DQI.
let Predicates = [NoDQI] in {
  def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16),
                        (COPY_TO_REGCLASS VK8:$src2, VK16)),
              VK8)>;
}

def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)),
          (COPY_TO_REGCLASS
            (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16),
                      (COPY_TO_REGCLASS VK4:$src2, VK16)),
            VK4)>;

def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)),
          (COPY_TO_REGCLASS
            (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
                      (COPY_TO_REGCLASS VK2:$src2, VK16)),
            VK2)>;

// Scalar i1: the all-ones constant is (i1 1).
def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
          (COPY_TO_REGCLASS
            (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;
2380
2381// Mask unpacking
// Defines one "kunpck<Suffix>" instruction plus the pattern that selects it
// for a concat_vectors of two narrower masks.
//   Suffix - mnemonic suffix appended to "kunpck".
//   KRC    - register class of the result and of both instruction operands.
//   VT     - value type of the concatenated result.
//   KRCSrc - register class of each input half in the selection pattern.
//   prd    - predicate guarding both the instruction and the pattern.
multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat("kunpck", Suffix,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               []>, VEX_4V, VEX_L;

    // The operands are swapped on purpose: the first concat_vectors operand
    // is passed as the instruction's second source and vice versa.  Both
    // halves are first copied into the wide class the instruction expects.
    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
2396
// 8+8 -> 16 is guarded by HasAVX512; the 16+16 -> 32 and 32+32 -> 64 forms
// are guarded by HasBWI.
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>,
                PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>,
                PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>,
                PS, VEX_W;
2400
2401// Mask bit testing
// Defines a two-operand mask test instruction that produces no register
// result and only defines EFLAGS.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (including the width suffix).
//   KRC       - mask register class of both operands.
//   OpNode    - DAG node selected to this instruction.
//   prd       - predicate guarding the instruction.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, Predicate prd> {
  let Defs = [EFLAGS], Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
2409
// Expands a mask test operation into its B/W/Q/D width variants.  The byte
// form is always gated on DQI and the dword/qword forms on BWI; the word
// form uses prdW, which defaults to HasAVX512.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                              HasDQI>, VEX, PD;
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                              prdW>, VEX, PS;
  defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                              HasBWI>, VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                              HasBWI>, VEX, PD, VEX_W;
}
2421
// KORTEST keeps the default word predicate (HasAVX512); KTEST overrides it so
// that its word form is gated on DQI as well.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
2424
2425// Mask shift
// Defines a mask shift-by-immediate instruction: one mask source, an 8-bit
// unsigned immediate shift count, and a mask result of the same class.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (including the width suffix).
//   KRC       - mask register class of source and destination.
//   OpNode    - DAG shift node selected to this instruction.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                             SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr # "\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}
2434
// Expands a mask shift operation into its W/B/Q/D width variants.
//   opc1      - opcode shared by the word and byte forms.
//   opc2      - opcode shared by the qword and dword forms.
//   OpcodeStr - mnemonic without the width suffix.
//   OpNode    - DAG shift node selected to the instructions.
// The byte form is gated on DQI.  Both the qword and the dword form are
// gated on BWI, matching every other VK32/VK64 mask definition in this file.
// The dword form used to sit under an additional nested
// `let Predicates = [HasDQI]`, which shadowed the enclosing HasBWI and tied
// the 32-bit mask shift to the wrong feature.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                               SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
                               VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
                               VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
                               VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
                               VEX, TAPD;
  }
}
2450
// Left shifts select from X86vshli, right shifts from X86vsrli.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33,
                                     "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31,
                                     "kshiftr", X86vsrli>;
2453
2454// Mask setting all 0s or 1s
// Defines a pseudo instruction (no operands, empty asm string) that
// materializes the constant Val of type VT into a KRC mask register.  It is
// flagged rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512], isPseudo = 1,
      isReMaterializable = 1, isAsCheapAsAMove = 1 in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}
2461
// Expands a mask constant pseudo into its 8/16/32/64-bit variants.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8, v8i1, Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
2468
// All-zeros and all-ones mask pseudos.
defm KSET0 : avx512_mask_setop_w<
               immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<
               immAllOnesV>;
2471
2472// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // Narrow vector constants: materialize the 16-bit pseudo and reinterpret it
  // in the narrower mask class.
  def : Pat<(v8i1 immAllZerosV),
            (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),
            (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),
            (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),
            (COPY_TO_REGCLASS (KSET1W), VK2)>;

  // Scalar i1 constants.  Both 1 and -1 are built by shifting the all-ones
  // 16-bit mask right by 15.
  def : Pat<(i1 0),
            (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),
            (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
  def : Pat<(i1 -1),
            (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
}
// Sub-mask extraction/insertion between 8-, 16-, 32- and 64-bit masks.
// Index 0 is a plain register-class change; the upper half is obtained by
// first shifting the source right by the width of the half.
def : Pat<
  (v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
  (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

def : Pat<
  (v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
  (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

def : Pat<
  (v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
  (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

def : Pat<
  (v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))),
  (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>;

def : Pat<
  (v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
  (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;

def : Pat<
  (v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))),
  (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>;

def : Pat<
  (v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
  (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
2502
// Index-0 extraction from, and insertion into undef of, masks up to v8i1.
// All of these are plain register-class changes.
def : Pat<
  (v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
  (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;

def : Pat<
  (v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
  (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;

def : Pat<
  (v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
  (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;

def : Pat<
  (v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
  (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<
  (v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
  (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
2516
// Inserting any narrower mask at index 0 of an undef v32i1 is a plain
// register-class change.
def : Pat<
  (v32i1 (insert_subvector undef, VK2:$src, (iPTR 0))),
  (v32i1 (COPY_TO_REGCLASS VK2:$src, VK32))>;
def : Pat<
  (v32i1 (insert_subvector undef, VK4:$src, (iPTR 0))),
  (v32i1 (COPY_TO_REGCLASS VK4:$src, VK32))>;
def : Pat<
  (v32i1 (insert_subvector undef, VK8:$src, (iPTR 0))),
  (v32i1 (COPY_TO_REGCLASS VK8:$src, VK32))>;
def : Pat<
  (v32i1 (insert_subvector undef, VK16:$src, (iPTR 0))),
  (v32i1 (COPY_TO_REGCLASS VK16:$src, VK32))>;

// Same for an undef v64i1 destination.
def : Pat<
  (v64i1 (insert_subvector undef, VK2:$src, (iPTR 0))),
  (v64i1 (COPY_TO_REGCLASS VK2:$src, VK64))>;
def : Pat<
  (v64i1 (insert_subvector undef, VK4:$src, (iPTR 0))),
  (v64i1 (COPY_TO_REGCLASS VK4:$src, VK64))>;
def : Pat<
  (v64i1 (insert_subvector undef, VK8:$src, (iPTR 0))),
  (v64i1 (COPY_TO_REGCLASS VK8:$src, VK64))>;
def : Pat<
  (v64i1 (insert_subvector undef, VK16:$src, (iPTR 0))),
  (v64i1 (COPY_TO_REGCLASS VK16:$src, VK64))>;
def : Pat<
  (v64i1 (insert_subvector undef, VK32:$src, (iPTR 0))),
  (v64i1 (COPY_TO_REGCLASS VK32:$src, VK64))>;
2536
2537
// Immediate shifts of v8i1 (only when DQI is absent) and v4i1 masks are
// performed with the 16-bit shift instructions: copy the source into VK16,
// shift, and copy the result back into the narrow class.
def : Pat<
  (v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
  (v8i1 (COPY_TO_REGCLASS
          (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)),
          VK8))>, Requires<[HasAVX512, NoDQI]>;

def : Pat<
  (v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
  (v8i1 (COPY_TO_REGCLASS
          (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)),
          VK8))>, Requires<[HasAVX512, NoDQI]>;

def : Pat<
  (v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
  (v4i1 (COPY_TO_REGCLASS
          (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16), (I8Imm $imm)),
          VK4))>, Requires<[HasAVX512]>;

def : Pat<
  (v4i1 (X86vsrli VK4:$src, (i8 imm:$imm))),
  (v4i1 (COPY_TO_REGCLASS
          (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), (I8Imm $imm)),
          VK4))>, Requires<[HasAVX512]>;
2557
2558//===----------------------------------------------------------------------===//
2559// AVX-512 - Aligned and unaligned load and store
2560//
2561
2562
// Defines the register and memory source forms of a vector move, in plain
// (rr/rm), merge-masked (rrk/rmk) and zero-masked (rrkz/rmkz) variants, plus
// the patterns that select the masked memory forms for a masked load.
//   opc                - opcode byte.
//   OpcodeStr          - mnemonic.
//   _                  - vector type info (register class, VT, mask class...).
//   ld_frag            - load fragment used by the memory-form patterns.
//   mload              - masked-load fragment matched by the trailing Pats.
//   IsReMaterializable - value given to isReMaterializable on the rm form.
multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         PatFrag ld_frag, PatFrag mload,
                         bit IsReMaterializable = 1> {
  let hasSideEffects = 0 in {
    // Unmasked and zero-masked register-to-register forms (no patterns).
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
                      _.ExeDomain>, EVEX;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [], _.ExeDomain>, EVEX, EVEX_KZ;

    // Unmasked load.
    let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
        SchedRW = [WriteLoad] in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [(set _.RC:$dst,
                           (_.VT (bitconvert (ld_frag addr:$src))))],
                        _.ExeDomain>, EVEX;
    }

    // Merge-masked forms: $src0 supplies the pass-through value and is tied
    // to the destination.
    let Constraints = "$src0 = $dst" in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                            (_.VT (vselect _.KRCWM:$mask,
                                           (_.VT _.RC:$src1),
                                           (_.VT _.RC:$src0))))],
                         _.ExeDomain>, EVEX, EVEX_K;
      let mayLoad = 1, SchedRW = [WriteLoad] in {
        def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                           (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                           OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                       "${dst} {${mask}}, $src1}",
                           [(set _.RC:$dst,
                              (_.VT (vselect _.KRCWM:$mask,
                                       (_.VT (bitconvert
                                                (ld_frag addr:$src1))),
                                       (_.VT _.RC:$src0))))],
                           _.ExeDomain>, EVEX, EVEX_K;
      }
    }

    // Zero-masked load.
    let mayLoad = 1, SchedRW = [WriteLoad] in {
      def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.MemOp:$src),
                          !strconcat(OpcodeStr,
                                     "\t{$src, ${dst} {${mask}} {z}|",
                                     "${dst} {${mask}} {z}, $src}"),
                          [(set _.RC:$dst,
                             (_.VT (vselect _.KRCWM:$mask,
                                      (_.VT (bitconvert (ld_frag addr:$src))),
                                      _.ImmAllZerosV)))],
                          _.ExeDomain>, EVEX, EVEX_KZ;
    }
  }

  // Masked load with an undef or all-zeros pass-through: zero-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
               _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
               _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a register pass-through: merge-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk)
               _.RC:$src0, _.KRCWM:$mask, addr:$ptr)>;
}
2621
// Instantiates avx512_load for the 512-, 256- and 128-bit vector widths using
// each width's aligned load fragment and the matching aligned masked-load
// fragment.  The 512-bit form is guarded by prd; the 256/128-bit forms
// additionally require VLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _,
                                  Predicate prd,
                                  bit IsReMaterializable = 1> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, _.info512,
                         _.info512.AlignedLdFrag, masked_load_aligned512,
                         IsReMaterializable>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned256,
                            IsReMaterializable>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned128,
                            IsReMaterializable>, EVEX_V128;
  }
}
2637
// Instantiates avx512_load for the 512-, 256- and 128-bit vector widths using
// each width's plain load fragment and the unaligned masked-load fragment.
// The 512-bit form is guarded by prd; the 256/128-bit forms additionally
// require VLX.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _,
                                  Predicate prd,
                                  bit IsReMaterializable = 1> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, _.info512,
                         _.info512.LdFrag, masked_load_unaligned,
                         IsReMaterializable>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, _.info256,
                            _.info256.LdFrag, masked_load_unaligned,
                            IsReMaterializable>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, _.info128,
                            _.info128.LdFrag, masked_load_unaligned,
                            IsReMaterializable>, EVEX_V128;
  }
}
2653
// Defines the store-direction forms of a vector move: pattern-less
// register-destination variants using the ".s" mnemonic suffix (rr_REV,
// rrk_REV, rrkz_REV), the plain store (mr) and the masked store (mrk), plus
// the pattern that selects mrk for a masked store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info.
//   st_frag   - store fragment matched by the mr form.
//   mstore    - masked-store fragment matched by the trailing Pat.
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag, PatFrag mstore> {

  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, ".s\t{$src, $dst|$dst, $src}"),
                        [], _.ExeDomain>, EVEX;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         !strconcat(OpcodeStr,
                                    ".s\t{$src, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src}"),
                         [], _.ExeDomain>, EVEX, EVEX_K;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          !strconcat(OpcodeStr,
                                     ".s\t{$src, ${dst} {${mask}} {z}|",
                                     "${dst} {${mask}} {z}, $src}"),
                          [], _.ExeDomain>, EVEX, EVEX_KZ;

  let mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (_.VT _.RC:$src), addr:$dst)],
                      _.ExeDomain>, EVEX;
    // The masked store carries no pattern of its own; it is selected through
    // the Pat below.
    def mrk : AVX512PI<opc, MRMDestMem, (outs),
                       (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                       !strconcat(OpcodeStr,
                         "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                       [], _.ExeDomain>, EVEX, EVEX_K;
  }

  def : Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
            (!cast<Instruction>(NAME#_.ZSuffix##mrk)
               addr:$ptr, _.KRCWM:$mask, _.RC:$src)>;
}
2685
2686
// Instantiates avx512_store for the 512-, 256- and 128-bit vector widths
// using the generic store fragment and the unaligned masked-store fragment.
// The 512-bit form is guarded by prd; the 256/128-bit forms additionally
// require VLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, _.info512,
                          store, masked_store_unaligned>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, _.info256,
                             store, masked_store_unaligned>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, _.info128,
                             store, masked_store_unaligned>, EVEX_V128;
  }
}
2700
// Instantiates avx512_store for the 512-, 256- and 128-bit vector widths
// using the aligned store fragment and aligned masked-store fragment of each
// width.  The 512-bit form is guarded by prd; the 256/128-bit forms
// additionally require VLX.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, _.info512,
                          alignedstore512, masked_store_aligned512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, _.info256,
                             alignedstore256, masked_store_aligned256>,
                EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, _.info128,
                             alignedstore, masked_store_aligned128>,
                EVEX_V128;
  }
}
2714
// Packed floating-point moves.  Each defm combines the load multiclass
// (opcode 0x28 aligned / 0x10 unaligned) with the store multiclass
// (opcode 0x29 aligned / 0x11 unaligned).
defm VMOVAPS
  : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>,
    avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, HasAVX512>,
    PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD
  : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>,
    avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS
  : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512>,
    avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
    PS, EVEX_CD8<32, CD8VF>;

// Note: the load side passes IsReMaterializable = 0 here, unlike the three
// instantiations above which keep the default.
defm VMOVUPD
  : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>,
    avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;
2732
// 512-bit floating-point masked load intrinsics with an all-zeros
// pass-through: move the GPR mask into the write-mask class and use the
// zero-masked load.
def : Pat<
  (v8f64 (int_x86_avx512_mask_loadu_pd_512
            addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
  (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;

def : Pat<
  (v16f32 (int_x86_avx512_mask_loadu_ps_512
             addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
  (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;

def : Pat<
  (v8f64 (int_x86_avx512_mask_load_pd_512
            addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
  (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;

def : Pat<
  (v16f32 (int_x86_avx512_mask_load_ps_512
             addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
  (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;

// A constant -1 mask on the aligned load intrinsics selects the unmasked load.
def : Pat<
  (v8f64 (int_x86_avx512_mask_load_pd_512
            addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
  (VMOVAPDZrm addr:$ptr)>;

def : Pat<
  (v16f32 (int_x86_avx512_mask_load_ps_512
             addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
  (VMOVAPSZrm addr:$ptr)>;

// 512-bit floating-point masked store intrinsics.
def : Pat<
  (int_x86_avx512_mask_storeu_ps_512
     addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
  (VMOVUPSZmrk addr:$ptr,
               (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
               VR512:$src)>;
def : Pat<
  (int_x86_avx512_mask_storeu_pd_512
     addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
  (VMOVUPDZmrk addr:$ptr,
               (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
               VR512:$src)>;

def : Pat<
  (int_x86_avx512_mask_store_ps_512
     addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
  (VMOVAPSZmrk addr:$ptr,
               (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
               VR512:$src)>;
def : Pat<
  (int_x86_avx512_mask_store_pd_512
     addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
  (VMOVAPDZmrk addr:$ptr,
               (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
               VR512:$src)>;
2774
// Packed integer moves.  Loads use opcode 0x6F and stores 0x7F.  The 8- and
// 16-bit element forms are guarded by HasBWI, all others by HasAVX512.
defm VMOVDQA32
  : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>,
    avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, HasAVX512>,
    PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64
  : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>,
    avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8
  : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
    avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
    XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16
  : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
    avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
    XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32
  : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512>,
    XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64
  : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512>,
    XS, VEX_W, EVEX_CD8<64, CD8VF>;
2800
// 512-bit integer unaligned masked load intrinsics with an all-zeros
// pass-through: use the zero-masked load.
def : Pat<
  (v16i32 (int_x86_avx512_mask_loadu_d_512
             addr:$ptr, (v16i32 immAllZerosV), GR16:$mask)),
  (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;

def : Pat<
  (v8i64 (int_x86_avx512_mask_loadu_q_512
            addr:$ptr, (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
  (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;

// 512-bit integer unaligned masked store intrinsics.
def : Pat<
  (int_x86_avx512_mask_storeu_d_512
     addr:$ptr, (v16i32 VR512:$src), GR16:$mask),
  (VMOVDQU32Zmrk addr:$ptr,
                 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                 VR512:$src)>;
def : Pat<
  (int_x86_avx512_mask_storeu_q_512
     addr:$ptr, (v8i64 VR512:$src), GR8:$mask),
  (VMOVDQU64Zmrk addr:$ptr,
                 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                 VR512:$src)>;
2817
// vselect between a 512-bit integer vector and zero maps onto a zero-masked
// register move.  When zero is the "true" operand the mask is inverted with
// KNOTW first (for v8i64 the mask is widened to VK16 for the KNOTW and then
// narrowed back).
let AddedComplexity = 20 in {
  def : Pat<
    (v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)))),
    (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  def : Pat<
    (v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                    (v8i64 VR512:$src))),
    (VMOVDQU64Zrrkz
      (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8),
      VR512:$src)>;

  def : Pat<
    (v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
                     (v16i32 immAllZerosV))),
    (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  def : Pat<
    (v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                     (v16i32 VR512:$src))),
    (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
2836
2837// Move Int Doubleword to Packed Double Int
2838//
// GR32 / 32-bit memory -> low element of a v4i32 (scalar_to_vector).
def VMOVDI2PDIZrr
  : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_LIG;
def VMOVDI2PDIZrm
  : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst,
                (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

// GR64 -> low element of a v2i64.
def VMOV64toPQIZrr
  : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
             "vmovq\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_W, VEX_LIG;

// Bit-for-bit moves between GR64 and FR64 (code generation only).
let isCodeGenOnly = 1 in {
  def VMOV64toSDZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
               "vmovq\t{$src, $dst|$dst, $src}",
               [(set FR64:$dst, (bitconvert GR64:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_W, Sched<[WriteMove]>;
  def VMOVSDto64Zrr
    : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
               "vmovq\t{$src, $dst|$dst, $src}",
               [(set GR64:$dst, (bitconvert FR64:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_W, Sched<[WriteMove]>;
}

// Store of an FR64 value reinterpreted as i64.
def VMOVSDto64Zmr
  : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
             "vmovq\t{$src, $dst|$dst, $src}",
             [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>;
2869
2870// Move Int Doubleword to Single Scalar
2871//
// Bit-for-bit moves of a 32-bit integer (register or memory) into FR32X.
// Code generation only.
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set FR32X:$dst, (bitconvert GR32:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_LIG;

  def VMOVDI2SSZrm
    : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
2883
2884// Move doubleword from xmm register to r/m32
2885//
// Element 0 of a v4i32 -> GR32, or stored to 32-bit memory.
def VMOVPDI2DIZrr
  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))],
             IIC_SSE_MOVD_ToGP>,
    EVEX, VEX_LIG;
def VMOVPDI2DIZmr
  : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))),
                     addr:$dst)],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2897
2898// Move quadword from xmm1 register to r/m64
2899//
// Element 0 of a v2i64 -> GR64.  Requires 64-bit mode.
def VMOVPQIto64Zrr
  : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
      IIC_SSE_MOVD_ToGP>,
    PD, EVEX, VEX_LIG, VEX_W, Requires<[HasAVX512, In64BitMode]>;

// Element 0 of a v2i64 stored to 64-bit memory.  Requires 64-bit mode.
def VMOVPQIto64Zmr
  : I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)],
      IIC_SSE_MOVDQ>,
    EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
    Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;

// Pattern-less register-to-register form using the ".s" mnemonic suffix.
def VMOV64toPQIZrr_REV
  : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src),
             "vmovq.s\t{$src, $dst|$dst, $src}", []>,
    EVEX, VEX_W, VEX_LIG;
2919
2920// Move Scalar Single to Double Int
2921//
// Bit-for-bit moves of an FR32X value into GR32 or 32-bit memory.
// Code generation only.
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr
    : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set GR32:$dst, (bitconvert FR32X:$src))],
               IIC_SSE_MOVD_ToGP>,
      EVEX, VEX_LIG;
  def VMOVSS2DIZmr
    : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
2934
2935// Move Quadword Int to Packed Quadword Int
2936//
// 64-bit memory -> low element of a v2i64 (scalar_to_vector of an i64 load).
def VMOVQI2PQIZrm
  : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src),
             "vmovq\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst,
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2943
2944//===----------------------------------------------------------------------===//
2945// AVX-512  MOVSS, MOVSD
2946//===----------------------------------------------------------------------===//
2947
// Defines the scalar move family for one element type:
//   rr_Int / rm_Int - maskable vector-register forms built on OpNode,
//   rr / rm         - code-generation-only forms using the scalar FRC class,
//   mr / mrk        - plain and masked stores of the scalar FRC value.
//   asm    - mnemonic.
//   OpNode - DAG node combining $src1 with the new low element.
//   _      - vector type info for the element type.
multiclass avx512_move_scalar <string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  asm, "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
                  IIC_SSE_MOV_S_RR>, EVEX_4V;

  // Memory form: $src1 is tied to the destination.
  let Constraints = "$src1 = $dst", mayLoad = 1 in {
    defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
                    (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                    asm, "$src", "$src",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_.VT (scalar_to_vector
                                          (_.ScalarLdFrag addr:$src)))))>,
                  EVEX;
  }

  let isCodeGenOnly = 1 in {
    def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.FRC:$src2),
                      asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.RC:$dst,
                         (_.VT (OpNode _.RC:$src1,
                                       (scalar_to_vector _.FRC:$src2))))],
                      _.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V;
    let mayLoad = 1 in {
      def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.ScalarMemOp:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                        _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
    }
  }

  let mayStore = 1 in {
    def mr : AVX512PI<0x11, MRMDestMem, (outs),
                      (ins _.ScalarMemOp:$dst, _.FRC:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(store _.FRC:$src, addr:$dst)],
                      _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX;
    // The masked store has no pattern of its own.
    def mrk : AVX512PI<0x11, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
                       asm # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                       [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
  } // mayStore
}
2988
// Single-precision scalar move, instantiated with f32x_info.
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

// Double-precision scalar move, instantiated with f64x_info.
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
2994
// Scalar f32 select on a VK1WM mask: both scalars are copied into VR128X,
// VMOVSSZrr_Intk is applied, and the result is copied back to FR32X.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
              (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                              VK1WM:$mask,
                              (v4f32 (IMPLICIT_DEF)),
                              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
              FR32X)>;

// Same selection for f64, using VMOVSDZrr_Intk and FR64X.
def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
              (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                              VK1WM:$mask,
                              (v2f64 (IMPLICIT_DEF)),
                              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
              FR64X)>;

// The masked scalar store intrinsic selects to VMOVSSZmrk; the GR8 mask is
// copied into VK1WM and the vector source into FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3006
// Register-register forms encoded with opcode 0x11 / MRMDestReg and printed
// with the ".s" mnemonic suffix. They are defined through
// AVX512_maskable_in_asm with an empty pattern list.
defm VMOVSSZrr_REV : AVX512_maskable_in_asm<
                         0x11, MRMDestReg, f32x_info,
                         (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
                         "vmovss.s", "$src2, $src1", "$src1, $src2", []>,
                     XS, EVEX_4V, VEX_LIG;

// NOTE(review): the record name "VMOVSSDrr_REV" does not follow the
// VMOVSSZ/VMOVSDZ naming used nearby and looks like a typo for
// "VMOVSDZrr_REV". It is kept as-is because other code may refer to it by
// name -- confirm before renaming.
defm VMOVSSDrr_REV : AVX512_maskable_in_asm<
                         0x11, MRMDestReg, f64x_info,
                         (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
                         "vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
                     XD, EVEX_4V, VEX_LIG, VEX_W;
3016
// Selection patterns that map zero-extending moves, scalar loads, low-element
// stores and MOVSS/MOVSD-style shuffles onto the EVEX scalar move
// instructions defined by VMOVSSZ / VMOVSDZ.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    // Zero-extending scalar move into XMM: clear a VR128X with V_SET0, then
    // use MOVS{S,D} to write the low bits.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
              (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
              (VMOVSSZrr (v4f32 (V_SET0)),
                         (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
              (VMOVSSZrr (v4i32 (V_SET0)),
                         (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
              (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;

    // Move low f32 and clear high bits (256-bit operands; the work is done on
    // the sub_xmm part and widened again with SUBREG_TO_REG).
    def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
              (SUBREG_TO_REG (i32 0),
                  (VMOVSSZrr (v4f32 (V_SET0)),
                             (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)),
                  sub_xmm)>;
    def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
              (SUBREG_TO_REG (i32 0),
                  (VMOVSSZrr (v4i32 (V_SET0)),
                             (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)),
                  sub_xmm)>;
  } // AddedComplexity = 15

  let AddedComplexity = 20 in {
    // MOVSSrm zeros the high parts of the register, so the zero-extending
    // load forms select directly to the load; the scalar result is placed in
    // VR128X with COPY_TO_REGCLASS.
    // The AVX versions also write: DST[255:128] <- 0
    def : Pat<(v4f32 (X86vzmovl
                         (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

    // MOVSDrm zeros the high parts of the register; handled the same way as
    // the MOVSSrm patterns above.
    // The AVX versions also write: DST[255:128] <- 0
    def : Pat<(v2f64 (X86vzmovl
                         (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

    // The same load patterns in the form they take for 256-bit types; here
    // the result is widened with SUBREG_TO_REG.
    def : Pat<(v8i32 (X86vzmovl
                         (insert_subvector undef,
                             (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                             (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f32 (X86vzmovl
                         (insert_subvector undef,
                             (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                             (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v4f64 (X86vzmovl
                         (insert_subvector undef,
                             (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                             (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  } // AddedComplexity = 20

  // 256-bit zero-extending inserts of a scalar register (or an i64 load) into
  // an undef vector.
  def : Pat<(v8f32 (X86vzmovl
                       (insert_subvector undef,
                           (v4f32 (scalar_to_vector FR32X:$src)),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
                (v4f32 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)),
                sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl
                       (insert_subvector undef,
                           (v2f64 (scalar_to_vector FR64X:$src)),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
                (v2f64 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)),
                sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                           (v2i64 (scalar_to_vector (loadi64 addr:$src))),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (v2f64 (V_SET0)),
                           (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)),
                sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (v2i64 (V_SET0)),
                           (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)),
                sub_xmm)>;

  // Extract element 0 and store it.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst,
                       (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
  def : Pat<(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDZmr addr:$dst,
                       (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1),
                       (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4f32 VR128X:$src1),
                       (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;

  // 256-bit variants
  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
                sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
                sub_xmm)>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;

  // 256-bit variants
  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
                sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
                sub_xmm)>;

  // X86Movlpd / X86Movlps on registers also select to VMOVSDZrr.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
}
3157
// vmovq xmm, xmm: selected for X86vzmovl of a v2i64 register.
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr :
      AVX512XSI<0x7E, MRMSrcReg,
                (outs VR128X:$dst), (ins VR128X:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                IIC_SSE_MOVQ_RR>,
      EVEX, VEX_W;
}
3165
// Code-gen-only memory form of vmovq: selected for X86vzmovl of a v2i64 load.
let AddedComplexity = 20, isCodeGenOnly = 1 in {
  def VMOVZPQILo2PQIZrm :
      AVX512XSI<0x7E, MRMSrcMem,
                (outs VR128X:$dst), (ins i128mem:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
                IIC_SSE_MOVDQ>,
      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
3174
// Zero-extending integer moves selected to the EVEX movd/movq forms.
let Predicates = [HasAVX512] in {
  // The AVX 128-bit movd/movq instructions write zeros to the high 128-bit
  // part.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  } // AddedComplexity = 20

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl
                       (insert_subvector undef,
                           (v4i32 (scalar_to_vector GR32:$src)),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                           (v2i64 (scalar_to_vector GR64:$src)),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
3205
// Inserting a GPR at index 0 of an all-zeros 512-bit vector: move the GPR
// into an XMM register and widen with SUBREG_TO_REG.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2,
                             (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// The same insertions into an undef vector select to the same instructions.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
3217
3218//===----------------------------------------------------------------------===//
3219// AVX-512 - Non-temporals
3220//===----------------------------------------------------------------------===//
// vmovntdqa loads. Only the 512-bit form has a selection pattern (the
// int_x86_avx512_movntdqa intrinsic); the 256/128-bit forms have empty
// pattern lists and require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm :
      AVX512PI<0x2A, MRMSrcMem,
               (outs VR512:$dst), (ins i512mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
               SSEPackedInt>,
      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in {
    def VMOVNTDQAZ256rm :
        AVX512PI<0x2A, MRMSrcMem,
                 (outs VR256X:$dst), (ins i256mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm :
        AVX512PI<0x2A, MRMSrcMem,
                 (outs VR128X:$dst), (ins i128mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
3242
// One store instruction ("mr") matching `st_frag` for a single vector width.
//   opc/OpcodeStr - opcode byte and mnemonic.
//   st_frag       - store PatFrag used in the selection pattern.
//   OpVT/RC/memop - value type, register class and memory operand.
//   d/itin        - execution domain and itinerary (default IIC_SSE_MOVNT).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)],
                      d, itin>,
             EVEX;
  }
}
3252
// Instantiates avx512_movnt for 512-, 256- and 128-bit vectors. The value
// type name is assembled as "v" + element count + elty + elsz and the memory
// operand name as elty + "<width>mem". The 256/128-bit forms additionally
// require HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
  let Predicates = [prd] in {
    defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                          !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d, itin>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d, itin>,
                EVEX_V256;

    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d, itin>,
                EVEX_V128;
  }
}
3275
// Non-temporal stores; all three match alignednontemporalstore.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                                "i", "64", "8", "4", "2",
                                SSEPackedInt, HasAVX512>,
                PD, EVEX_CD8<64, CD8VF>;

defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                                "f", "64", "8", "4", "2",
                                SSEPackedDouble, HasAVX512>,
                PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                                "f", "32", "16", "8", "4",
                                SSEPackedSingle, HasAVX512>,
                PS, EVEX_CD8<32, CD8VF>;
3287
3288//===----------------------------------------------------------------------===//
3289// AVX-512 - Integer arithmetic
3290//
// Maskable integer binary operation: register-register (rr) and
// register-memory (rm) forms for the vector type described by `_`.
// IsCommutable is forwarded to the rr form only.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V;

  // Second operand is a full-width vector load.
  let mayLoad = 1 in {
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2),
                              OpcodeStr, "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (bitconvert
                                                (_.LdFrag addr:$src2)))),
                              itins.rm>,
              AVX512BIBase, EVEX_4V;
  }
}
3310
// avx512_binop_rm plus a broadcast-memory form (rmb): the second operand is a
// scalar load wrapped in X86VBroadcast, printed with _.BroadcastStr.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
  let mayLoad = 1 in {
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr,
                               "${src2}"##_.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_.BroadcastStr,
                               (_.VT (OpNode _.RC:$src1,
                                             (X86VBroadcast
                                                 (_.ScalarLdFrag addr:$src2)))),
                               itins.rm>,
               AVX512BIBase, EVEX_4V, EVEX_B;
  }
}
3326
// avx512_binop_rm at all three vector lengths. The 512-bit form requires
// `prd`; the 256/128-bit forms require `prd` and HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                              Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
                             IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
                                IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
                                IsCommutable>,
                EVEX_V128;
  }
}
3341
// avx512_binop_rmb (rr, rm and broadcast forms) at all three vector lengths.
// The 512-bit form requires `prd`; the 256/128-bit forms require `prd` and
// HasVLX.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                               Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
                              IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
                                 IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
                                 IsCommutable>,
                EVEX_V128;
  }
}
3356
// 64-bit element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info, with
// VEX_W and EVEX_CD8<64, CD8VF>.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  itins, prd, IsCommutable>,
              VEX_W, EVEX_CD8<64, CD8VF>;
}
3364
// 32-bit element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info, with
// EVEX_CD8<32, CD8VF>.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  itins, prd, IsCommutable>,
              EVEX_CD8<32, CD8VF>;
}
3371
// 16-bit element wrapper: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i16_info, with EVEX_CD8<16, CD8VF>.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 itins, prd, IsCommutable>,
              EVEX_CD8<16, CD8VF>;
}
3378
// 8-bit element wrapper: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i8_info, with EVEX_CD8<8, CD8VF>.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 itins, prd, IsCommutable>,
              EVEX_CD8<8, CD8VF>;
}
3385
// Dword and qword variants of one operation; the mnemonic gets a "d" or "q"
// suffix and each variant has its own opcode.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode,
                                itins, prd, IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode,
                                itins, prd, IsCommutable>;
}
3395
// Byte and word variants of one operation; the mnemonic gets a "b" or "w"
// suffix and each variant has its own opcode.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode,
                                itins, prd, IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode,
                                itins, prd, IsCommutable>;
}
3405
// All four element sizes of one operation: the d/q variants are predicated
// on HasAVX512, the b/w variants on HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                                    HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                                    HasBWI, IsCommutable>;
}
3415
// Maskable binary operation whose operand type (_Src) differs from its result
// type (_Dst). Provides rr, rm and broadcast (rmb) forms. The broadcast form
// takes its scalar memory operand, broadcast string and scalar load fragment
// from _Dst and bitconverts the broadcast value for the operation.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V;

  let mayLoad = 1 in {
    defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                              (outs _Dst.RC:$dst),
                              (ins _Src.RC:$src1, _Src.MemOp:$src2),
                              OpcodeStr, "$src2, $src1", "$src1, $src2",
                              (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                               (bitconvert
                                                   (_Src.LdFrag addr:$src2)))),
                              itins.rm>,
              AVX512BIBase, EVEX_4V;

    defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                               (outs _Dst.RC:$dst),
                               (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
                               OpcodeStr,
                               "${src2}"##_Dst.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_Dst.BroadcastStr,
                               (_Dst.VT
                                   (OpNode (_Src.VT _Src.RC:$src1),
                                           (bitconvert
                                               (_Dst.VT
                                                   (X86VBroadcast
                                                       (_Dst.ScalarLdFrag
                                                           addr:$src2)))))),
                               itins.rm>,
               AVX512BIBase, EVEX_4V, EVEX_B;
  }
}
3448
// Integer add/subtract (plain, signed-saturating, unsigned-saturating).
// The last template argument is IsCommutable.
defm VPADD   : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                      SSE_INTALU_ITINS_P, 1>;
defm VPSUB   : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                      SSE_INTALU_ITINS_P, 0>;
defm VPADDS  : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS  : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;

// Integer multiplies.
defm VPMULLD   : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                      SSE_INTALU_ITINS_P, HasAVX512, 1>,
                 T8PD;
defm VPMULLW   : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ   : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                      SSE_INTALU_ITINS_P, HasDQI, 1>,
                 T8PD;
defm VPMULHW   : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs,
                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULHUW  : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu,
                                      SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SSE_INTMUL_ITINS_P, HasBWI, 1>,
                 T8PD;

// Byte/word average.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SSE_INTALU_ITINS_P, HasBWI, 1>;
3475
// avx512_binop_rm2 at all three vector lengths, with i32 source vectors and
// i64 destination vectors of the same total width. Every form carries
// EVEX_CD8<64, CD8VF> and VEX_W.
//
// The 256/128-bit forms list HasAVX512 together with HasVLX, following the
// [prd, HasVLX] convention of the other *_vl multiclasses in this file,
// instead of HasVLX alone.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode, bit IsCommutable = 0> {

  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                 v16i32_info, v8i64_info, IsCommutable>,
                                EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasAVX512, HasVLX] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      v8i32x_info, v4i64x_info, IsCommutable>,
                                     EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      v4i32x_info, v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
3491
// Widening dword multiplies (i32 source vectors, i64 result vectors); both
// are marked commutable.
defm VPMULDQ  : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
                                 X86pmuldq, 1>,
                T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
                                 X86pmuludq, 1>;
3496
// Broadcast-memory (rmb) form of a pack-style operation: operands have type
// _Src, the result has type _Dst. The scalar memory operand, broadcast
// string, scalar load fragment and CD8 element size all come from _Src.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
  let mayLoad = 1 in {
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                               (outs _Dst.RC:$dst),
                               (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                               OpcodeStr,
                               "${src2}"##_Src.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_Src.BroadcastStr,
                               (_Dst.VT
                                   (OpNode (_Src.VT _Src.RC:$src1),
                                           (bitconvert
                                               (_Src.VT
                                                   (X86VBroadcast
                                                       (_Src.ScalarLdFrag
                                                           addr:$src2))))))>,
               EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
  }
}
3511
// Register-register (rr) and register-memory (rm) forms of a pack-style
// operation: operands have type _Src, the result has type _Dst. The CD8
// element size is taken from _Src.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.VT _Src.RC:$src2)))>,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;

  let mayLoad = 1 in {
    defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                              (outs _Dst.RC:$dst),
                              (ins _Src.RC:$src1, _Src.MemOp:$src2),
                              OpcodeStr, "$src2, $src1", "$src1, $src2",
                              (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                               (bitconvert
                                                   (_Src.LdFrag addr:$src2))))>,
              EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
  }
}
3531
// The three multiclasses below spell out their own predicates: HasBWI for the
// 512-bit form and HasBWI plus HasVLX for the 256/128-bit forms. They are
// instantiated without an enclosing `let Predicates` so that these lists are
// the ones attached to the generated records, i.e. the 256/128-bit forms are
// gated on both features rather than on only one of them.

// Pack i32 vectors into i16 vectors: rr, rm and broadcast (rmb) forms at all
// three vector lengths.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info>, EVEX_V128;
  }
}

// Pack i16 vectors into i8 vectors: rr and rm forms only (no broadcast form)
// at all three vector lengths.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
                                v64i8_info>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                    v32i8x_info>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                    v16i8x_info>, EVEX_V128;
  }
}

// Multiply-add style operation from _Src element vectors to _Dst element
// vectors, built from avx512_packs_rm at all three vector lengths.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase;
3585
// Packed integer max/min, signed and unsigned.  The byte and word forms are
// instantiated with HasBWI, the combined dword/qword forms (two opcode
// arguments, one per element size) with HasAVX512.
// NOTE(review): the trailing '1' is presumably the IsCommutable flag of the
// avx512_binop_rm_vl_* multiclasses - confirm against their definitions.

// Signed maximum.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned maximum.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Signed minimum.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned minimum.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
3613//===----------------------------------------------------------------------===//
3614// AVX-512  Logical Instructions
3615//===----------------------------------------------------------------------===//
3616
// Bitwise logic on dword/qword elements.  Each defm covers both element sizes
// and uses the same opcode for the two.  VPANDN is the only one instantiated
// with a final argument of 0; and/or/xor pass 1.
defm VPAND  : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPOR   : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPXOR  : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SSE_INTALU_ITINS_P, HasAVX512, 0>;
3625
3626//===----------------------------------------------------------------------===//
3627// AVX-512  FP arithmetic
3628//===----------------------------------------------------------------------===//
// Scalar FP binary operation in its non-rounding forms.
//   rr_Int / rm_Int - maskable forms on the vector register class (_.RC);
//                     they match VecNode with FROUND_CURRENT.
//   rr / rm         - codegen-only forms on the scalar FP register class
//                     (_.FRC) that match the plain OpNode.
// itins supplies the register (itins.rr) and memory (itins.rm) itineraries.
// IsCommutable is forwarded to the maskable forms and also sets isCommutable
// on the codegen-only defs.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode VecNode, OpndItins itins,
                         bit IsCommutable> {

  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT)),
                           itins.rr, IsCommutable>;

  // The memory operand is a scalar load widened with scalar_to_vector.
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (VecNode (_.VT _.RC:$src1),
                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
                           (i32 FROUND_CURRENT)),
                         itins.rm, IsCommutable>;
  let isCodeGenOnly = 1, isCommutable = IsCommutable,
      Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                          itins.rr>;
  // Memory form: uses the memory itinerary (itins.rm), like rm_Int above.
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))], itins.rm>;
  }
}
3661
// Scalar FP binary operation with an explicit rounding-control operand.
// Defines a single maskable register form (rrb) that takes $rc as a third
// input, matches VecNode with that immediate, and is tagged EVEX_B + EVEX_RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, SDNode VecNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                 OpcodeStr,
                 "$rc, $src2, $src1", "$src1, $src2, $rc",
                 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)),
                 itins.rr, IsCommutable>,
             EVEX_B, EVEX_RC;
}
// Scalar FP binary operation in its {sae} (suppress-all-exceptions) form.
// Defines a single maskable register form (rrb) that matches VecNode with
// FROUND_NO_EXC and is tagged EVEX_B.
//
// The register itinerary (itins.rr) is forwarded, as avx512_fp_scalar_round
// does for its rrb form.
// NOTE(review): IsCommutable is accepted for signature parity with the other
// scalar multiclasses but is deliberately not forwarded; confirm before
// wiring it through, since the callers in this file pass 1 for vmin/vmax.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode VecNode, OpndItins itins, bit IsCommutable> {

  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B;
}
3681
// Scalar single/double FP binary operation that also has a rounding-control
// form.  SSZ and SDZ each combine avx512_fp_scalar with
// avx512_fp_scalar_round; the mnemonic gets an "ss" or "sd" suffix and the
// matching half of the SizeItins (itins.s / itins.d).
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SizeItins itins,
                                bit IsCommutable> {
  // Single precision.
  defm SSZ :
      avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                       itins.s, IsCommutable>,
      avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                             itins.s, IsCommutable>,
      XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Double precision.
  defm SDZ :
      avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                       itins.d, IsCommutable>,
      avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                             itins.d, IsCommutable>,
      XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
3696
// Scalar single/double FP binary operation that also has a {sae} form.
// SSZ and SDZ each combine avx512_fp_scalar with avx512_fp_scalar_sae; the
// mnemonic gets an "ss" or "sd" suffix and the matching half of the SizeItins
// (itins.s / itins.d).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SizeItins itins,
                              bit IsCommutable> {
  // Single precision.
  defm SSZ :
      avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                       itins.s, IsCommutable>,
      avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
                           itins.s, IsCommutable>,
      XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Double precision.
  defm SDZ :
      avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                       itins.d, IsCommutable>,
      avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
                           itins.d, IsCommutable>,
      XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic.  The last argument is IsCommutable.
// add/mul are commutative; sub/div are not.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
// vmin/vmax must NOT be marked commutable: the x86 min/max instructions
// return the second source operand when either input is a NaN or when both
// inputs are zeros, so swapping the operands can change the result.
defm VMIN : avx512_binop_s_sae  <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae  <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
3717
// Packed FP binary operation for one vector type: register (rr), full-vector
// memory (rm) and broadcast-from-scalar memory (rmb) forms.  All three are
// maskable, EVEX_4V encoded, and named OpcodeStr followed by the type suffix.
// NOTE(review): IsCommutable is accepted but is not forwarded to
// AVX512_maskable by any of the three forms - confirm whether that is
// intentional.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, bit IsCommutable> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr##_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
            EVEX_4V;

  let mayLoad = 1 in {
    // Second operand loaded as a whole vector.
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
              EVEX_4V;

    // Second operand is a scalar load broadcast to every element (EVEX_B).
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
                   OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode _.RC:$src1,
                           (_.VT (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2))))>,
               EVEX_4V, EVEX_B;
  } // let mayLoad = 1
}
3738
// Packed FP binary operation with an explicit rounding-control operand.
// Defines one maskable register form (rb) that takes $rc as a third input and
// passes it to OpNodeRnd as an i32 immediate; tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd, X86VectorVTInfo _> {
  defm rb : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                OpcodeStr##_.Suffix,
                "$rc, $src2, $src1", "$src1, $src2, $rc",
                (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
            EVEX_4V, EVEX_B, EVEX_RC;
}
3747
3748
// Packed FP binary operation in its {sae} form.  Defines one maskable
// register form (rb) that passes FROUND_NO_EXC to OpNodeRnd; tagged EVEX_B.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDNode OpNodeRnd, X86VectorVTInfo _> {
  defm rb : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr##_.Suffix,
                "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                 (i32 FROUND_NO_EXC)))>,
            EVEX_4V, EVEX_B;
}
3757
// Packed single/double FP binary operation at every vector width.  The
// 512-bit forms (PSZ/PDZ) carry no extra predicate here; the 128-bit and
// 256-bit forms are wrapped in Predicates = [HasVLX].  Single-precision forms
// use PS with 32-bit CD8 elements, double-precision forms use PD + VEX_W with
// 64-bit CD8 elements.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             bit IsCommutable = 0> {
  // 512-bit forms.
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              IsCommutable>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              IsCommutable>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   IsCommutable>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   IsCommutable>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   IsCommutable>,
                  EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   IsCommutable>,
                  EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
3783
// Rounding-control forms of a packed FP binary operation.  Only the 512-bit
// single (PSZ) and double (PDZ) variants are defined.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
3790
// {sae} forms of a packed FP binary operation.  Only the 512-bit single (PSZ)
// and double (PDZ) variants are defined.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
3797
// Packed FP arithmetic.  The optional last argument of avx512_fp_binop_p is
// IsCommutable (default 0).  add/mul/sub/div also get rounding-control forms;
// min/max get {sae} forms instead.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
// vmin/vmax are declared non-commutable: the x86 min/max instructions return
// the second source operand when either input is a NaN or when both inputs
// are zeros, so the operand order is significant.
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
// Packed FP bitwise logic, gated on HasDQI.
// NOTE(review): avx512_fp_binop_p sets Predicates = [HasVLX] on its 128/256-bit
// forms; confirm how that composes with this enclosing let.
let Predicates = [HasDQI] in {
  defm VAND  : avx512_fp_binop_p<0x54, "vand", X86fand, 1>;
  defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>;
  defm VOR   : avx512_fp_binop_p<0x56, "vor", X86for, 1>;
  defm VXOR  : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
}
3816
// Packed scalef-style operation for one vector type: register (rr),
// full-vector memory (rm) and broadcast memory (rmb) forms.  Every pattern
// passes FROUND_CURRENT as the third operand of OpNode.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr##_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                              (i32 FROUND_CURRENT)))>,
            EVEX_4V;

  let mayLoad = 1 in {
    // Second operand loaded as a whole vector.
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                          (i32 FROUND_CURRENT))>,
              EVEX_4V;

    // Second operand is a scalar load broadcast to every element (EVEX_B).
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
                   OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode _.RC:$src1,
                           (_.VT (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2))),
                           (i32 FROUND_CURRENT))>,
               EVEX_4V, EVEX_B;
  } // let mayLoad = 1
}
3837
// Scalar scalef-style operation: maskable register (rr) and memory (rm)
// forms, both passing FROUND_CURRENT as the third operand of OpNode.
// NOTE(review): the rm form uses _.MemOp / _.LdFrag directly, whereas
// avx512_fp_scalar wraps _.ScalarLdFrag in scalar_to_vector for its memory
// form - confirm the operand types line up for the scalar type infos.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86VectorVTInfo _> {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr##_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                              (i32 FROUND_CURRENT)))>;

  let mayLoad = 1 in {
    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                          (i32 FROUND_CURRENT))>;
  } // let mayLoad = 1
}
3851
// Every form of a scalef-style operation.
//   opc       - opcode of the packed forms.
//   opcScaler - opcode of the scalar (ss/sd) forms.
// The 512-bit packed forms and the scalar forms also get a rounding-control
// variant; the 128/256-bit packed forms are wrapped in Predicates = [HasVLX]
// and have no rounding variant.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode> {
  // 512-bit packed.
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Scalar.
  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode,
                                        f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNode, SSE_ALU_ITINS_S.s>,
                EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode,
                                        f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNode, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF: packed forms use opcode 0x2C, scalar forms 0x2D; all are T8PD.
defm VSCALEF :
    avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
3879
3880//===----------------------------------------------------------------------===//
3881// AVX-512  VPTESTM instructions
3882//===----------------------------------------------------------------------===//
3883
// Vector test producing a mask register (_.KRC) result: register (rr) and
// full-vector memory (rm) forms, built on AVX512_maskable_cmp.  The memory
// form bitconverts the loaded value to _.VT and carries the EVEX_CD8 scaling.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86VectorVTInfo _> {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
            EVEX_4V;

  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (bitconvert (_.LdFrag addr:$src2))))>,
            EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
3900
// Broadcast-memory form (rmb) of the vector test: the second operand is a
// scalar load broadcast with X86VBroadcast, and the instruction is tagged
// EVEX_B.  The result is a mask register (_.KRC).
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  let mayLoad = 1 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr,
                 "${src2}"##_.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_.BroadcastStr,
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT (X86VBroadcast
                                   (_.ScalarLdFrag addr:$src2))))>,
             EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
// Vector test at every width for one dword/qword element type.  Each width
// combines avx512_vptest with the broadcast form avx512_vptest_mb.  The
// 512-bit form requires HasAVX512; the 256/128-bit forms require HasAVX512
// and HasVLX.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
  }
}
3925
// Dword ("d") and qword ("q") vector tests sharing one opcode; the qword
// variant additionally sets VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
                                  avx512vl_i64_info>,
           VEX_W;
}
3932
// Word ("w") and byte ("b") vector tests sharing one opcode; the word
// variants additionally set VEX_W.  Only avx512_vptest is used (no broadcast
// form).  512-bit forms require HasBWI; 256/128-bit forms require HasVLX and
// HasBWI.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
              EVEX_V512, VEX_W;
    defm BZ : avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
              EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
                 EVEX_V128, VEX_W;
    defm BZ256 : avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
                 EVEX_V256;
    defm BZ128 : avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
                 EVEX_V128;
  }
}
3953
// All element sizes of a vector test: opc_wb is used for the word/byte forms
// and opc_dq for the dword/qword forms.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, SDNode OpNode>
    : avx512_vptest_wb<opc_wb, OpcodeStr, OpNode>,
      avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
3958
// VPTESTM and VPTESTNM share opcodes (0x26 word/byte, 0x27 dword/qword) and
// differ in the prefix class: T8PD versus T8XS.
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>,
                T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>,
                T8XS;

// Select the 512-bit ptestm intrinsics when called with an all-ones mask
// (-1): emit the unmasked register form and copy the mask result into a GPR
// of the intrinsic's return width.
def : Pat<(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
                                                 (v16i32 VR512:$src2),
                                                 (i16 -1))),
          (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
                                                (v8i64 VR512:$src2),
                                                (i8 -1))),
          (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
3969
3970//===----------------------------------------------------------------------===//
3971// AVX-512  Shift instructions
3972//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate: register-source (ri) and memory-source (mi)
// forms.  ImmFormR / ImmFormM select the instruction format of each form.
// The memory form bitconverts the loaded vector to _.VT before shifting.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  defm ri : AVX512_maskable<opc, ImmFormR, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                SSE_INTSHIFT_ITINS_P.rr>;

  let mayLoad = 1 in
  defm mi : AVX512_maskable<opc, ImmFormM, _,
                (outs _.RC:$dst),
                (ins _.MemOp:$src1, u8imm:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i8 imm:$src2))),
                SSE_INTSHIFT_ITINS_P.rm>;
}
3988
// Shift by an 8-bit immediate where the shifted value is a scalar load
// broadcast with X86VBroadcast (mbi form, tagged EVEX_B).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, string OpcodeStr,
                             SDNode OpNode, X86VectorVTInfo _> {
  let mayLoad = 1 in
  defm mbi : AVX512_maskable<opc, ImmFormM, _,
                 (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src1, u8imm:$src2),
                 OpcodeStr,
                 "$src2, ${src1}"##_.BroadcastStr,
                 "${src1}"##_.BroadcastStr##", $src2",
                 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)),
                               (i8 imm:$src2))),
                 SSE_INTSHIFT_ITINS_P.rm>,
             EVEX_B;
}
3998
// Shift where the count comes from a 128-bit operand: register (rr) and
// memory (rm) forms.  SrcVT is the type given to the VR128X count register;
// bc_frag converts the v2i64 load used by the memory form.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, VR128X:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
                SSE_INTSHIFT_ITINS_P.rr>,
            AVX512BIBase, EVEX_4V;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, i128mem:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1,
                              (bc_frag (loadv2i64 addr:$src2)))),
                SSE_INTSHIFT_ITINS_P.rm>,
            AVX512BIBase, EVEX_4V;
}
4014
// Shift-by-128-bit-count at every vector width for one element type.  The
// 512-bit form requires prd; the 256/128-bit forms require prd and HasVLX.
// The EVEX_CD8 kind differs per width: CD8VQ for 512-bit, CD8VH for 256-bit
// and CD8VF for 128-bit.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              ValueType SrcVT, PatFrag bc_frag,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
                            VTInfo.info512>,
           EVEX_V512, EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
                                 VTInfo.info256>,
                EVEX_V256, EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
                                 VTInfo.info128>,
                EVEX_V128, EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
4031
// Shift-by-128-bit-count for dword, qword and word elements, each with its
// own opcode (opcd / opcq / opcw).  Dword and qword forms are gated on
// HasAVX512, word forms on HasBWI; the qword form also sets VEX_W.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
                              avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
                              avx512vl_i64_info, HasAVX512>,
           VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
                              avx512vl_i16_info, HasBWI>;
}
4041
// Shift-by-immediate at every vector width for one element type.  Each width
// combines avx512_shift_rmi with the broadcast form avx512_shift_rmbi.  The
// 512-bit form requires HasAVX512; the 256/128-bit forms require HasAVX512
// and HasVLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR,
                                  Format ImmFormM, string OpcodeStr,
                                  SDNode OpNode,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            VTInfo.info512>,
           avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                             VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 VTInfo.info128>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  VTInfo.info128>,
                EVEX_V128;
  }
}
4061
// Shift-by-immediate for word elements at every vector width.  Only
// avx512_shift_rmi is used (no broadcast form).  The 512-bit form requires
// HasBWI; the 256/128-bit forms require HasVLX and HasBWI.
// NOTE(review): unlike avx512_shift_rmi_dq, no EVEX_CD8 is attached here, and
// avx512_shift_rmi does not add one either - confirm the word 'mi' forms get
// their disp8 scaling from somewhere else.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm WZ : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                             v32i16_info>,
            EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  v16i16x_info>,
                 EVEX_V256;
    defm WZ128 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  v8i16x_info>,
                 EVEX_V128;
  }
}
4075
// Shift-by-immediate for dword ("d") and qword ("q") elements, each with its
// own opcode.  The element size is reflected in the EVEX_CD8 argument; the
// qword form also sets VEX_W.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, Format ImmFormR,
                               Format ImmFormM, string OpcodeStr,
                               SDNode OpNode> {
  defm D : avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d",
                                  OpNode, avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;
  defm Q : avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q",
                                  OpNode, avx512vl_i64_info>,
           EVEX_CD8<64, CD8VF>, VEX_W;
}
4084
// Shifts by immediate.  The dword/qword forms and the word forms are combined
// under one defm name; the MRMnr / MRMnm formats distinguish the operations.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>,
             AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>,
             AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>,
             AVX512BIi8Base, EVEX_4V;

// Rotates by immediate (dword/qword only).
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>,
             AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>,
             AVX512BIi8Base, EVEX_4V;

// Shifts whose count comes from a 128-bit register or memory operand.
// Opcode arguments are in dword, qword, word order.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
4100
4101//===-------------------------------------------------------------------===//
4102// Variable Bit Shifts
4103//===-------------------------------------------------------------------===//
// Variable (per-element) shift: register (rr) and full-vector memory (rm)
// forms.  The shift-count operand has the same vector type as the value being
// shifted; the memory form bitconverts the loaded vector to _.VT.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
                SSE_INTSHIFT_ITINS_P.rr>,
            AVX5128IBase, EVEX_4V;

  let mayLoad = 1 in
  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode _.RC:$src1,
                              (_.VT (bitconvert (_.LdFrag addr:$src2))))),
                SSE_INTSHIFT_ITINS_P.rm>,
            AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
4120
// Broadcast-memory (rmb) companion of avx512_var_shift: the second source is
// a scalar load (_.ScalarLdFrag) wrapped in X86VBroadcast, and the assembly
// operand is suffixed with _.BroadcastStr.  The form is tagged EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let mayLoad = 1 in {
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr,
                               !strconcat("${src2}", _.BroadcastStr, ", $src1"),
                               !strconcat("$src1, ${src2}", _.BroadcastStr),
                               (_.VT (OpNode _.RC:$src1,
                                       (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))),
                               SSE_INTSHIFT_ITINS_P.rm>,
               AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
// Instantiates avx512_var_shift + avx512_var_shift_mb for the three vector
// widths described by the AVX512VLVectorVTInfo argument.  The 512-bit form
// is predicated on HasAVX512; the 256/128-bit forms additionally need HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
  }
}
4146
// Dword (D) and qword (Q) instantiations of avx512_var_shift_sizes.  The
// mnemonic gets a "d" / "q" suffix and the qword form is tagged VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
                                  avx512vl_i64_info>,
           VEX_W;
}
4154
// Word-element instantiations of avx512_var_shift (rr/rm only, no broadcast
// form).  The 512-bit form needs HasBWI; the 256/128-bit forms need
// HasVLX and HasBWI.  All three are tagged VEX_W.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
              EVEX_V512, VEX_W;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
                 EVEX_V128, VEX_W;
  }
}
4168
// Variable shifts: dword/qword forms plus a word form, selected from the
// generic shl / sra / srl nodes.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
              avx512_var_shift_w<0x12, "vpsllvw", shl>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
              avx512_var_shift_w<0x11, "vpsravw", sra>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl>;

// Variable rotates: dword/qword forms only.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
4177
4178//===-------------------------------------------------------------------===//
4179// 1-src variable permutation VPERMW/D/Q
4180//===-------------------------------------------------------------------===//
// 512-bit and 256-bit instantiations of avx512_var_shift +
// avx512_var_shift_mb for a permute node.  Unlike avx512_var_shift_sizes, no
// 128-bit variant is defined here.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
  }
}
4191
// 512-bit and 256-bit instantiations of the immediate forms
// (avx512_shift_rmi + avx512_shift_rmbi) for a permute node.  No 128-bit
// variant is defined here.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  VTInfo.info256>,
                EVEX_V256;
  }
}
4206
4207
// Word permute.  The defm is named VPERM because avx512_var_shift_w itself
// contributes the WZ / WZ256 / WZ128 part of each record name.
defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>;

// Permutes selected from X86VPermv, for integer and FP element types.
defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     avx512vl_i64_info>,
               VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     avx512vl_f64_info>,
               VEX_W;

// Immediate forms of the 64-bit element permutes, selected from X86VPermi.
defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                      X86VPermi, avx512vl_i64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
4225//===----------------------------------------------------------------------===//
4226// AVX-512 - VPERMIL
4227//===----------------------------------------------------------------------===//
4228
// Forms of a permute whose second source is a control vector: register (rr),
// full-vector load (rm) and broadcast scalar load (rmb).
//   _    - type description of the data operand and the result.
//   Ctrl - type description of the control operand; its VT, register class
//          and load fragments are used for $src2.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
  defm rr : AVX512_maskable<OpcVar, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.RC:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT Ctrl.RC:$src2)))>,
            T8PD, EVEX_4V;

  let mayLoad = 1 in {
    defm rm : AVX512_maskable<OpcVar, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, Ctrl.MemOp:$src2),
                              OpcodeStr, "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (Ctrl.VT (bitconvert
                                                       (Ctrl.LdFrag addr:$src2)))))>,
              T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;

    // The memory operand type comes from _, while the load fragment comes
    // from Ctrl.
    defm rmb : AVX512_maskable<OpcVar, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr,
                               !strconcat("${src2}", _.BroadcastStr, ", $src1"),
                               !strconcat("$src1, ${src2}", _.BroadcastStr),
                               (_.VT (OpNode _.RC:$src1,
                                             (Ctrl.VT (X86VBroadcast
                                                        (Ctrl.ScalarLdFrag addr:$src2)))))>,
               T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
  } // mayLoad = 1
}
4256
// Instantiates avx512_permil_vec with X86VPermilpv for the 512-bit width
// (HasAVX512) and for the 128/256-bit widths (HasAVX512 + HasVLX).
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv,
                               _.info512, Ctrl.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv,
                                  _.info128, Ctrl.info128>,
                EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv,
                                  _.info256, Ctrl.info256>,
                EVEX_V256;
  }
}
4270
// Full instruction family for one permute mnemonic: the control-vector forms
// (opcode OpcVar) and the immediate forms (opcode OpcImm, X86VPermilpi).
// A second, isCodeGenOnly set with the "_I" name suffix repeats both groups
// using the Ctrl type description for the data operand as well.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME : avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
  defm NAME : avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                     X86VPermilpi, _>,
              EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;

  // Lowering implementation with the alternative types.
  let isCodeGenOnly = 1 in {
    defm NAME#_I : avx512_permil_vec_common<OpcodeStr, OpcVar, Ctrl, Ctrl>;
    defm NAME#_I : avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem,
                                          OpcodeStr, X86VPermilpi, Ctrl>,
                   EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
4287
// Arguments: mnemonic, immediate-form opcode, control-vector-form opcode,
// data type description, control type description.
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C,
                               avx512vl_f32_info, avx512vl_i32_info>;
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D,
                               avx512vl_f64_info, avx512vl_i64_info>,
                 VEX_W;
4292//===----------------------------------------------------------------------===//
4293// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
4294//===----------------------------------------------------------------------===//
4295
// All three shuffles share opcode 0x70 and differ in the base class
// (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base) they are tagged with.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;

defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw>,
               EVEX, AVX512XSIi8Base;

defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw>,
               EVEX, AVX512XDIi8Base;
4303
// Byte-element instantiations of avx512_var_shift (rr/rm forms).  The 512-bit
// form needs HasBWI; the 256/128-bit forms need HasVLX and HasBWI.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>,
                EVEX_V128;
  }
}

defm VPSHUFB : avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
4315
4316//===----------------------------------------------------------------------===//
4317// Move Low to High and High to Low packed FP Instructions
4318//===----------------------------------------------------------------------===//
// Register-register forms on VR128X.  Both carry a v4f32 selection pattern
// and the IIC_SSE_MOV_LH itinerary.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg,
                    (outs VR128X:$dst),
                    (ins VR128X:$src1, VR128X:$src2),
                    "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128X:$dst,
                       (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
                    IIC_SSE_MOV_LH>,
                  EVEX_4V;

def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg,
                    (outs VR128X:$dst),
                    (ins VR128X:$src1, VR128X:$src2),
                    "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128X:$dst,
                       (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
                    IIC_SSE_MOV_LH>,
                  EVEX_4V;
4329
// Additional integer-typed selection patterns for the two register-register
// instructions.
let Predicates = [HasAVX512] in {
  // MOVLHPS: v4i32 and v2i64 results.
  def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;

  // MOVHLPS: v4i32 result.
  def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
            (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
4341
4342//===----------------------------------------------------------------------===//
4343// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
4345//===----------------------------------------------------------------------===//
// Load form (rm) of a half-vector move: $src1 is a vector register and $src2
// an f64mem operand.  The pattern loads an f64, turns it into a v2f64 with
// scalar_to_vector and bitconverts that to _.VT before applying OpNode.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _> {
  let mayLoad = 1 in {
    def rm : AVX512<opc, MRMSrcMem,
               (outs _.RC:$dst),
               (ins _.RC:$src1, f64mem:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set _.RC:$dst,
                  (OpNode _.RC:$src1,
                          (_.VT (bitconvert
                                  (v2f64 (scalar_to_vector
                                           (loadf64 addr:$src2)))))))],
               IIC_SSE_MOV_LH>,
             EVEX_4V;
  }
}
4359
// Load forms.  The high-half moves use opcode 0x16 and the low-half moves
// 0x12; the PD variants are additionally tagged VEX_W.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
4368
// Extra selection patterns that fold a load into the VMOVH*/VMOVL* load forms.
let Predicates = [HasAVX512] in {
  // VMOVHPS: 64-bit integer load or zero-extending vector load as the second
  // operand of X86Movlhps.
  def : Pat<(X86Movlhps VR128X:$src1,
              (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128X:$src1,
              (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVHPD: X86Unpckl with a scalar load as the second operand.
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                     (bc_v2f64 (v2i64 (scalar_to_vector
                                        (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPS: X86Movlps with a loaded second operand.
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
            (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
            (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD: X86Movlpd with a loaded second operand, or X86Movsd with a
  // scalar load.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
4398
// Store forms (MRMDestMem).  Each stores an f64 extracted at index 0: the
// high-half variants extract from an X86Unpckh of the source with itself, the
// low-half variants extract from the source directly.
let mayStore = 1 in {
  def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem,
                        (outs), (ins f64mem:$dst, VR128X:$src),
                        "vmovhps\t{$src, $dst|$dst, $src}",
                        [(store
                           (f64 (vector_extract
                                  (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                             (bc_v2f64 (v4f32 VR128X:$src))),
                                  (iPTR 0))),
                           addr:$dst)],
                        IIC_SSE_MOV_LH>,
                      EVEX, EVEX_CD8<32, CD8VT2>;

  def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem,
                        (outs), (ins f64mem:$dst, VR128X:$src),
                        "vmovhpd\t{$src, $dst|$dst, $src}",
                        [(store
                           (f64 (vector_extract
                                  (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                  (iPTR 0))),
                           addr:$dst)],
                        IIC_SSE_MOV_LH>,
                      EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;

  def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem,
                        (outs), (ins f64mem:$dst, VR128X:$src),
                        "vmovlps\t{$src, $dst|$dst, $src}",
                        [(store
                           (f64 (vector_extract
                                  (bc_v2f64 (v4f32 VR128X:$src)),
                                  (iPTR 0))),
                           addr:$dst)],
                        IIC_SSE_MOV_LH>,
                      EVEX, EVEX_CD8<32, CD8VT2>;

  def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem,
                        (outs), (ins f64mem:$dst, VR128X:$src),
                        "vmovlpd\t{$src, $dst|$dst, $src}",
                        [(store
                           (f64 (vector_extract
                                  (v2f64 VR128X:$src),
                                  (iPTR 0))),
                           addr:$dst)],
                        IIC_SSE_MOV_LH>,
                      EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
}
// Extra selection patterns that map store idioms onto the VMOVH*/VMOVL*
// store forms.
let Predicates = [HasAVX512] in {
  // VMOVHPD: store of element 0 of (X86VPermilpi $src, 1).
  def : Pat<(store (f64 (vector_extract
                          (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                          (iPTR 0))),
                   addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;

  // VMOVLPS: load / X86Movlps / store back to the same address.
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps (bc_v4i32 (loadv2i64 addr:$src1)),
                                     VR128X:$src2)),
                   addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;

  // VMOVLPD: load / X86Movlpd / store back to the same address.
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
4451//===----------------------------------------------------------------------===//
4452// FMA - Fused Multiply Operations
4453//
4454
// Packed FMA, "213" operand order.  $src1 is tied to $dst; every pattern is
// (OpNode $src1, $src2, $src3) with $src3 being the register, full-vector
// load or broadcast scalar load operand.
let Constraints = "$src1 = $dst" in {
// Register (r), memory (m) and broadcast-memory (mb) forms.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  defm r : AVX512_maskable_3src<opc, MRMSrcReg, _,
             (outs _.RC:$dst),
             (ins _.RC:$src2, _.RC:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
           AVX512FMA3Base;

  let mayLoad = 1 in {
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _,
               (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                             (_.LdFrag addr:$src3)))>,
             AVX512FMA3Base;

    // The result is wrapped in _.VT like the r/m forms above and like the
    // mb forms of the 231 and 132 multiclasses.
    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                !strconcat("$src2, ${src3}", _.BroadcastStr),
                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                              (_.VT (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src3)))))>,
              AVX512FMA3Base, EVEX_B;
  }
}

// Register form with an explicit rounding-control operand ($rc), passed to
// OpNode as an i32 immediate.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _> {
  defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _,
              (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
              OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
              (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3,
                            (i32 imm:$rc)))>,
            AVX512FMA3Base, EVEX_B, EVEX_RC;
}
} // Constraints = "$src1 = $dst"
4490
// Width instantiations of the 213 forms.  Only the 512-bit variant also gets
// the rounding-control form; the 256/128-bit variants need HasVLX as well.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, SDNode OpNodeRnd,
                                   AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
4505
// Single-precision (PS) and double-precision (PD) instantiations of the 213
// forms; the mnemonic gets a "ps" / "pd" suffix and PD is tagged VEX_W.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    avx512vl_f32_info>;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    avx512vl_f64_info>,
            VEX_W;
}
4513
// 213-order packed FMA family: opcode, mnemonic stem, node, rounding node.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213",
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213",
                                       X86Fnmsub, X86FnmsubRnd>;
4520
4521
// Packed FMA, "231" operand order.  $src1 is tied to $dst; every pattern is
// (OpNode $src2, $src3, $src1) with $src3 being the register, full-vector
// load or broadcast scalar load operand.
let Constraints = "$src1 = $dst" in {
// Register (r), memory (m) and broadcast-memory (mb) forms.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  defm r : AVX512_maskable_3src<opc, MRMSrcReg, _,
             (outs _.RC:$dst),
             (ins _.RC:$src2, _.RC:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
           AVX512FMA3Base;

  let mayLoad = 1 in {
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _,
               (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3),
                             _.RC:$src1))>,
             AVX512FMA3Base;

    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                !strconcat("$src2, ${src3}", _.BroadcastStr),
                (_.VT (OpNode _.RC:$src2,
                              (_.VT (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src3))),
                              _.RC:$src1))>,
              AVX512FMA3Base, EVEX_B;
  }
}

// Register form with an explicit rounding-control operand ($rc), passed to
// OpNode as an i32 immediate.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _> {
  defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _,
              (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
              OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
              (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1,
                            (i32 imm:$rc)))>,
            AVX512FMA3Base, EVEX_B, EVEX_RC;
}
} // Constraints = "$src1 = $dst"
4557
// Width instantiations of the 231 forms.  Only the 512-bit variant also gets
// the rounding-control form; the 256/128-bit variants need HasVLX as well.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, SDNode OpNodeRnd,
                                   AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
4572
// Single-precision (PS) and double-precision (PD) instantiations of the 231
// forms; the mnemonic gets a "ps" / "pd" suffix and PD is tagged VEX_W.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    avx512vl_f32_info>;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    avx512vl_f64_info>,
            VEX_W;
}
4580
// 231-order packed FMA family: opcode, mnemonic stem, node, rounding node.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231",
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231",
                                       X86Fnmsub, X86FnmsubRnd>;
4587
// Packed FMA, "132" operand order.  $src1 is tied to $dst.  Compared with the
// 213/231 multiclasses the input operands are listed as ($src3, $src2), so
// $src2 is the register / full-vector load / broadcast scalar load operand;
// every pattern is (OpNode $src1, $src2, $src3).
let Constraints = "$src1 = $dst" in {
// Register (r), memory (m) and broadcast-memory (mb) forms.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  defm r : AVX512_maskable_3src<opc, MRMSrcReg, _,
             (outs _.RC:$dst),
             (ins _.RC:$src3, _.RC:$src2),
             OpcodeStr, "$src2, $src3", "$src3, $src2",
             (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
           AVX512FMA3Base;

  let mayLoad = 1 in {
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _,
               (outs _.RC:$dst),
               (ins _.RC:$src3, _.MemOp:$src2),
               OpcodeStr, "$src2, $src3", "$src3, $src2",
               (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                             _.RC:$src3))>,
             AVX512FMA3Base;

    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src3, _.ScalarMemOp:$src2),
                OpcodeStr,
                !strconcat("${src2}", _.BroadcastStr, ", $src3"),
                !strconcat("$src3, ${src2}", _.BroadcastStr),
                (_.VT (OpNode _.RC:$src1,
                              (_.VT (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src2))),
                              _.RC:$src3))>,
              AVX512FMA3Base, EVEX_B;
  }
}

// Register form with an explicit rounding-control operand ($rc), passed to
// OpNode as an i32 immediate.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _> {
  defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _,
              (outs _.RC:$dst),
              (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
              OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
              (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3,
                            (i32 imm:$rc)))>,
            AVX512FMA3Base, EVEX_B, EVEX_RC;
}
} // Constraints = "$src1 = $dst"
4623
// Width instantiations of the 132 forms.  Only the 512-bit variant also gets
// the rounding-control form; the 256/128-bit variants need HasVLX as well.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, SDNode OpNodeRnd,
                                   AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
4638
// Single-precision (PS) and double-precision (PD) instantiations of the 132
// forms; the mnemonic gets a "ps" / "pd" suffix and PD is tagged VEX_W.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    avx512vl_f32_info>;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    avx512vl_f64_info>,
            VEX_W;
}
4646
// 132-order packed FMA family: opcode, mnemonic stem, node, rounding node.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132",
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132",
                                       X86Fnmsub, X86FnmsubRnd>;
4653
4654// Scalar FMA
// Shared skeleton for one scalar FMA opcode.  The caller supplies the five
// selection DAGs:
//   RHS_VEC_r / RHS_VEC_m / RHS_VEC_rb - patterns for the maskable vector-
//       register forms (r_Int, m_Int, rb_Int);
//   RHS_r / RHS_m - complete patterns for the isCodeGenOnly forms that
//       operate on _.FRC registers (r, m).
// $src1 is tied to $dst in every form.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
                               dag RHS_r, dag RHS_m> {
  defm r_Int : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src2, _.RC:$src3),
                 OpcodeStr, "$src3, $src2", "$src2, $src3", RHS_VEC_r>,
               AVX512FMA3Base;

  let mayLoad = 1 in {
    defm m_Int : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src2, _.MemOp:$src3),
                   OpcodeStr, "$src3, $src2", "$src2, $src3", RHS_VEC_m>,
                 AVX512FMA3Base;
  }

  // Register form with an explicit rounding-control operand.
  defm rb_Int : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                  OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                  RHS_VEC_rb>,
                AVX512FMA3Base, EVEX_B, EVEX_RC;

  let isCodeGenOnly = 1 in {
    def r : AVX512FMA3<opc, MRMSrcReg,
              (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
              !strconcat(OpcodeStr,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [RHS_r]>;

    let mayLoad = 1 in {
      def m : AVX512FMA3<opc, MRMSrcMem,
                (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                [RHS_m]>;
    }
  } // isCodeGenOnly = 1
}
} // Constraints = "$src1 = $dst"
4688
// Builds the 213, 231 and 132 scalar variants of one FMA operation by
// instantiating avx512_fma3s_common three times.  Record names are
// NAME # <order> # SUFF and mnemonics are OpcodeStr # <order> # _.Suffix.
// The five DAG arguments of each instantiation are, in order: vector-register
// pattern, vector-register pattern with a scalar load, rounding pattern
// (OpNodeRnd with $rc), FRC register pattern and FRC load pattern.  Only the
// operand order handed to OpNode differs between the three variants.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  // 213: OpNode($src2, $src1, $src3).
  defm NAME#213#SUFF : avx512_fma3s_common<opc213,
      OpcodeStr#"213"#_.Suffix, _,
      (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
      (_.VT (OpNode _.RC:$src2, _.RC:$src1,
                    (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src3))))),
      (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
      (set _.FRC:$dst,
           (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, _.FRC:$src3))),
      (set _.FRC:$dst,
           (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                            (_.ScalarLdFrag addr:$src3))))>;

  // 231: OpNode($src2, $src3, $src1).
  defm NAME#231#SUFF : avx512_fma3s_common<opc231,
      OpcodeStr#"231"#_.Suffix, _,
      (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
      (_.VT (OpNode _.RC:$src2,
                    (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src1)),
      (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
      (set _.FRC:$dst,
           (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, _.FRC:$src1))),
      (set _.FRC:$dst,
           (_.EltVT (OpNode _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                            _.FRC:$src1)))>;

  // 132: OpNode($src1, $src3, $src2).
  defm NAME#132#SUFF : avx512_fma3s_common<opc132,
      OpcodeStr#"132"#_.Suffix, _,
      (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)),
      (_.VT (OpNode _.RC:$src1,
                    (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src2)),
      (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
      (set _.FRC:$dst,
           (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, _.FRC:$src2))),
      (set _.FRC:$dst,
           (_.EltVT (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src3),
                            _.FRC:$src2)))>;
}
4728
// Instantiates avx512_fma3s_all twice under HasAVX512: once for f32x_info
// with the "SS" name suffix and once for f64x_info with the "SD" suffix.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    // f32 element type, 32-bit EVEX_CD8 element size.
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                EVEX_CD8<32, CD8VT1>, VEX_LIG;

    // f64 element type, 64-bit EVEX_CD8 element size, additionally VEX_W.
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
4740
// Scalar FMA3 instantiations. The opcodes are given in 213, 231, 132 order,
// followed by the mnemonic stem and the plain / rounding-mode SDNodes.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd",
                            X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub",
                            X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd",
                            X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub",
                            X86Fnmsub, X86FnmsubRnd>;
4745
4746//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from signed integer to float/double
4748//===----------------------------------------------------------------------===//
4749
// Scalar integer-to-FP conversion with a register or memory integer source.
//   rr / rm         - forms on DstVT.FRC with no selection pattern; both are
//                     marked hasSideEffects = 0 and rm is also mayLoad.
//   rr_Int / rm_Int - codegen-only forms on DstVT.RC that match OpNode with
//                     the FROUND_CURRENT rounding operand.
// All four definitions are EVEX_4V and take $src1 as an extra register source.
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
                         X86VectorVTInfo DstVT, X86MemOperand x86memop,
                         PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg,
                (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm, "\t{$src, $src1, $dst|$dst, $src1, $src}"),
                []>,
             EVEX_4V;

    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem,
                (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                !strconcat(asm, "\t{$src, $src1, $dst|$dst, $src1, $src}"),
                []>,
             EVEX_4V;
  } // hasSideEffects = 0

  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, SrcRC:$src2),
                    !strconcat(asm,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  SrcRC:$src2,
                                  (i32 FROUND_CURRENT)))]>,
                 EVEX_4V;

    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, x86memop:$src2),
                    !strconcat(asm,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  (ld_frag addr:$src2),
                                  (i32 FROUND_CURRENT)))]>,
                 EVEX_4V;
  } // isCodeGenOnly = 1
}
4782
// Register-source integer-to-FP conversion with an explicit rounding-control
// operand ($rc). The pattern passes $rc to OpNode as an i32 immediate; the
// definition is tagged EVEX_B and EVEX_RC.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg,
                   (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                       "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 imm:$rc)))]>,
                EVEX_4V, EVEX_B, EVEX_RC;
}
4794
// Combines the rounding-control form (avx512_vcvtsi_round) with the
// register/memory forms (avx512_vcvtsi) under one name and marks all of the
// resulting definitions VEX_LIG.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
              VEX_LIG;
}
4802
let Predicates = [HasAVX512] in {

// Signed integer -> scalar float/double, opcode 0x2A. The "64" variants take
// a GR64 source and add VEX_W.
defm VCVTSI2SSZ   : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
                                         v4f32x_info, i32mem, loadi32,
                                         "cvtsi2ss{l}">,
                    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
                                         v4f32x_info, i64mem, loadi64,
                                         "cvtsi2ss{q}">,
                    XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ   : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
                                         v2f64x_info, i32mem, loadi32,
                                         "cvtsi2sd{l}">,
                    XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
                                         v2f64x_info, i64mem, loadi64,
                                         "cvtsi2sd{q}">,
                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Plain sint_to_fp from memory selects the rm forms; the first instruction
// operand is supplied as IMPLICIT_DEF.
def : Pat<
    (f32 (sint_to_fp (loadi32 addr:$src))),
    (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f32 (sint_to_fp (loadi64 addr:$src))),
    (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (sint_to_fp (loadi32 addr:$src))),
    (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (sint_to_fp (loadi64 addr:$src))),
    (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Plain sint_to_fp from a GPR selects the rr forms.
def : Pat<
    (f32 (sint_to_fp GR32:$src)),
    (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f32 (sint_to_fp GR64:$src)),
    (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<
    (f64 (sint_to_fp GR32:$src)),
    (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f64 (sint_to_fp GR64:$src)),
    (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned integer -> scalar float/double, opcode 0x7B.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss{l}">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
                                          v4f32x_info, i64mem, loadi64,
                                          "cvtusi2ss{q}">,
                     XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// This one instantiates avx512_vcvtsi directly, so it gets no rrb_Int
// (rounding-control) form and carries VEX_LIG explicitly.
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd{l}">,
                     XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
                                          v2f64x_info, i64mem, loadi64,
                                          "cvtusi2sd{q}">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Plain uint_to_fp from memory selects the rm forms.
def : Pat<
    (f32 (uint_to_fp (loadi32 addr:$src))),
    (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f32 (uint_to_fp (loadi64 addr:$src))),
    (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (uint_to_fp (loadi32 addr:$src))),
    (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (uint_to_fp (loadi64 addr:$src))),
    (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Plain uint_to_fp from a GPR selects the rr forms.
def : Pat<
    (f32 (uint_to_fp GR32:$src)),
    (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f32 (uint_to_fp GR64:$src)),
    (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<
    (f64 (uint_to_fp GR32:$src)),
    (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f64 (uint_to_fp GR64:$src)),
    (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
4866
4867//===----------------------------------------------------------------------===//
4868// AVX-512  Scalar convert from float/double to integer
4869//===----------------------------------------------------------------------===//
// Scalar FP-to-integer conversion driven by an intrinsic.
//   rr - register form, selected for (Int SrcRC:$src).
//   rb - register form with a rounding-control operand; no pattern.
//   rm - memory form (mayLoad); no pattern.
// mem_cpat is accepted as a parameter but is not referenced in this body.
multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC,
                                  RegisterClass DstRC, Intrinsic Int,
                                  Operand memop, ComplexPattern mem_cpat,
                                  string asm> {
  let hasSideEffects = 0, Predicates = [HasAVX512] in {
    def rr : SI<opc, MRMSrcReg,
                (outs DstRC:$dst), (ins SrcRC:$src),
                !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                [(set DstRC:$dst, (Int SrcRC:$src))]>,
             EVEX, VEX_LIG;

    def rb : SI<opc, MRMSrcReg,
                (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
                !strconcat(asm, "\t{$rc, $src, $dst|$dst, $src, $rc}"),
                []>,
             EVEX, VEX_LIG, EVEX_B, EVEX_RC;

    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem,
                (outs DstRC:$dst), (ins memop:$src),
                !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                []>,
             EVEX, VEX_LIG;
  } // hasSideEffects = 0, Predicates = [HasAVX512]
}
4885
// Convert float/double to signed/unsigned int 32/64.
// Signed conversions use opcode 0x2D, unsigned ones 0x79; the 64-bit
// destination variants add VEX_W.

// Single-precision source (XS prefix, 32-bit EVEX_CD8 element).
defm VCVTSS2SIZ    : avx512_cvt_s_int_round<0x2D, VR128X, GR32,
                                            int_x86_sse_cvtss2si,
                                            ssmem, sse_load_f32, "cvtss2si">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z  : avx512_cvt_s_int_round<0x2D, VR128X, GR64,
                                            int_x86_sse_cvtss2si64,
                                            ssmem, sse_load_f32, "cvtss2si">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ   : avx512_cvt_s_int_round<0x79, VR128X, GR32,
                                            int_x86_avx512_cvtss2usi,
                                            ssmem, sse_load_f32, "cvtss2usi">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z : avx512_cvt_s_int_round<0x79, VR128X, GR64,
                                            int_x86_avx512_cvtss2usi64,
                                            ssmem, sse_load_f32, "cvtss2usi">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;

// Double-precision source (XD prefix, 64-bit EVEX_CD8 element).
defm VCVTSD2SIZ    : avx512_cvt_s_int_round<0x2D, VR128X, GR32,
                                            int_x86_sse2_cvtsd2si,
                                            sdmem, sse_load_f64, "cvtsd2si">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z  : avx512_cvt_s_int_round<0x2D, VR128X, GR64,
                                            int_x86_sse2_cvtsd2si64,
                                            sdmem, sse_load_f64, "cvtsd2si">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ   : avx512_cvt_s_int_round<0x79, VR128X, GR32,
                                            int_x86_avx512_cvtsd2usi,
                                            sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z : avx512_cvt_s_int_round<0x79, VR128X, GR64,
                                            int_x86_avx512_cvtsd2usi64,
                                            sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4917
// Codegen-only intrinsic forms of the integer-to-scalar-FP conversions, built
// from sse12_cvt_sint_3addr on VR128X and gated on HasAVX512. The "64"
// variants take a GR64 source and add VEX_W.
let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
  // Signed conversions: opcode 0x2A.
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;

  // Unsigned conversion: opcode 0x7B, the opcode every other cvtusi2*
  // definition in this file uses. 0x2A is the signed cvtsi2sd opcode, so
  // using it here would encode the wrong instruction for this mnemonic.
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
} // isCodeGenOnly = 1, Predicates = [HasAVX512]
4936
// Convert float/double to signed/unsigned int 32/64 with truncation.
//   rr / rm         - forms on _SrcRC.FRC / _SrcRC.MemOp selected via OpNode.
//   rb              - {sae} assembly form with no selection pattern.
//   rr_Int / rb_Int - codegen-only forms on _SrcRC.RC matched via OpNodeRnd
//                     with FROUND_CURRENT and FROUND_NO_EXC respectively.
//   rm_Int          - codegen-only load form with no selection pattern.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
  def rr : SI<opc, MRMSrcReg,
              (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
           EVEX;

  def rb : SI<opc, MRMSrcReg,
              (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm, "\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
              []>,
           EVEX, EVEX_B;

  def rm : SI<opc, MRMSrcMem,
              (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                    (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
           EVEX;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set _DstRC.RC:$dst,
                          (OpNodeRnd _SrcRC.RC:$src,
                                     (i32 FROUND_CURRENT)))]>,
                 EVEX, VEX_LIG;

    def rb_Int : SI<opc, MRMSrcReg,
                    (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                    !strconcat(asm,
                               "\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                    [(set _DstRC.RC:$dst,
                          (OpNodeRnd _SrcRC.RC:$src,
                                     (i32 FROUND_NO_EXC)))]>,
                 EVEX, VEX_LIG, EVEX_B;

    let mayLoad = 1 in
    def rm_Int : SI<opc, MRMSrcMem,
                    (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    []>,
                 EVEX, VEX_LIG;
  } // isCodeGenOnly = 1, hasSideEffects = 0
} // HasAVX512
}
4972
4973
// Truncating scalar FP -> signed integer conversions (opcode 0x2C, matched
// through fp_to_sint). The 64-bit destination variants add VEX_W.
defm VCVTTSS2SIZ    : avx512_cvt_s_all<0x2C, "cvttss2si",
                                       f32x_info, i32x_info,
                                       fp_to_sint, X86cvttss2IntRnd>,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z  : avx512_cvt_s_all<0x2C, "cvttss2si",
                                       f32x_info, i64x_info,
                                       fp_to_sint, X86cvttss2IntRnd>,
                      VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ    : avx512_cvt_s_all<0x2C, "cvttsd2si",
                                       f64x_info, i32x_info,
                                       fp_to_sint, X86cvttsd2IntRnd>,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z  : avx512_cvt_s_all<0x2C, "cvttsd2si",
                                       f64x_info, i64x_info,
                                       fp_to_sint, X86cvttsd2IntRnd>,
                      VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar FP -> unsigned integer conversions (opcode 0x78, matched
// through fp_to_uint).
defm VCVTTSS2USIZ   : avx512_cvt_s_all<0x78, "cvttss2usi",
                                       f32x_info, i32x_info,
                                       fp_to_uint, X86cvttss2UIntRnd>,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z : avx512_cvt_s_all<0x78, "cvttss2usi",
                                       f32x_info, i64x_info,
                                       fp_to_uint, X86cvttss2UIntRnd>,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ   : avx512_cvt_s_all<0x78, "cvttsd2usi",
                                       f64x_info, i32x_info,
                                       fp_to_uint, X86cvttsd2UIntRnd>,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z : avx512_cvt_s_all<0x78, "cvttsd2usi",
                                       f64x_info, i64x_info,
                                       fp_to_uint, X86cvttsd2UIntRnd>,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Select the SSE/SSE2 truncating-convert intrinsics onto the AVX-512
// codegen-only rr_Int forms. Those forms take their source in the vector
// register class (_SrcRC.RC in avx512_cvt_s_all), so the VR128X operand is
// passed through directly; copying it to FR32X/FR64X first would only
// introduce a cross-class copy that does not match the operand class.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int VR128X:$src)>;

} // HasAVX512
5010//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
5012//===----------------------------------------------------------------------===//
// Maskable scalar FP-to-FP conversion between _Src and _ (the destination
// info). rr takes both sources in registers; rm loads $src2 as a scalar and
// wraps it with scalar_to_vector. Both are EVEX_4V and VEX_LIG.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, X86VectorVTInfo _Src,
                                SDNode OpNode> {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _Src.RC:$src1, _Src.RC:$src2),
                OpcodeStr, "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode (_Src.VT _Src.RC:$src1),
                              (_Src.VT _Src.RC:$src2)))>,
            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;

  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _Src.RC:$src1, _Src.MemOp:$src2),
                OpcodeStr, "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode (_Src.VT _Src.RC:$src1),
                              (_Src.VT (scalar_to_vector
                                           (_Src.ScalarLdFrag addr:$src2)))))>,
            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
5029
// Scalar Conversion with SAE - suppress all exceptions.
// Register-only form; the pattern passes FROUND_NO_EXC to OpNodeRnd and the
// definition is tagged EVEX_B.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _Src.RC:$src1, _Src.RC:$src2),
                 OpcodeStr,
                 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
                                  (_Src.VT _Src.RC:$src2),
                                  (i32 FROUND_NO_EXC)))>,
             EVEX_4V, VEX_LIG, EVEX_B;
}
5041
// Scalar Conversion with rounding control (RC).
// Register-only form: both sources are registers and $rc carries the
// rounding mode, which the pattern passes to OpNodeRnd as an i32 immediate.
// It therefore uses the register scheduling class (WriteCvtF2F), like the
// rr form of avx512_cvt_fp_scalar, rather than the load-folded
// WriteCvtF2FLd/ReadAfterLd pair that is meant for memory-operand forms.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>,
                        EVEX_B, EVEX_RC;
}
// Scalar double -> float conversion: the plain rr/rm forms plus the
// rounding-control rrb form, emitted with a "Z" name suffix under HasAVX512.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
             VEX_W, EVEX_CD8<64, CD8VT1>, EVEX_V512, XD;
  }
}
5063
// Scalar float -> double conversion: the plain rr/rm forms plus the {sae}
// rrb form, emitted with a "Z" name suffix under HasAVX512.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
             EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
  }
}
// Scalar double <-> float conversion instructions (both opcode 0x5A).
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                            X86fround, X86froundRnd,
                                            f64x_info, f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                            X86fpext, X86fpextRnd,
                                            f32x_info, f64x_info>;

// f32 -> f64 extension of a register value: the source is copied into VR128X
// and used for both instruction operands.
def : Pat<(f64 (fextend FR32X:$src)),
          (COPY_TO_REGCLASS
              (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src, VR128X)),
              VR128X)>,
      Requires<[HasAVX512]>;

// f32 -> f64 extension of a loaded value: use the memory form directly.
def : Pat<(f64 (fextend (loadf32 addr:$src))),
          (COPY_TO_REGCLASS
              (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src),
              VR128X)>,
      Requires<[HasAVX512]>;

// Extending load: folded into the memory form under OptForSize ...
def : Pat<(f64 (extloadf32 addr:$src)),
          (COPY_TO_REGCLASS
              (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src),
              VR128X)>,
      Requires<[HasAVX512, OptForSize]>;

// ... and split into a VMOVSSZrm load plus the register form under
// OptForSpeed.
def : Pat<(f64 (extloadf32 addr:$src)),
          (COPY_TO_REGCLASS
              (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
                            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)),
              VR128X)>,
      Requires<[HasAVX512, OptForSpeed]>;

// f64 -> f32 rounding of a register value.
def : Pat<(f32 (fround FR64X:$src)),
          (COPY_TO_REGCLASS
              (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src, VR128X)),
              VR128X)>,
      Requires<[HasAVX512]>;
5099//===----------------------------------------------------------------------===//
5100// AVX-512  Vector convert from signed/unsigned integer to float/double
5101//          and from float/double to signed/unsigned integer
5102//===----------------------------------------------------------------------===//
5103
// Maskable vector conversion from _Src to _ (the destination info).
//   rr  - register source.
//   rm  - full-vector memory source; Alias is appended to the mnemonic of
//         this form only.
//   rmb - broadcast memory source (EVEX_B); Broadcast is appended to the
//         ${src} operand string and defaults to _.BroadcastStr.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          string Broadcast = _.BroadcastStr,
                          string Alias = ""> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst), (ins _Src.RC:$src),
                OpcodeStr, "$src", "$src",
                (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
            EVEX;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                (outs _.RC:$dst), (ins _Src.MemOp:$src),
                OpcodeStr#Alias, "$src", "$src",
                (_.VT (OpNode (_Src.VT
                                  (bitconvert (_Src.LdFrag addr:$src)))))>,
            EVEX;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                 (outs _.RC:$dst), (ins _Src.MemOp:$src),
                 OpcodeStr,
                 "${src}"##Broadcast, "${src}"##Broadcast,
                 (_.VT (OpNode (_Src.VT
                                   (X86VBroadcast
                                       (_Src.ScalarLdFrag addr:$src)))))>,
             EVEX, EVEX_B;
}
// Conversion with SAE - suppress all exceptions.
// Register-only form; the pattern passes FROUND_NO_EXC to OpNodeRnd and the
// definition is tagged EVEX_B.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr,
                              X86VectorVTInfo _, X86VectorVTInfo _Src,
                              SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _,
                 (outs _.RC:$dst), (ins _Src.RC:$src),
                 OpcodeStr,
                 "{sae}, $src", "$src, {sae}",
                 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                                  (i32 FROUND_NO_EXC)))>,
             EVEX, EVEX_B;
}
5135
// Conversion with rounding control (RC).
// Register-only form; $rc is passed to OpNodeRnd as an i32 immediate and the
// definition is tagged EVEX_B and EVEX_RC.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr,
                             X86VectorVTInfo _, X86VectorVTInfo _Src,
                             SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _,
                 (outs _.RC:$dst), (ins _Src.RC:$src, AVX512RC:$rc),
                 OpcodeStr,
                 "$rc, $src", "$src, $rc",
                 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC;
}
5145
// Extend Float to Double.
// The 512-bit form (HasAVX512) also gets the {sae} variant. The 128/256-bit
// forms require HasVLX; Z128 uses X86vfpext with an explicit "{1to2}"
// broadcast string, while Z and Z256 use fextend.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fextend>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, "{1to2}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info,
                               fextend>,
                EVEX_V256;
  }
}
5160
// Truncate Double to Float.
// The 512-bit form (HasAVX512) also gets the rounding-control variant. The
// 128/256-bit forms require HasVLX and pass explicit broadcast strings plus
// "{x}"/"{y}" mnemonic aliases for their memory forms.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fround>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
                               fround, "{1to4}", "{y}">,
                EVEX_V256;
  }
}
5175
// Packed double <-> float conversion instructions (both opcode 0x5A).
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
                 PS, EVEX_CD8<32, CD8VH>;

// Extending vector loads are selected onto the memory forms of VCVTPS2PD.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

let Predicates = [HasVLX] in {
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDZ256rm addr:$src)>;
}
5188
// Convert Signed/Unsigned Doubleword to Double.
// OpNode is used for the 512- and 256-bit forms; the 128-bit form uses
// OpNode128 together with an explicit "{1to2}" broadcast string.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
             EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, "{1to2}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5204
// Convert Signed/Unsigned Doubleword to Float.
// Only the 512-bit form gets the rounding-control variant (OpNodeRnd); the
// 128/256-bit forms require HasVLX.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd>,
             EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5220
// Convert Float to Signed/Unsigned Doubleword with truncation.
// Only the 512-bit form gets the {sae} variant (OpNodeRnd); the 128/256-bit
// forms require HasVLX.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5236
// Convert Float to Signed/Unsigned Doubleword.
// Only the 512-bit form gets the rounding-control variant (OpNodeRnd); the
// 128/256-bit forms require HasVLX.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5252
// Convert Double to Signed/Unsigned Doubleword with truncation.
// Only the 512-bit form gets the {sae} variant (OpNodeRnd).
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The 128- and 256-bit memory forms share the destination type
    // 'v4i32x_info', so the asm parser needs the "x"/"y" suffixes to tell
    // them apart. The broadcast string is spelled out explicitly for the
    // same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                               OpNode, "{1to4}", "{y}">,
                EVEX_V256;
  }
}
5272
// Convert Double to Signed/Unsigned Doubleword.
// Same shape as the truncating variant, except that the 512-bit form (Z)
// inherits avx512_vcvt_fp_rc instead of avx512_vcvt_fp_sae (presumably the
// embedded rounding-control variant; defined outside this section).
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need the "x"/"y" suffixes in order to distinguish between the 128-
    // and 256-bit memory forms of these instructions in the asm parser: both
    // have the same dest type - 'v4i32x_info'. The broadcast string is
    // specified explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               "{1to4}", "{y}">, EVEX_V256;
  }
}
5292
// Convert Double to Signed/Unsigned Quadword.
// Z (v8f64 -> v8i64) requires HasDQI and also inherits avx512_vcvt_fp_rc with
// OpNodeRnd; Z128/Z256 (v2f64 -> v2i64, v4f64 -> v4i64) require both HasDQI
// and HasVLX.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
                               EVEX_V256;
  }
}
5308
// Convert Double to Signed/Unsigned Quadword with truncation.
// Z (v8f64 -> v8i64) requires HasDQI and also inherits avx512_vcvt_fp_sae
// with OpNodeRnd; Z128/Z256 require both HasDQI and HasVLX.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
                               EVEX_V256;
  }
}
5324
// Convert Signed/Unsigned Quadword to Double.
// Z (v8i64 -> v8f64) requires HasDQI and also inherits avx512_vcvt_fp_rc with
// OpNodeRnd; Z128/Z256 (v2i64 -> v2f64, v4i64 -> v4f64) require both HasDQI
// and HasVLX.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>,
                               EVEX_V256;
  }
}
5340
// Convert Float to Signed/Unsigned Quadword.
// The source vector has half the bit width of the destination: Z reads
// v8f32x_info and writes v8i64_info, Z256 reads v4f32x_info and writes
// v4i64x_info. Z requires HasDQI (and also inherits avx512_vcvt_fp_rc);
// Z128/Z256 require both HasDQI and HasVLX.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is specified explicitly, since only 2 elements are
    // taken from the v4f32x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               "{1to2}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
                               EVEX_V256;
  }
}
5358
// Convert Float to Signed/Unsigned Quadword with truncation.
// Same layout as the non-truncating variant, except that the 512-bit form
// (Z) inherits avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is specified explicitly, since only 2 elements are
    // taken from the v4f32x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               "{1to2}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
                               EVEX_V256;
  }
}
5376
// Convert Signed/Unsigned Quadword to Float.
// Z (v8i64 -> v8f32) requires HasDQI and also inherits avx512_vcvt_fp_rc with
// OpNodeRnd; Z128/Z256 require both HasDQI and HasVLX.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // We need the "x"/"y" suffixes in order to distinguish between the 128-
    // and 256-bit memory forms of these instructions in the asm parser: both
    // have the same dest type - 'v4f32x_info'. The broadcast string is
    // specified explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode,
                               "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               "{1to4}", "{y}">, EVEX_V256;
  }
}
5396
// Instantiations of the packed integer <-> floating-point conversion
// multiclasses. Each entry supplies the opcode byte, the mnemonic, the
// selection node(s), and then the prefix / VEX_W / EVEX_CD8 mix-ins.

// Doubleword <-> FP.
defm VCVTDQ2PD
    : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>,
      XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS
    : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, X86VSintToFpRnd>,
      PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ
    : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, X86VFpToSintRnd>,
      XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ
    : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86VFpToSintRnd>,
      PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ
    : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, X86VFpToUintRnd>,
      PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ
    : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, X86VFpToUintRnd>,
      PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD
    : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
      XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS
    : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, X86VUintToFpRnd>,
      XD, EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ
    : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int, X86cvtps2IntRnd>,
      PD, EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ
    : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int, X86cvtpd2IntRnd>,
      XD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ
    : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt, X86cvtps2UIntRnd>,
      PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ
    : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt, X86cvtpd2UIntRnd>,
      VEX_W, PS, EVEX_CD8<64, CD8VF>;

// FP -> quadword.
defm VCVTPD2QQ
    : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int, X86cvtpd2IntRnd>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ
    : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int, X86cvtps2IntRnd>,
      PD, EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ
    : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt, X86cvtpd2UIntRnd>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ
    : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt, X86cvtps2UIntRnd>,
      PD, EVEX_CD8<32, CD8VH>;

// FP -> quadword with truncation.
defm VCVTTPD2QQ
    : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, X86VFpToSlongRnd>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ
    : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86VFpToSlongRnd>,
      PD, EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ
    : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, X86VFpToUlongRnd>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ
    : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86VFpToUlongRnd>,
      PD, EVEX_CD8<32, CD8VH>;

// Quadword -> FP.
defm VCVTQQ2PD
    : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, X86VSlongToFpRnd>,
      VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD
    : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, X86VUlongToFpRnd>,
      VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS
    : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSlongToFpRnd>,
      VEX_W, PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS
    : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUlongToFpRnd>,
      VEX_W, XD, EVEX_CD8<64, CD8VF>;
5480
// Selection patterns used when AVX512 is available but VLX is not
// (Predicates = [HasAVX512, NoVLX]). Each pattern places the narrow source
// register into the low part of a wider register with SUBREG_TO_REG, applies
// the 512-bit ("Zrr") instruction, and extracts the low sub-register of the
// result with EXTRACT_SUBREG.
let Predicates = [HasAVX512, NoVLX] in {
// v8f32 -> v8i32 (unsigned, truncating) through the v16f32 -> v16i32 form.
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;

// v4f32 -> v4i32 (unsigned, truncating) through the v16f32 -> v16i32 form.
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;

// v8i32 -> v8f32 (unsigned) through the v16i32 -> v16f32 form.
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;

// v4i32 -> v4f32 (unsigned) through the v16i32 -> v16f32 form.
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;

// v4i32 -> v4f64 (unsigned): the xmm source is widened to v8i32, converted to
// v8f64, and the low ymm half of the result is kept.
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
}
5502
// Fold the memory operand into the 512-bit pd<->ps conversions: an fround of
// a loaded v8f64 selects VCVTPD2PSZrm, and an extending load of v8f32 to
// v8f64 selects VCVTPS2PDZrm.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
5509
5510//===----------------------------------------------------------------------===//
5511// Half precision conversion instructions
5512//===----------------------------------------------------------------------===//
// vcvtph2ps: register (rr) and memory (rm) forms, opcode 0x13, both built on
// AVX512_maskable and both matching X86cvtph2ps with FROUND_CURRENT.
//   _dest    - VT info of the result.
//   _src     - VT info of the source.
//   x86memop - memory operand class used by the rm form.
//   ld_frag  - load fragment used by the rm form; the loaded value is
//              bitconverted to _src.VT before being fed to X86cvtph2ps.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
                    "vcvtph2ps", "$src", "$src",
                   (X86cvtph2ps (_src.VT _src.RC:$src),
                                                (i32 FROUND_CURRENT))>, T8PD;
  let hasSideEffects = 0, mayLoad = 1 in {
    defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
                      "vcvtph2ps", "$src", "$src",
                      (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
                                       (i32 FROUND_CURRENT))>, T8PD;
  }
}
5526
// vcvtph2ps register form with {sae}: same opcode and operands as the rr form
// of avx512_cvtph2ps, but matches X86cvtph2ps with FROUND_NO_EXC and carries
// EVEX_B.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
  defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
                    "vcvtph2ps", "{sae}, $src", "$src, {sae}",
                   (X86cvtph2ps (_src.VT _src.RC:$src),
                                                (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;

}
5534
// vcvtph2ps instantiations. Only the 512-bit form also gets the {sae}
// variant. The memory operand is half as wide as the destination vector
// (f256mem for 512-bit, f128mem for 256-bit, f64mem for 128-bit).
// NOTE: the inner `let Predicates = [HasVLX]` replaces the outer
// [HasAVX512] list for the 128/256-bit forms rather than adding to it.
let Predicates = [HasAVX512] in {
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                         loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                         loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }
}
5546
// vcvtps2ph, opcode 0x1D. $src2 is an 8-bit immediate (i32u8imm) passed
// through to X86cvtps2ph together with FROUND_CURRENT.
//   rr  - register destination, built on AVX512_maskable.
//   mr  - memory destination; selected for a store of the conversion result.
//   mrk - memory destination with a write-mask operand; it has no selection
//         pattern (assembler/disassembler only as far as this block shows).
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
               (ins _src.RC:$src1, i32u8imm:$src2),
                    "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2),
                                (i32 FROUND_CURRENT))>, AVX512AIi8Base;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
                                       (i32 imm:$src2), (i32 FROUND_CURRENT) )),
                                       addr:$dst)]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                []>, EVEX_K;
  }
}
// vcvtps2ph register form with {sae}: same operands as the rr form of
// avx512_cvtps2ph, but matches X86cvtps2ph with FROUND_NO_EXC and carries
// EVEX_B.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
  defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
               (ins _src.RC:$src1, i32u8imm:$src2),
                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2),
                                (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base;
}
// vcvtps2ph instantiations. Only the 512-bit form also gets the {sae}
// variant.
// NOTE: the inner `let Predicates = [HasVLX]` replaces the outer
// [HasAVX512] list for the 128/256-bit forms rather than adding to it.
// NOTE(review): the 128-bit form is given f128mem, whereas VCVTPH2PSZ128 uses
// f64mem for the half-precision side - confirm the intended store width.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
                      EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f128mem>,
                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }
}
5587
// Unordered/Ordered scalar fp compare with {sae} that sets EFLAGS.
// Defines a single register-register form (rb) whose pattern assigns
// EFLAGS from OpNode applied to the two sources and FROUND_NO_EXC.
//   opc       - opcode byte.
//   _         - VT info supplying the register class and value type.
//   OpNode    - compare node taking (src1, src2, rounding-mode immediate).
//   OpcodeStr - mnemonic.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
                            string OpcodeStr> {
  def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                 [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2,
                                                        (i32 FROUND_NO_EXC)))],
                 IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
                 Sched<[WriteFAdd]>;
}
5598
// {sae} forms of vucomiss/vucomisd (opcode 0x2E) and vcomiss/vcomisd (opcode
// 0x2F). All of them define EFLAGS and require HasAVX512; the double-precision
// variants add VEX_W and use 64-bit EVEX_CD8 scaling.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
5609
// EVEX-encoded ucomis*/comis* built from the shared sse12_ord_cmp multiclass
// (defined outside this file section). All of them define EFLAGS and require
// HasAVX512.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register (FR32X/FR64X) unordered compares, matched via X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                  "ucomisd">, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // The ordered scalar-register compares are given `undef` as the node and an
  // empty Pattern list, so they carry no selection patterns.
  let Pattern = []<dag> in {
    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                   "comiss">, PS, EVEX, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                   "comisd">, PD, EVEX,
                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  // Vector-register (VR128X) variants matched via X86ucomi / X86comi; marked
  // isCodeGenOnly.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
                              load, "ucomiss">, PS, EVEX, VEX_LIG,
                              EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
                              load, "ucomisd">, PD, EVEX,
                              VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
                              load, "comiss">, PS, EVEX, VEX_LIG,
                              EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
                              load, "comisd">, PD, EVEX,
                              VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
5641
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Two-source scalar forms built on AVX512_maskable_scalar: rr takes both
/// sources in registers; rm loads the second source as a scalar and wraps it
/// with scalar_to_vector. Everything is defined with hasSideEffects = 0,
/// AddedComplexity = 20 and Predicates = [HasAVX512].
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  let hasSideEffects = 0, AddedComplexity = 20 , Predicates = [HasAVX512] in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
  let mayLoad = 1 in {
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
  }
}
}
5659
// Scalar rcp14 (opcode 0x4D) and rsqrt14 (opcode 0x4F) instantiations; the
// double-precision variants add VEX_W and use 64-bit EVEX_CD8 scaling.
defm VRCP14SS
    : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
      EVEX_CD8<32, CD8VT1>, T8PD;
defm VRCP14SD
    : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
      VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
defm VRSQRT14SS
    : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
      EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SD
    : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
      VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
5668
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Single-source packed forms built on AVX512_maskable:
///   r  - register source.
///   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
///   mb - broadcast memory source: a scalar memory operand (_.ScalarMemOp)
///        loaded with _.ScalarLdFrag and fed through X86VBroadcast; carries
///        EVEX_B and appends _.BroadcastStr to the operand in the asm string.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86VectorVTInfo _> {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
  let mayLoad = 1 in {
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.FloatVT
                             (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
    defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                            (OpNode (_.FloatVT
                              (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                            EVEX, T8PD, EVEX_B;
  }
}
5688
/// avx512_fp14_p_vl_all - instantiate avx512_fp14_p for every packed width.
/// The "ps"/"pd" suffix is appended to OpcodeStr; the pd variants add VEX_W
/// and use 64-bit EVEX_CD8 scaling.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  // 512-bit forms.
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // 128/256-bit forms exist only when the AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
5711
// Packed rsqrt14 (opcode 0x4E) and rcp14 (opcode 0x4C) for all vector widths.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;

// Select the plain register form when the 512-bit rsqrt14 intrinsic is called
// with an all-zeros second operand and -1 as the third operand (presumably
// the pass-through value and an all-ones mask).
def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
              (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
           (VRSQRT14PSZr VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
              (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
           (VRSQRT14PDZr VR512:$src)>;

// Same mapping for the 512-bit rcp14 intrinsics.
def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
              (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
           (VRCP14PSZr VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
              (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
           (VRCP14PDZr VR512:$src)>;
5728
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Two-source scalar forms built on AVX512_maskable_scalar. OpNode takes the
/// two sources plus a rounding-mode immediate:
///   r  - register sources, FROUND_CURRENT.
///   rb - register sources with {sae}: FROUND_NO_EXC, carries EVEX_B.
///   m  - second source loaded as a scalar and wrapped with
///        scalar_to_vector, FROUND_CURRENT.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode> {

  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT))>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
                         (i32 FROUND_CURRENT))>;
}
5752
/// avx512_eri_s - instantiate avx512_fp28_s for the single-precision (SS) and
/// double-precision (SD) scalar variants, appending "ss"/"sd" to OpcodeStr.
/// The SD variant adds VEX_W and uses 64-bit EVEX_CD8 scaling.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm SS : avx512_fp28_s<opc, !strconcat(OpcodeStr, "ss"), f32x_info, OpNode>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_fp28_s<opc, !strconcat(OpcodeStr, "sd"), f64x_info, OpNode>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
5759
// Scalar rcp28 (opcode 0xCB) and rsqrt28 (opcode 0xCD); both require HasERI
// and are marked hasSideEffects = 0.
let hasSideEffects = 0, Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28",   X86rcp28s>,   T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}

// Scalar vgetexp (opcode 0x43) reuses the same multiclass but is defined
// outside the HasERI block, so it does not receive the HasERI predicate.
defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Single-source packed forms built on AVX512_maskable. OpNode takes the
/// source plus a rounding-mode immediate, which is FROUND_CURRENT for every
/// form defined here:
///   r  - register source.
///   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
///   mb - broadcast memory source: a single element is loaded with
///        _.ScalarLdFrag and fed through X86VBroadcast; carries EVEX_B and
///        appends _.BroadcastStr to the operand in the asm string.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode> {

  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.FloatVT
                             (bitconvert (_.LdFrag addr:$src))),
                          (i32 FROUND_CURRENT))>;

  // The broadcast form reads one scalar element from memory, so its memory
  // operand is _.ScalarMemOp (not the full-vector _.MemOp), matching the
  // broadcast forms of avx512_fp14_p and avx512_sqrt_packed.
  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.FloatVT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                                 (i32 FROUND_CURRENT))>, EVEX_B;
}
// Register form with {sae} for the packed fp28-style operations: OpNode is
// matched with FROUND_NO_EXC and the instruction carries EVEX_B.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode> {
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
}
5795
/// avx512_eri - 512-bit packed single (PS) and double (PD) variants. Each
/// combines avx512_fp28_p with the {sae} register form from
/// avx512_fp28_p_round; "ps"/"pd" is appended to OpcodeStr, and the PD
/// variant adds VEX_W and uses 64-bit EVEX_CD8 scaling.
multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm PS : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"),
                          v16f32_info, OpNode>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "ps"),
                                v16f32_info, OpNode>,
            T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"),
                          v8f64_info, OpNode>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "pd"),
                                v8f64_info, OpNode>,
            T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
5804
/// avx512_fp_unaryop_packed - 128/256-bit packed variants of avx512_fp28_p.
/// All four are gated on the AVX512VL feature; "ps"/"pd" is appended to
/// OpcodeStr, and the pd variants add VEX_W and use 64-bit EVEX_CD8 scaling.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode> {
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"),
                                v4f32x_info, OpNode>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"),
                                v8f32x_info, OpNode>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"),
                                v2f64x_info, OpNode>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"),
                                v4f64x_info, OpNode>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
// Packed 512-bit rsqrt28 (0xCC), rcp28 (0xCA) and exp2 (0xC8); all require
// HasERI and are marked hasSideEffects = 0.
let Predicates = [HasERI], hasSideEffects = 0 in {

 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
 defm VRCP28   : avx512_eri<0xCA, "vrcp28",   X86rcp28>,   EVEX;
 defm VEXP2    : avx512_eri<0xC8, "vexp2",    X86exp2>,    EVEX;
}
// Packed vgetexp (0x42) is defined outside the HasERI block. It combines the
// 512-bit forms from avx512_eri with the 128/256-bit forms from
// avx512_fp_unaryop_packed (which are gated on HasVLX inside that multiclass).
defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
5827
// Packed sqrt register form with an explicit rounding-control operand: $rc
// (AVX512RC) is passed to OpNodeRnd as an i32 immediate, and the instruction
// carries EVEX_B and EVEX_RC.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                              SDNode OpNodeRnd, X86VectorVTInfo _>{
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC;
}
5835
// Packed sqrt forms built on AVX512_maskable:
//   r  - register source.
//   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb - broadcast memory source: a scalar memory operand (_.ScalarMemOp)
//        loaded with _.ScalarLdFrag and fed through X86VBroadcast; carries
//        EVEX_B and appends _.BroadcastStr to the operand in the asm string.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86VectorVTInfo _>{
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.FloatVT (OpNode _.RC:$src))>, EVEX;
  let mayLoad = 1 in {
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.FloatVT
                             (bitconvert (_.LdFrag addr:$src))))>, EVEX;

    defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                            (OpNode (_.FloatVT
                              (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                            EVEX, EVEX_B;
  }
}
5855
/// avx512_sqrt_packed_all - instantiate avx512_sqrt_packed for every packed
/// width. "ps"/"pd" is appended to OpcodeStr; the ps variants use the PS
/// prefix class, the pd variants use PD plus VEX_W and 64-bit EVEX_CD8
/// scaling.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode> {
  // 512-bit forms.
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", OpNode, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", OpNode, v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

  // 128/256-bit forms exist only when the AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", OpNode, v4f32x_info>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", OpNode, v8f32x_info>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", OpNode, v2f64x_info>,
                  EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", OpNode, v4f64x_info>,
                  EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
5880
// Instantiates the explicit-rounding packed square root
// (avx512_sqrt_packed_round) for the two 512-bit types only: v16f32 ("ps")
// and v8f64 ("pd").
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                          SDNode OpNodeRnd> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", OpNodeRnd,
                                      v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", OpNodeRnd,
                                      v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
5888
// Scalar square root for the scalar type info `_`.
//   r_Int / m_Int - maskable forms on the vector register class _.RC, matched
//                   through OpNodeRnd with FROUND_CURRENT.
//   rb_Int        - register form taking an explicit rounding-control operand
//                   (EVEX_B + EVEX_RC).
//   r / m         - codegen-only forms on the scalar register class _.FRC.
//                   They carry no pattern of their own and are selected by the
//                   Pat<> records at the end of this multiclass.
// SUFF is pasted into the instruction names those Pat<> records refer to
// (NAME#SUFF#Zr / NAME#SUFF#Zm).
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                              string SUFF, SDNode OpNode, SDNode OpNodeRnd> {

  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNodeRnd (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (i32 FROUND_CURRENT))>;
  let mayLoad = 1 in
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNodeRnd (_.VT _.RC:$src1),
                                    (_.VT (scalar_to_vector
                                              (_.ScalarLdFrag addr:$src2))),
                                    (i32 FROUND_CURRENT))>;

  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (OpNodeRnd (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 imm:$rc))>,
                         EVEX_B, EVEX_RC;

  let isCodeGenOnly = 1 in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;

    let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
  }

  // Select the plain OpNode onto the codegen-only forms; the first source is
  // an IMPLICIT_DEF.  The predicates are spelled out in full: a
  // Requires<[...]> superclass replaces the whole Predicates list, so
  // Requires<[OptForSize]> alone would leave the pattern without HasAVX512.
  let Predicates = [HasAVX512] in
  def : Pat<(_.EltVT (OpNode _.FRC:$src)),
            (!cast<Instruction>(NAME#SUFF#Zr)
                (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;

  // The load is folded only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in
  def : Pat<(_.EltVT (OpNode (load addr:$src))),
            (!cast<Instruction>(NAME#SUFF#Zm)
                (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
}
5934
// Instantiates avx512_sqrt_scalar for f32 ("ss", XS prefix) and f64 ("sd",
// XD prefix plus VEX_W).  Both use fsqrt as the plain node and X86fsqrtRnds
// as the rounding-aware node.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
  defm SSZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "ss"), f32x_info,
                                "SS", fsqrt, X86fsqrtRnds>,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "sd"), f64x_info,
                                "SD", fsqrt, X86fsqrtRnds>,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
5941
// Packed vsqrt: the regular forms matched from fsqrt plus the 512-bit
// explicit-rounding forms matched from X86fsqrtRnd, all on opcode 0x51.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;

// Scalar vsqrt on the same opcode, marked VEX_LIG.
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
5946
// Lower the scalar f32 X86frsqrt / X86frcp nodes onto VRSQRT14SS / VRCP14SS.
// The first source operand is an IMPLICIT_DEF; the register forms move the
// FR32X input into VR128X first.
//
// The load-folding patterns are meant to apply only when optimizing for size.
// An enclosing `let Predicates = [...]` overrides a Requires<[OptForSize]>
// superclass on the record, so the complete predicate list is given on each
// pattern instead.
let Predicates = [HasAVX512] in
def : Pat<(f32 (X86frsqrt FR32X:$src)),
          (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;

let Predicates = [HasAVX512, OptForSize] in
def : Pat<(f32 (X86frsqrt (load addr:$src))),
          (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>;

let Predicates = [HasAVX512] in
def : Pat<(f32 (X86frcp FR32X:$src)),
          (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;

let Predicates = [HasAVX512, OptForSize] in
def : Pat<(f32 (X86frcp (load addr:$src))),
          (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>;
5959
// Scalar VRNDSCALE for the scalar type info `_`.
//   r  - register form, X86RndScales with FROUND_CURRENT,
//   rb - register form printed with {sae}, X86RndScales with FROUND_NO_EXC
//        (EVEX_B),
//   m  - memory form, scalar load wrapped in scalar_to_vector.
// The Pat<> records below map the generic rounding nodes onto the r/m forms
// with a fixed immediate.
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {

  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 imm:$src3), (i32 FROUND_CURRENT)))>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;

  let mayLoad = 1 in
  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1),
                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
                          (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
  }

  // Immediate layout used below (see the VRNDSCALESS/SD imm8 description in
  // the Intel SDM): bits [1:0] select the rounding mode, bit 2 selects
  // MXCSR.RC instead, bit 3 suppresses the precision (inexact) exception.
  //   ffloor     -> 0x9 (round down,     precision exception suppressed)
  //   fceil      -> 0xa (round up,       precision exception suppressed)
  //   ftrunc     -> 0xb (toward zero,    precision exception suppressed)
  //   frint      -> 0x4 (MXCSR rounding, precision exception reported)
  //   fnearbyint -> 0xc (MXCSR rounding, precision exception suppressed)
  // floor/ceil/trunc set bit 3 so that they behave like fnearbyint here and
  // do not raise inexact.
  let Predicates = [HasAVX512] in {
  def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
  def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
  def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
  def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
  def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;

  // Same mappings with the scalar load folded into the memory form.
  def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
             addr:$src, (i32 0x9))), _.FRC)>;
  def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
             addr:$src, (i32 0xa))), _.FRC)>;
  def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
             addr:$src, (i32 0xb))), _.FRC)>;
  def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
             addr:$src, (i32 0x4))), _.FRC)>;
  def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
             (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
             addr:$src, (i32 0xc))), _.FRC)>;
  }
}
6018
// Single-precision scalar round-scale, opcode 0x0A.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
                   AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Double-precision scalar round-scale, opcode 0x0B (VEX_W set).
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>,
                   VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
6024
6025//-------------------------------------------------
6026// Integer truncate and extend operations
6027//-------------------------------------------------
6028
// Common part of the vector truncate instructions for one source/destination
// type pair.
//   rr       - maskable register form (its masked variants are referenced
//              below as ...rrk and ...rrkz),
//   mr / mrk - plain and write-masked store forms; they have no patterns here
//              and are matched by the *_mr_lowering multiclasses.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
                              X86MemOperand x86memop> {

  defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
                       EVEX, T8XS;

  // For intrinsic pattern matching: a select between the truncated value and
  // undef or all-zeros maps to the zero-masking form ...
  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
                           (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                           undef)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
                                      SrcInfo.RC:$src1)>;

  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
                           (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                           DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
                                      SrcInfo.RC:$src1)>;

  // ... and a select against a pass-through value maps to the merge-masking
  // form.
  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
                           (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                           DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
                                      DestInfo.KRCWM:$mask ,
                                      SrcInfo.RC:$src1)>;

  // Store forms.  No blank before the '|' separator, so the AT&T variant of
  // the asm string does not end in trailing whitespace.
  let mayStore = 1 in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               []>, EVEX;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
               []>, EVEX, EVEX_K;
  }//mayStore = 1
}
6070
// Store-side selection patterns for the plain truncates: truncFrag is matched
// onto the "mr" instruction and mtruncFrag (masked) onto the "mrk" instruction
// named NAME # SrcInfo.ZSuffix # {mr,mrk}.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag > {

  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
               addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store.
  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                        (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
               addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
6084
// Store-side selection patterns for the saturating truncates.  The source of
// each pattern is the intrinsic whose name is assembled from `sat`, the two
// type suffixes and SrcInfo.Size
// ("int_x86_avx512_mask_pmov<sat>_<src><dst>_mem_<size>").
multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
                                        X86VectorVTInfo DestInfo, string sat > {

  // General mask: the GPR mask operand is copied into the write-mask register
  // class and the masked store "mrk" is used.
  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
                               DestInfo.Suffix#"_mem_"#SrcInfo.Size)
              addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
              addr:$ptr,
              (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
              (SrcInfo.VT SrcInfo.RC:$src))>;

  // Mask operand -1: use the unmasked store "mr".
  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
                               DestInfo.Suffix#"_mem_"#SrcInfo.Size)
              addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
              addr:$ptr,
              (SrcInfo.VT SrcInfo.RC:$src))>;
}
6101
// Plain truncate at all three vector lengths.  Each length combines
// avx512_trunc_common (instructions + register patterns) with
// avx512_trunc_mr_lowering (store patterns).  The 128/256-bit variants need
// HasVLX in addition to `prd`; the 512-bit variant needs only `prd`.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
         X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
         Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
                                    DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                         truncFrag, mtruncFrag>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
                                    DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                         truncFrag, mtruncFrag>,
                EVEX_V256;
  }

  let Predicates = [prd] in {
    defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
                                 DestInfoZ, x86memopZ>,
             avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                      truncFrag, mtruncFrag>,
             EVEX_V512;
  }
}
6126
// Saturating truncate at all three vector lengths.  Same shape as the plain
// truncate wrapper, but the store patterns come from
// avx512_trunc_sat_mr_lowering, parameterised by the `sat` string.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
         X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
                                    DestInfoZ128, x86memopZ128>,
                avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                             sat>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
                                    DestInfoZ256, x86memopZ256>,
                avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                             sat>,
                EVEX_V256;
  }

  let Predicates = [prd] in {
    defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
                                 DestInfoZ, x86memopZ>,
             avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                          sat>,
             EVEX_V512;
  }
}
6150
// i64 -> i8 truncate.  Destination info is v16i8x at every length; the store
// operands are i16mem / i32mem / i64mem for 128 / 256 / 512-bit sources.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i16mem, i32mem, i64mem,
                           truncstorevi8, masked_truncstorevi8>,
              EVEX_CD8<8, CD8VO>;
}
// Saturating i64 -> i8 truncate; the mnemonic is "vpmov" + sat + "qb".
multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "qb"), OpNode,
                               avx512vl_i64_info,
                               v16i8x_info, v16i8x_info, v16i8x_info,
                               i16mem, i32mem, i64mem,
                               sat>,
              EVEX_CD8<8, CD8VO>;
}
6161
// i64 -> i16 truncate.  Destination info is v8i16x at every length; the store
// operands are i32mem / i64mem / i128mem.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v8i16x_info, v8i16x_info, v8i16x_info,
                           i32mem, i64mem, i128mem,
                           truncstorevi16, masked_truncstorevi16>,
              EVEX_CD8<16, CD8VQ>;
}
// Saturating i64 -> i16 truncate; the mnemonic is "vpmov" + sat + "qw".
multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "qw"), OpNode,
                               avx512vl_i64_info,
                               v8i16x_info, v8i16x_info, v8i16x_info,
                               i32mem, i64mem, i128mem,
                               sat>,
              EVEX_CD8<16, CD8VQ>;
}
6172
// i64 -> i32 truncate.  Destination info is v4i32x for the 128/256-bit
// sources and v8i32x for the 512-bit source; the store operands are
// i64mem / i128mem / i256mem.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v4i32x_info, v4i32x_info, v8i32x_info,
                           i64mem, i128mem, i256mem,
                           truncstorevi32, masked_truncstorevi32>,
              EVEX_CD8<32, CD8VH>;
}
// Saturating i64 -> i32 truncate; the mnemonic is "vpmov" + sat + "qd".
multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "qd"), OpNode,
                               avx512vl_i64_info,
                               v4i32x_info, v4i32x_info, v8i32x_info,
                               i64mem, i128mem, i256mem,
                               sat>,
              EVEX_CD8<32, CD8VH>;
}
6183
// i32 -> i8 truncate.  Destination info is v16i8x at every length; the store
// operands are i32mem / i64mem / i128mem.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i32mem, i64mem, i128mem,
                           truncstorevi8, masked_truncstorevi8>,
              EVEX_CD8<8, CD8VQ>;
}
// Saturating i32 -> i8 truncate; the mnemonic is "vpmov" + sat + "db".
multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "db"), OpNode,
                               avx512vl_i32_info,
                               v16i8x_info, v16i8x_info, v16i8x_info,
                               i32mem, i64mem, i128mem,
                               sat>,
              EVEX_CD8<8, CD8VQ>;
}
6194
// i32 -> i16 truncate.  Destination info is v8i16x for the 128/256-bit
// sources and v16i16x for the 512-bit source; the store operands are
// i64mem / i128mem / i256mem.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                           v8i16x_info, v8i16x_info, v16i16x_info,
                           i64mem, i128mem, i256mem,
                           truncstorevi16, masked_truncstorevi16>,
              EVEX_CD8<16, CD8VH>;
}
// Saturating i32 -> i16 truncate; the mnemonic is "vpmov" + sat + "dw".
multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "dw"), OpNode,
                               avx512vl_i32_info,
                               v8i16x_info, v8i16x_info, v16i16x_info,
                               i64mem, i128mem, i256mem,
                               sat>,
              EVEX_CD8<16, CD8VH>;
}
6205
// i16 -> i8 truncate, predicated on HasBWI instead of the default HasAVX512.
// Destination info is v16i8x for the 128/256-bit sources and v32i8x for the
// 512-bit source; the store operands are i64mem / i128mem / i256mem.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                           v16i8x_info, v16i8x_info, v32i8x_info,
                           i64mem, i128mem, i256mem,
                           truncstorevi8, masked_truncstorevi8, HasBWI>,
              EVEX_CD8<16, CD8VH>;
}
// Saturating i16 -> i8 truncate (HasBWI); the mnemonic is
// "vpmov" + sat + "wb".
multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "wb"), OpNode,
                               avx512vl_i16_info,
                               v16i8x_info, v16i8x_info, v32i8x_info,
                               i64mem, i128mem, i256mem,
                               sat, HasBWI>,
              EVEX_CD8<16, CD8VH>;
}
6216
// Truncate instruction families.  Each group holds the plain form
// (X86vtrunc) followed by the "s" form (X86vtruncs) and the "us" form
// (X86vtruncus).

// qword -> byte
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;

// qword -> word
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;

// qword -> dword
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;

// dword -> byte
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;

// dword -> word
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;

// word -> byte
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
6240
// Without VLX, a truncate of a 256-bit source is done with the 512-bit
// instruction: the source is placed in the low half (sub_ymm) of a 512-bit
// register and the low 128 bits (sub_xmm) of the result are extracted.
// The widening uses INSERT_SUBREG into an IMPLICIT_DEF rather than
// SUBREG_TO_REG: SUBREG_TO_REG asserts that the remaining bits of the wide
// register are zero, which nothing guarantees for an arbitrary VR256X value,
// and the upper result lanes are discarded anyway.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
6251
// Same widening trick for v16i16 -> v16i8 when BWI is available without VLX.
// INSERT_SUBREG into an IMPLICIT_DEF is used because the upper half of the
// widened register is don't-care; SUBREG_TO_REG would assert it is zero.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG  (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
6257
// Common part of the vector extend instructions for one source/destination
// type pair.
//   rr - maskable register form, matched from OpNode on the source vector,
//   rm - maskable memory form, matched from the extending-load fragment
//        LdFrag.
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
                  X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                  X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                            (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src),
                            OpcodeStr, "$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
            EVEX;

  let mayLoad = 1 in
  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                            (outs DestInfo.RC:$dst), (ins x86memop:$src),
                            OpcodeStr, "$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
            EVEX;
}
6274
// Byte -> word extend at 128/256/512 bits.  LdFrag defaults to the
// "<ExtTy>extloadvi8" fragment.  Needs HasBWI; the 128/256-bit forms also
// need HasVLX.
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
          string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v8i16x_info,
                                     v16i8x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v16i16x_info,
                                     v16i8x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasBWI] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
                                  v32i8x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
  }
}
6292
// Byte -> dword extend at 128/256/512 bits.  LdFrag defaults to the
// "<ExtTy>extloadvi8" fragment.  Needs HasAVX512; the 128/256-bit forms also
// need HasVLX.
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, SDNode OpNode,
          string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v4i32x_info,
                                     v16i8x_info, i32mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v8i32x_info,
                                     v16i8x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
                                  v16i8x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
  }
}
6310
// Byte -> qword extend at 128/256/512 bits.  LdFrag defaults to the
// "<ExtTy>extloadvi8" fragment.  Needs HasAVX512; the 128/256-bit forms also
// need HasVLX.
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
          string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v2i64x_info,
                                     v16i8x_info, i16mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v4i64x_info,
                                     v16i8x_info, i32mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
                                  v16i8x_info, i64mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
  }
}
6328
// Word -> dword extend at 128/256/512 bits.  LdFrag defaults to the
// "<ExtTy>extloadvi16" fragment.  Needs HasAVX512; the 128/256-bit forms also
// need HasVLX.
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
         string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v4i32x_info,
                                     v8i16x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v8i32x_info,
                                     v8i16x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
                                  v16i16x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
  }
}
6346
// Word -> qword extend at 128/256/512 bits.  LdFrag defaults to the
// "<ExtTy>extloadvi16" fragment.  Needs HasAVX512; the 128/256-bit forms also
// need HasVLX.
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
         string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v2i64x_info,
                                     v8i16x_info, i32mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v4i64x_info,
                                     v8i16x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
                                  v8i16x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
  }
}
6364
// Dword -> qword extend at 128/256/512 bits.  LdFrag defaults to the
// "<ExtTy>extloadvi32" fragment.  Needs HasAVX512; the 128/256-bit forms also
// need HasVLX.
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
         string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v2i64x_info,
                                     v4i32x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v4i64x_info,
                                     v4i32x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
                                  v8i32x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
6383
// Zero-extending moves: X86vzext with the "z" extending-load fragments.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;

// Sign-extending moves: X86vsext with the "s" extending-load fragments.
defm VPMOVSXBW : avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
defm VPMOVSXBD : avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
defm VPMOVSXBQ : avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
defm VPMOVSXWD : avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
defm VPMOVSXWQ : avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
defm VPMOVSXDQ : avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
6398
6399//===----------------------------------------------------------------------===//
6400// GATHER - SCATTER Operations
6401
// One gather instruction ("rm") for vector type `_`.
// The instruction has two results: the gathered vector $dst and the
// written-back mask $mask_wb.  The constraints tie $src1 to $dst and $mask to
// $mask_wb, and mark $dst as early-clobber.  The mnemonic is
// OpcodeStr + _.Suffix.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs _.RC:$dst, _.KRCWM:$mask_wb),
                      (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
                      !strconcat(OpcodeStr, _.Suffix,
                        "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                      [(set _.RC:$dst, _.KRCWM:$mask_wb,
                            (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
                                        vectoraddr:$src2))]>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
  }
}
6415
// Gathers of 64-bit elements (all forms set VEX_W).  For each vector length a
// "d" form (opcode dopc) and a "q" form (opcode qopc) are defined.  The
// 512-bit forms carry no extra predicate here; the 256/128-bit forms are
// guarded by HasVLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  // 512-bit forms.
  defm NAME##D##SUFF##Z : avx512_gather<dopc, !strconcat(OpcodeStr, "d"),
                                        _.info512, vy32xmem, mgatherv8i32>,
                          EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z : avx512_gather<qopc, !strconcat(OpcodeStr, "q"),
                                        _.info512, vz64mem, mgatherv8i64>,
                          EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    // 256-bit forms.
    defm NAME##D##SUFF##Z256 : avx512_gather<dopc, !strconcat(OpcodeStr, "d"),
                                             _.info256, vx32xmem, mgatherv4i32>,
                               EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256 : avx512_gather<qopc, !strconcat(OpcodeStr, "q"),
                                             _.info256, vy64xmem, mgatherv4i64>,
                               EVEX_V256, VEX_W;
    // 128-bit forms.
    defm NAME##D##SUFF##Z128 : avx512_gather<dopc, !strconcat(OpcodeStr, "d"),
                                             _.info128, vx32xmem, mgatherv4i32>,
                               EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128 : avx512_gather<qopc, !strconcat(OpcodeStr, "q"),
                                             _.info128, vx64xmem, mgatherv2i64>,
                               EVEX_V128, VEX_W;
  }
}
6433
// Gathers of 32-bit elements.  For each vector length a "d" form (opcode
// dopc) and a "q" form (opcode qopc) are defined.  The "q" forms use the next
// narrower data type info (info256 at V512, info128 at V256 and V128).  The
// 256/128-bit forms are guarded by HasVLX.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  // 512-bit forms.
  defm NAME##D##SUFF##Z : avx512_gather<dopc, !strconcat(OpcodeStr, "d"),
                                        _.info512, vz32mem, mgatherv16i32>,
                          EVEX_V512;
  defm NAME##Q##SUFF##Z : avx512_gather<qopc, !strconcat(OpcodeStr, "q"),
                                        _.info256, vz64mem, mgatherv8i64>,
                          EVEX_V512;

  let Predicates = [HasVLX] in {
    // 256-bit forms.
    defm NAME##D##SUFF##Z256 : avx512_gather<dopc, !strconcat(OpcodeStr, "d"),
                                             _.info256, vy32xmem, mgatherv8i32>,
                               EVEX_V256;
    defm NAME##Q##SUFF##Z256 : avx512_gather<qopc, !strconcat(OpcodeStr, "q"),
                                             _.info128, vy64xmem, mgatherv4i64>,
                               EVEX_V256;
    // 128-bit forms.
    defm NAME##D##SUFF##Z128 : avx512_gather<dopc, !strconcat(OpcodeStr, "d"),
                                             _.info128, vx32xmem, mgatherv4i32>,
                               EVEX_V128;
    defm NAME##Q##SUFF##Z128 : avx512_gather<qopc, !strconcat(OpcodeStr, "q"),
                                             _.info128, vx64xmem, mgatherv2i64>,
                               EVEX_V128;
  }
}
6451
6452
// Floating-point gathers (opcodes 0x92 / 0x93): "PD" over the f64 types and
// "PS" over the f32 types.
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info,
                                  "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info,
                                  "vgather", "PS">;

// Integer gathers (opcodes 0x90 / 0x91): "Q" over the i64 types and "D" over
// the i32 types.
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info,
                                   "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info,
                                   "vpgather", "D">;
6458
// One scatter instruction ("mr") for vector type `_`.
// The only result is the written-back mask $mask_wb, tied to the $mask input.
// The instruction is marked mayStore and the mnemonic is
// OpcodeStr + _.Suffix.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode> {

  let mayStore = 1, Constraints = "$mask = $mask_wb",
      ExeDomain = _.ExeDomain in {
    def mr : AVX5128I<opc, MRMDestMem,
                      (outs _.KRCWM:$mask_wb),
                      (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, _.Suffix,
                        "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                      [(set _.KRCWM:$mask_wb,
                            (ScatterNode (_.VT _.RC:$src), _.KRCWM:$mask,
                                         vectoraddr:$dst))]>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
  }
}
6472
// Scatter instantiations for one element type, every record carrying VEX_W.
// D-suffixed records use opcode dopc, Q-suffixed records use qopc.  The VT
// info width always matches the EVEX vector-length class; only the index
// memory operand and scatter fragment differ between D and Q.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo VTInfo, string OpcodeStr,
                               string SUFF> {
  // EVEX_V512 records; no Predicates list is applied at this level.
  defm NAME##D##SUFF##Z:
      avx512_scatter<dopc, OpcodeStr##"d", VTInfo.info512, vy32xmem,
                     mscatterv8i32>,
      EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z:
      avx512_scatter<qopc, OpcodeStr##"q", VTInfo.info512, vz64mem,
                     mscatterv8i64>,
      EVEX_V512, VEX_W;

  // EVEX_V256 and EVEX_V128 records additionally require HasVLX.
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256:
        avx512_scatter<dopc, OpcodeStr##"d", VTInfo.info256, vx32xmem,
                       mscatterv4i32>,
        EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256:
        avx512_scatter<qopc, OpcodeStr##"q", VTInfo.info256, vy64xmem,
                       mscatterv4i64>,
        EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128:
        avx512_scatter<dopc, OpcodeStr##"d", VTInfo.info128, vx32xmem,
                       mscatterv4i32>,
        EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128:
        avx512_scatter<qopc, OpcodeStr##"q", VTInfo.info128, vx64xmem,
                       mscatterv2i64>,
        EVEX_V128, VEX_W;
  }
}
6490
// Scatter instantiations for one element type, none of them carrying VEX_W.
// D-suffixed records use opcode dopc, Q-suffixed records use qopc.  Each Q
// record is given a narrower VT info than its EVEX vector-length class
// (info256 with EVEX_V512, info128 with EVEX_V256).
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo VTInfo, string OpcodeStr,
                               string SUFF> {
  // EVEX_V512 records; no Predicates list is applied at this level.
  defm NAME##D##SUFF##Z:
      avx512_scatter<dopc, OpcodeStr##"d", VTInfo.info512, vz32mem,
                     mscatterv16i32>,
      EVEX_V512;
  defm NAME##Q##SUFF##Z:
      avx512_scatter<qopc, OpcodeStr##"q", VTInfo.info256, vz64mem,
                     mscatterv8i64>,
      EVEX_V512;

  // EVEX_V256 and EVEX_V128 records additionally require HasVLX.
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256:
        avx512_scatter<dopc, OpcodeStr##"d", VTInfo.info256, vy32xmem,
                       mscatterv8i32>,
        EVEX_V256;
    defm NAME##Q##SUFF##Z256:
        avx512_scatter<qopc, OpcodeStr##"q", VTInfo.info128, vy64xmem,
                       mscatterv4i64>,
        EVEX_V256;
    defm NAME##D##SUFF##Z128:
        avx512_scatter<dopc, OpcodeStr##"d", VTInfo.info128, vx32xmem,
                       mscatterv4i32>,
        EVEX_V128;
    defm NAME##Q##SUFF##Z128:
        avx512_scatter<qopc, OpcodeStr##"q", VTInfo.info128, vx64xmem,
                       mscatterv2i64>,
        EVEX_V128;
  }
}
6508
// Floating-point scatters: the "PD" records come from avx512vl_f64_info and
// the "PS" records from avx512vl_f32_info.  0xA2 is dopc, 0xA3 is qopc.
defm VSCATTER :
    avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
    avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: the "Q" records come from avx512vl_i64_info and the "D"
// records from avx512vl_i32_info.  0xA0 is dopc, 0xA1 is qopc.
defm VPSCATTER :
    avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
    avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
6514
// Gather/scatter prefetch: a single masked record "m" with no outputs and no
// selection pattern.  It requires HasPFI and is marked hasSideEffects.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr, RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in {
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                     OpcodeStr # "\t{$src {${mask}}|{${mask}}, $src}",
                     []>,
            EVEX, EVEX_K;
  }
}
6523
// Prefetch records.  Within each group of four, the D-index records use opcode
// 0xC6 and the Q-index records 0xC7; the PD records add VEX_W.  The groups
// differ only in the Format argument (MRM1m, MRM2m, MRM5m, MRM6m).

// Gather prefetch, hint 0 (MRM1m).
defm VGATHERPF0DPS :
    avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", VK16WM,
                                   vz32mem>,
    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS :
    avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", VK8WM,
                                   vz64mem>,
    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD :
    avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", VK8WM,
                                   vy32mem>,
    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD :
    avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", VK8WM,
                                   vz64mem>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetch, hint 1 (MRM2m).
defm VGATHERPF1DPS :
    avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", VK16WM,
                                   vz32mem>,
    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS :
    avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", VK8WM,
                                   vz64mem>,
    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD :
    avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", VK8WM,
                                   vy32mem>,
    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD :
    avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", VK8WM,
                                   vz64mem>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, hint 0 (MRM5m).
defm VSCATTERPF0DPS :
    avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", VK16WM,
                                   vz32mem>,
    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS :
    avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", VK8WM,
                                   vz64mem>,
    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD :
    avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", VK8WM,
                                   vy32mem>,
    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD :
    avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", VK8WM,
                                   vz64mem>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, hint 1 (MRM6m).
defm VSCATTERPF1DPS :
    avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", VK16WM,
                                   vz32mem>,
    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS :
    avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", VK8WM,
                                   vz64mem>,
    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD :
    avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", VK8WM,
                                   vy32mem>,
    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD :
    avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", VK8WM,
                                   vz64mem>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
6571
// Leaf fragments used to match a sign extension of vXi1 to vXiY, written as an
// X86vsrai of a VR512 value by 31 bits (v16i32) or 63 bits (v8i64).
def v16i1sextv16i32 :
    PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 :
    PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
6575
// Storing an i1 constant becomes a byte store (MOV8mi).  Both -1 and 1 are
// written as the byte value 1; 0 is written as 0.
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1  1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1  0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
6579
// With HasAVX512 but NoDQI, a VK1 or VK8 mask is stored by copying it into the
// VK16 class, moving it out with KMOVWrk, and storing the sub_8bit
// sub-register of the result with MOV8mr.
def : Pat<(store VK1:$src, addr:$dst),
          (MOV8mr addr:$dst,
                  (EXTRACT_SUBREG
                      (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
                      sub_8bit))>,
      Requires<[HasAVX512, NoDQI]>;

def : Pat<(store VK8:$src, addr:$dst),
          (MOV8mr addr:$dst,
                  (EXTRACT_SUBREG
                      (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
                      sub_8bit))>,
      Requires<[HasAVX512, NoDQI]>;
6589
// Matches a truncstore whose memory VT is MVT::i1.
def truncstorei1
    : PatFrag<(ops node:$val, node:$ptr),
              (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;

// An i1 truncating store of a GR8 value is emitted as a plain MOV8mr.
def : Pat<(truncstorei1 GR8:$src, addr:$dst), (MOV8mr addr:$dst, GR8:$src)>;
6597
// Register-to-register record "rr": produces a Vec.VT value in Vec.RC from a
// Vec.KRC mask register through X86vsext.  The mnemonic is OpcodeStr followed
// by Vec.Suffix.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec,
                            string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      !strconcat(OpcodeStr##Vec.Suffix,
                                 "\t{$src, $dst|$dst, $src}"),
                      [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>,
           EVEX;
}
6603
// Instantiates cvt_by_vec_width at each vector length.  The EVEX_V512 record
// requires prd; the EVEX_V256 and EVEX_V128 records require prd and HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
6614
// Mask-to-vector conversions for the four integer element widths.  B and W use
// opcode 0x28 under HasBWI; D and Q use opcode 0x38 under HasDQI.  The W and Q
// records add VEX_W.
multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
  defm NAME##B :
      cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr, HasBWI>;
  defm NAME##W :
      cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr, HasBWI>,
      VEX_W;
  defm NAME##D :
      cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr, HasDQI>;
  defm NAME##Q :
      cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr, HasDQI>,
      VEX_W;
}

// All mask-to-vector records share the "vpmovm2" mnemonic stem.
defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
6627
// Register-to-register record "rr": produces a KRC mask register from a vector
// register; the selection pattern is a trunc of the vector value.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo VTI,
                                         string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs VTI.KRC:$dst), (ins VTI.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set VTI.KRC:$dst, (trunc (VTI.VT VTI.RC:$src)))]>,
           EVEX;
}
6633
// Instantiates convert_vector_to_mask_common at each vector length.  The
// EVEX_V512 record requires prd; the EVEX_V256 and EVEX_V128 records require
// prd and HasVLX.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo,
                                         Predicate prd> {
  let Predicates = [prd] in {
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
6647
// Vector-to-mask conversions.  B and W use opcode 0x29 under HasBWI; D and Q
// use opcode 0x39 under HasDQI.  The W and Q records add VEX_W.
defm VPMOVB2M :
    avx512_convert_vector_to_mask<0x29, "vpmovb2m", avx512vl_i8_info, HasBWI>;
defm VPMOVW2M :
    avx512_convert_vector_to_mask<0x29, "vpmovw2m", avx512vl_i16_info, HasBWI>,
    VEX_W;
defm VPMOVD2M :
    avx512_convert_vector_to_mask<0x39, "vpmovd2m", avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M :
    avx512_convert_vector_to_mask<0x39, "vpmovq2m", avx512vl_i64_info, HasDQI>,
    VEX_W;
6656
6657//===----------------------------------------------------------------------===//
6658// AVX-512 - COMPRESS and EXPAND
6659//
6660
// Compress records for one vector width:
//   rr  - register destination, matched through X86compress.
//   mr  - unmasked store form, no selection pattern.
//   mrk - masked store form; matches a store of a vselect between the
//         compressed source and an all-zeros vector.
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo VTI,
                                 string OpcodeStr> {
  defm rr : AVX512_maskable<opc, MRMDestReg, VTI, (outs VTI.RC:$dst),
                            (ins VTI.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (VTI.VT (X86compress VTI.RC:$src1))>,
            AVX5128IBase;

  let mayStore = 1 in {
    def mr : AVX5128I<opc, MRMDestMem, (outs),
                      (ins VTI.MemOp:$dst, VTI.RC:$src),
                      OpcodeStr # "\t{$src, $dst |$dst, $src}",
                      []>,
             EVEX_CD8<VTI.EltSize, CD8VT1>;

    def mrk : AVX5128I<opc, MRMDestMem, (outs),
                       (ins VTI.MemOp:$dst, VTI.KRCWM:$mask, VTI.RC:$src),
                       OpcodeStr #
                         "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
                       [(store (VTI.VT (vselect VTI.KRCWM:$mask,
                                          (VTI.VT (X86compress VTI.RC:$src)),
                                          VTI.ImmAllZerosV)),
                               addr:$dst)]>,
              EVEX_K, EVEX_CD8<VTI.EltSize, CD8VT1>;
  }
}
6682
// Instantiates compress_by_vec_width at each vector length; the EVEX_V256 and
// EVEX_V128 records require HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
6692
// Compress records: integer forms use opcode 0x8B, floating-point forms 0x8A.
// The 64-bit element forms add VEX_W.
defm VPCOMPRESSD :
    compress_by_elt_width<0x8B, "vpcompressd", avx512vl_i32_info>,
    EVEX;
defm VPCOMPRESSQ :
    compress_by_elt_width<0x8B, "vpcompressq", avx512vl_i64_info>,
    EVEX, VEX_W;
defm VCOMPRESSPS :
    compress_by_elt_width<0x8A, "vcompressps", avx512vl_f32_info>,
    EVEX;
defm VCOMPRESSPD :
    compress_by_elt_width<0x8A, "vcompresspd", avx512vl_f64_info>,
    EVEX, VEX_W;
6701
// Expand records for one vector width:
//   rr - register source, matched through X86expand.
//   rm - memory source; the loaded value is bitconverted to the vector type
//        before X86expand is applied.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo VTI,
                               string OpcodeStr> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                            (ins VTI.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (VTI.VT (X86expand VTI.RC:$src1))>,
            AVX5128IBase;

  let mayLoad = 1 in {
    defm rm : AVX512_maskable<opc, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                              (ins VTI.MemOp:$src1), OpcodeStr,
                              "$src1", "$src1",
                              (VTI.VT (X86expand
                                         (VTI.VT (bitconvert
                                                    (VTI.LdFrag addr:$src1)))))>,
              AVX5128IBase, EVEX_CD8<VTI.EltSize, CD8VT1>;
  }
}
6716
// Instantiates expand_by_vec_width at each vector length; the EVEX_V256 and
// EVEX_V128 records require HasVLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               AVX512VLVectorVTInfo VTInfo> {
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
6726
// Expand records: integer forms use opcode 0x89, floating-point forms 0x88.
// The 64-bit element forms add VEX_W.
defm VPEXPANDD :
    expand_by_elt_width<0x89, "vpexpandd", avx512vl_i32_info>,
    EVEX;
defm VPEXPANDQ :
    expand_by_elt_width<0x89, "vpexpandq", avx512vl_i64_info>,
    EVEX, VEX_W;
defm VEXPANDPS :
    expand_by_elt_width<0x88, "vexpandps", avx512vl_f32_info>,
    EVEX;
defm VEXPANDPD :
    expand_by_elt_width<0x88, "vexpandpd", avx512vl_f64_info>,
    EVEX, VEX_W;
6735
// Unary packed FP operation taking an immediate:
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// Every pattern is created with FROUND_CURRENT as its last operand.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _>{
  // Register source.  The second operand string lists the same operands as the
  // first in reverse order (as rmi and rmbi below do), so it must name $src1
  // followed by $src2; otherwise the source register is never printed.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_CURRENT))>;
  let mayLoad = 1 in {
    // Full-vector memory source, loaded with LdFrag and bitconverted.
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2),
                              (i32 FROUND_CURRENT))>;
    // Scalar memory source broadcast with X86VBroadcast; the operand strings
    // append BroadcastStr to the memory operand and the record adds EVEX_B.
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                      "${src1}"##_.BroadcastStr##", $src2",
                      (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                              (i32 imm:$src2),
                              (i32 FROUND_CURRENT))>, EVEX_B;
  }
}
6764
// Unary packed FP operation with an immediate and {sae}:
//   reg_vec1 = op(reg_vec2, imm), {sae}
// The single record "rrib" passes FROUND_NO_EXC to OpNode and adds EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86VectorVTInfo VTI> {
  defm rrib : AVX512_maskable<opc, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                  (ins VTI.RC:$src1, i32u8imm:$src2),
                  OpcodeStr##VTI.Suffix,
                  "$src2,{sae}, $src1", "$src1, {sae}, $src2",
                  (OpNode (VTI.VT VTI.RC:$src1), (i32 imm:$src2),
                          (i32 FROUND_NO_EXC))>,
              EVEX_B;
}
6776
// Unary packed-imm records at every vector length.  Only the EVEX_V512 record
// also gets the {sae} form; the EVEX_V128 and EVEX_V256 records require HasVLX
// in addition to prd.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                                                 AVX512VLVectorVTInfo VTInfo,
                                                 bits<8> opc, SDNode OpNode,
                                                 Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                        VTInfo.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode,
                                            VTInfo.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           VTInfo.info128>,
                EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           VTInfo.info256>,
                EVEX_V256;
  }
}
6791
// Packed FP operation with two sources and an immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// Every pattern is created with FROUND_CURRENT as its last operand.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo VTI> {
  // Both sources in registers.
  defm rri : AVX512_maskable<opc, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                 (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2),
                         (i32 imm:$src3), (i32 FROUND_CURRENT))>;

  let mayLoad = 1 in {
    // Second source is a full-vector load, bitconverted to the vector type.
    defm rmi : AVX512_maskable<opc, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                   (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3),
                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (OpNode (VTI.VT VTI.RC:$src1),
                           (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))),
                           (i32 imm:$src3), (i32 FROUND_CURRENT))>;

    // Second source is a scalar load broadcast with X86VBroadcast; the record
    // adds EVEX_B and the operand strings append BroadcastStr to $src2.
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                    (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr,
                    "$src3, ${src2}"##VTI.BroadcastStr##", $src1",
                    "$src1, ${src2}"##VTI.BroadcastStr##", $src3",
                    (OpNode (VTI.VT VTI.RC:$src1),
                            (VTI.VT (X86VBroadcast
                                       (VTI.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3), (i32 FROUND_CURRENT))>,
                EVEX_B;
  }
}
6823
// Two-source operation with an i8 immediate whose destination and source
// vector types may differ:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  // Both sources in registers.
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                 (outs DestInfo.RC:$dst),
                 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                      (SrcInfo.VT SrcInfo.RC:$src2),
                                      (i8 imm:$src3)))>;

  // Second source loaded from memory and bitconverted to the source type.
  let mayLoad = 1 in {
    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                   (outs DestInfo.RC:$dst),
                   (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (DestInfo.VT
                      (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                              (SrcInfo.VT (bitconvert
                                             (SrcInfo.LdFrag addr:$src2))),
                              (i8 imm:$src3)))>;
  }
}
6844
// Two-source operation with an i8 immediate and one vector type throughout:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// The register and memory forms are inherited from avx512_3Op_rm_imm8 with the
// same VT info used for destination and source; this multiclass adds the
// broadcast form.
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo VTI>
    : avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, VTI, VTI> {

  let mayLoad = 1 in {
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                    (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr,
                    "$src3, ${src2}"##VTI.BroadcastStr##", $src1",
                    "$src1, ${src2}"##VTI.BroadcastStr##", $src3",
                    (OpNode (VTI.VT VTI.RC:$src1),
                            (VTI.VT (X86VBroadcast
                                       (VTI.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3))>,
                EVEX_B;
  }
}
6861
// Scalar FP operation with two sources and an immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_scalar, imm)
// Every pattern is created with FROUND_CURRENT as its last operand.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo VTI> {
  // Both sources in registers.
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                 (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2),
                         (i32 imm:$src3), (i32 FROUND_CURRENT))>;

  let mayLoad = 1 in {
    // Second source loaded with ScalarLdFrag and wrapped in scalar_to_vector.
    defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                   (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3),
                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (OpNode (VTI.VT VTI.RC:$src1),
                           (VTI.VT (scalar_to_vector
                                      (VTI.ScalarLdFrag addr:$src2))),
                           (i32 imm:$src3), (i32 FROUND_CURRENT))>;

    // Assembler-only alternative on FRC registers with a scalar memory
    // operand; it has no selection pattern.
    let isAsmParserOnly = 1 in
      defm rmi_alt : AVX512_maskable_in_asm<opc, MRMSrcMem, VTI,
                         (outs VTI.FRC:$dst),
                         (ins VTI.FRC:$src1, VTI.ScalarMemOp:$src2,
                              u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         []>;
  }
}
6893
// Packed FP operation with two register sources, an immediate and {sae}:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// The single record "rrib" passes FROUND_NO_EXC to OpNode and adds EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86VectorVTInfo VTI> {
  defm rrib : AVX512_maskable<opc, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                  (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3),
                  OpcodeStr,
                  "$src3,{sae}, $src2, $src1", "$src1, $src2,{sae}, $src3",
                  (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2),
                          (i32 imm:$src3), (i32 FROUND_NO_EXC))>,
              EVEX_B;
}
// Scalar FP operation with two register sources, an immediate and {sae}:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// The single record is named NAME#rrib, passes FROUND_NO_EXC to OpNode and
// adds EVEX_B.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86VectorVTInfo VTI> {
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, VTI,
                       (outs VTI.RC:$dst),
                       (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3),
                       OpcodeStr,
                       "$src3,{sae}, $src2, $src1",
                       "$src1, $src2,{sae}, $src3",
                       (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2),
                               (i32 imm:$src3), (i32 FROUND_NO_EXC))>,
                   EVEX_B;
}
6918
// Two-source packed-imm records at every vector length.  Only the EVEX_V512
// record also gets the {sae} form; the EVEX_V128 and EVEX_V256 records require
// HasVLX in addition to prd.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                                           AVX512VLVectorVTInfo VTInfo,
                                           bits<8> opc, SDNode OpNode,
                                           Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, VTInfo.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
  }
}
6934
// Instantiates avx512_3Op_rm_imm8 at every vector length under HasBWI, adding
// HasVLX for the EVEX_V128 and EVEX_V256 records.  Every record also gets
// AVX512AIi8Base and EVEX_4V.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                                     AVX512VLVectorVTInfo DestInfo,
                                     AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasBWI] in
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
                                SrcInfo.info512>,
             EVEX_V512, AVX512AIi8Base, EVEX_4V;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
                                   SrcInfo.info128>,
                EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
                                   SrcInfo.info256>,
                EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
6948
// Instantiates avx512_3Op_imm8 at every vector length under HasAVX512, adding
// HasVLX for the EVEX_V128 and EVEX_V256 records.
multiclass avx512_common_3Op_imm8<string OpcodeStr,
                                  AVX512VLVectorVTInfo VTInfo,
                                  bits<8> opc, SDNode OpNode> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
  }
}
6959
// Scalar imm records under predicate prd: a single Z128 group that combines
// the plain forms (avx512_fp_scalar_imm) with the {sae} form
// (avx512_fp_sae_scalar_imm).
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                                           X86VectorVTInfo VTI, bits<8> opc,
                                           SDNode OpNode, Predicate prd> {
  let Predicates = [prd] in
    defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, VTI>,
                avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, VTI>;
}
6967
// PS and PD variants of a unary packed-imm operation.  PS uses opcode opcPs
// with avx512vl_f32_info; PD uses opcPd with avx512vl_f64_info and adds VEX_W.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                                                     bits<8> opcPs,
                                                     bits<8> opcPd,
                                                     SDNode OpNode,
                                                     Predicate prd> {
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f32_info, opcPs,
                                                  OpNode, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f64_info, opcPd,
                                                  OpNode, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;
}
6975
// VFIXUPIMM, packed forms: opcode 0x54 under HasAVX512.
defm VFIXUPIMMPD :
    avx512_common_fp_sae_packed_imm<"vfixupimmpd", avx512vl_f64_info, 0x54,
                                    X86VFixupimm, HasAVX512>,
    AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VFIXUPIMMPS :
    avx512_common_fp_sae_packed_imm<"vfixupimmps", avx512vl_f32_info, 0x54,
                                    X86VFixupimm, HasAVX512>,
    AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// VFIXUPIMM, scalar forms: opcode 0x55 under HasAVX512, marked VEX_LIG.
defm VFIXUPIMMSD :
    avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, 0x55,
                                    X86VFixupimm, HasAVX512>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMSS :
    avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, 0x55,
                                    X86VFixupimm, HasAVX512>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
6989
// Unary packed-imm operations.  The two opcode arguments are the PS and PD
// opcodes respectively; VREDUCE requires HasDQI, the others HasAVX512.
defm VREDUCE :
    avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                                              X86VReduce, HasDQI>,
    AVX512AIi8Base, EVEX;
defm VRNDSCALE :
    avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                                              X86VRndScale, HasAVX512>,
    AVX512AIi8Base, EVEX;
defm VGETMANT :
    avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                                              X86VGetMant, HasAVX512>,
    AVX512AIi8Base, EVEX;
6996
6997
// VRANGE, packed forms: opcode 0x50 under HasDQI.
defm VRANGEPD :
    avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50,
                                    X86VRange, HasDQI>,
    AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS :
    avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 0x50,
                                    X86VRange, HasDQI>,
    AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// VRANGE, scalar forms: opcode 0x51 under HasDQI.
defm VRANGESD :
    avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, 0x51, X86VRange,
                                    HasDQI>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS :
    avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86VRange,
                                    HasDQI>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// VREDUCE, scalar forms: opcode 0x57 under HasDQI.
defm VREDUCESD :
    avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 0x57, X86Reduces,
                                    HasDQI>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS :
    avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 0x57, X86Reduces,
                                    HasDQI>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// VGETMANT, scalar forms: opcode 0x27 under HasAVX512.
defm VGETMANTSD :
    avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 0x27, X86GetMants,
                                    HasAVX512>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS :
    avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 0x27, X86GetMants,
                                    HasAVX512>,
    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
7025
// Instantiates avx512_3Op_imm8 for the EVEX_V512 and EVEX_V256 lengths only
// (there is no Z128 record).  OpNode defaults to X86Shuf128.
multiclass avx512_shuff_packed_128<string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, bits<8> opc,
                                   SDNode OpNode = X86Shuf128> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
}
// Select the generic 512-bit rounding nodes to VRNDSCALE with a fixed
// immediate: ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
// fnearbyint -> 0xC.
// NOTE(review): the immediates are presumably the VRNDSCALE imm8 rounding
// control encoding from the Intel SDM - confirm there before changing them.
let Predicates = [HasAVX512] in {
  // v16f32 -> VRNDSCALEPSZrri
  def : Pat<(v16f32 (ffloor VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
  def : Pat<(v16f32 (fnearbyint VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
  def : Pat<(v16f32 (fceil VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
  def : Pat<(v16f32 (frint VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
  def : Pat<(v16f32 (ftrunc VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;

  // v8f64 -> VRNDSCALEPDZrri
  def : Pat<(v8f64 (ffloor VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
  def : Pat<(v8f64 (fnearbyint VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
  def : Pat<(v8f64 (fceil VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
  def : Pat<(v8f64 (frint VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
  def : Pat<(v8f64 (ftrunc VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
}
7059
// FP flavours use opcode 0x23, integer flavours use opcode 0x43; the 64-bit
// element forms additionally set VEX_W.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", avx512vl_f32_info,
                                          0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", avx512vl_f64_info,
                                          0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", avx512vl_i32_info,
                                          0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", avx512vl_i64_info,
                                          0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
7068
// VALIGN: one set of records over the integer types plus a codegen-only
// duplicate (suffix _FP) over the FP types, both with opcode 0x03.
multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                         AVX512VLVectorVTInfo VTInfo_FP> {
  // Integer-typed records.
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
              AVX512AIi8Base, EVEX_4V;

  // FP-typed records, marked isCodeGenOnly.
  let isCodeGenOnly = 1 in
    defm NAME#_FP : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03,
                                           X86VAlign>,
                    AVX512AIi8Base, EVEX_4V;
}
7078
// Dword and qword element flavours of VALIGN; the qword form sets VEX_W.
defm VALIGND : avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
               EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;
7083
// Emits one selection pattern mapping X86PAlignr on the value type of `_` to
// the register-register-immediate instruction NAME#<ZSuffix>rri, guarded by
// the predicate list `p`.
multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p> {
  let Predicates = p in {
    def NAME#_.VTName#rri :
        Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME#_.ZSuffix#rri)
                _.RC:$src1, _.RC:$src2, imm:$imm)>;
  }
}
7091
// Applies avx512_vpalign_lowering to all three vector widths of `_`: the
// 512-bit pattern needs HasBWI, the 128/256-bit patterns also need HasVLX.
multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>
    : avx512_vpalign_lowering<_.info512, [HasBWI]>,
      avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
      avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
7096
// VPALIGNR is defined on the i8 vector types; the extra lowering multiclasses
// add X86PAlignr patterns for the remaining element types onto the same
// instructions.
defm VPALIGN : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         avx512vl_i8_info, avx512vl_i8_info>,
               avx512_vpalign_lowering_common<avx512vl_i16_info>,
               avx512_vpalign_lowering_common<avx512vl_i32_info>,
               avx512_vpalign_lowering_common<avx512vl_f32_info>,
               avx512_vpalign_lowering_common<avx512vl_i64_info>,
               avx512_vpalign_lowering_common<avx512vl_f64_info>,
               EVEX_CD8<8, CD8VF>;

// VDBPSADBW is instantiated with the i16 and i8 vector type infos.
defm VDBPSADBW : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                           avx512vl_i16_info,
                                           avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>;
7108
// Unary operation with a register-source form (rr) and a full-vector
// memory-source form (rm), both built through AVX512_maskable.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _> {
  // Register source.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (OpNode _.RC:$src1))>,
            EVEX, AVX5128IBase;

  // Memory source; the loaded value is bitconverted before OpNode is applied.
  let mayLoad = 1 in {
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst), (ins _.MemOp:$src1),
                              OpcodeStr, "$src1", "$src1",
                              (_.VT (OpNode
                                      (bitconvert (_.LdFrag addr:$src1))))>,
              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
7123
// Extends avx512_unary_rm with a broadcast-from-scalar-memory form (rmb),
// which sets EVEX_B and matches OpNode applied to an X86VBroadcast of a
// scalar load.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _>
    : avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
  let mayLoad = 1 in {
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst), (ins _.ScalarMemOp:$src1),
                               OpcodeStr,
                               "${src1}"##_.BroadcastStr,
                               "${src1}"##_.BroadcastStr,
                               (_.VT (OpNode
                                       (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src1))))>,
               EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
7136
// Instantiates avx512_unary_rm at all three vector lengths.  The 512-bit form
// requires `prd`; the 256-bit and 128-bit forms additionally require HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
7149
// Same as avx512_unary_rm_vl but built on avx512_unary_rmb, so every vector
// length also gets the broadcast memory form.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
7163
// Qword (suffix "q", VEX_W set) and dword (suffix "d") flavours of a unary
// operation, each with broadcast support and its own opcode.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode,
                               avx512vl_i32_info, prd>;
}
7171
// Word (suffix "w") and byte (suffix "b") flavours of a unary operation.
// These use avx512_unary_rm_vl, i.e. no broadcast memory form is generated.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode,
                              avx512vl_i16_info, prd>;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode,
                              avx512vl_i8_info, prd>;
}
7177
// All four integer element sizes of a unary operation: the d/q flavours are
// gated on HasAVX512 and the b/w flavours on HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode> {
  defm NAME
      : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, HasAVX512>,
        avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, HasBWI>;
}
7186
// Opcodes are passed in b, w, d, q order.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;

// Also select VPABSD/VPABSQ for the open-coded form xor(m, add(x, m)), where
// m is the v16i1sextv16i32 / v8i1sextv8i64 fragment (defined elsewhere;
// presumably the sign-extended mask of x -- confirm against its definition).
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;
7197
// Leading-zero count.  The main records match `ctlz`; a codegen-only duplicate
// (suffix _UNDEF) matches `ctlz_zero_undef` with the same opcode and mnemonic.
// The same opcode is passed for both the d and q flavours.
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd> {
  defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;

  let isCodeGenOnly = 1 in {
    defm NAME#_UNDEF : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr,
                                             ctlz_zero_undef, prd>;
  }
}
7205
// Both instruction families are gated on HasCDI.
defm VPLZCNT    : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        HasCDI>;
7208
7209//===---------------------------------------------------------------------===//
7210// Replicate Single FP - MOVSHDUP and MOVSLDUP
7211//===---------------------------------------------------------------------===//
// f32 records plus a codegen-only i32 duplicate (suffix _I).  Both carry the
// XS modifier so they share one encoding.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
                                 HasAVX512>,
              XS;

  let isCodeGenOnly = 1 in {
    defm NAME#_I : avx512_unary_rm_vl<opc, OpcodeStr, OpNode,
                                      avx512vl_i32_info, HasAVX512>,
                   XS;
  }
}
7219
// MOVSHDUP uses opcode 0x16, MOVSLDUP uses opcode 0x12.
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
7222
7223//===----------------------------------------------------------------------===//
7224// AVX-512 - MOVDDUP
7225//===----------------------------------------------------------------------===//
7226
// 128-bit MOVDDUP.  Unlike avx512_unary_rm, the memory form takes a scalar
// memory operand and matches OpNode applied to a scalar_to_vector of a scalar
// load; its disp8 scaling uses CD8VH.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Register source.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src),
                            OpcodeStr, "$src", "$src",
                            (_.VT (OpNode (_.VT _.RC:$src)))>,
            EVEX;

  // Scalar memory source.
  let mayLoad = 1 in {
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                              OpcodeStr, "$src", "$src",
                              (_.VT (OpNode
                                      (_.VT (scalar_to_vector
                                              (_.ScalarLdFrag addr:$src)))))>,
              EVEX, EVEX_CD8<_.EltSize, CD8VH>;
  }
}
7239
// MOVDDUP at all three vector lengths.  The 512-bit and 256-bit forms reuse
// avx512_unary_rm; the 128-bit form uses avx512_movddup_128.  Note that the
// 512-bit form is not wrapped in a Predicates list here.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
7252
// MOVDDUP over the f64 vector types plus a codegen-only duplicate (suffix _I)
// over the i64 vector types.
//
// Modifiers trailing a defm apply only to that defm, so the XD and VEX_W
// modifiers must be repeated on the _I instantiation; otherwise the integer
// records would be emitted with a different encoding from the FP records they
// are meant to duplicate (compare avx512_replicate, which applies XS to both).
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode,
                                        avx512vl_f64_info>, XD, VEX_W;
  let isCodeGenOnly = 1 in
    defm NAME#_I: avx512_movddup_common<opc, OpcodeStr, OpNode,
                                        avx512vl_i64_info>, XD, VEX_W;
}
7260
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;

// Additional ways to reach the 128-bit memory form: a MOVDDUP of a full
// v2f64 load, and a v2f64 broadcast of an f64 load.
let Predicates = [HasAVX512, HasVLX] in {
  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
            (VMOVDDUPZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
            (VMOVDDUPZ128rm addr:$src)>;
}
7267
7268//===----------------------------------------------------------------------===//
7269// AVX-512 - Unpack Instructions
7270//===----------------------------------------------------------------------===//
// Floating-point unpack high / low.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>;

// Byte and word integer unpacks are gated on HasBWI.
defm VPUNPCKLBW  : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHBW  : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLWD  : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHWD  : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasBWI>;

// Dword and qword integer unpacks are gated on HasAVX512.
defm VPUNPCKLDQ  : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHDQ  : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
7291
7292//===----------------------------------------------------------------------===//
7293// AVX-512 - Extract & Insert Integer Instructions
7294//===----------------------------------------------------------------------===//
7295
// Store form shared by the byte and word element extracts.  The pattern
// matches a store of the truncated, assertzext-wrapped result of OpNode.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  let mayStore = 1 in {
    def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(store
                          (_.EltVT (trunc (assertzext
                                            (OpNode (_.VT _.RC:$src1),
                                                    imm:$src2)))),
                          addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
  }
}
7307
// Byte element extract (opcode 0x14): a register-destination form matching
// X86pextrb plus the shared store form.  Gated on HasBWI.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg,
                       (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
7320
// Word element extract.  Gated on HasBWI.
//   rr     - opcode 0xC5, MRMSrcReg, matches X86pextrw.
//   rr_REV - opcode 0x15, MRMDestReg, mnemonic suffixed with ".s", no pattern.
//   mr     - shared store form with opcode 0x15.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg,
                       (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, PD;

    def rr_REV : AVX512Ii8<0x15, MRMDestReg,
                           (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           !strconcat(OpcodeStr,
                             ".s\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                           []>,
                 EVEX, TAPD;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
7338
// Dword/qword element extract (opcode 0x16) into the GPR class GRC or to
// memory, matching the generic extractelt node.  Gated on HasDQI.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    // Register destination.
    def rr : AVX512Ii8<0x16, MRMDestReg,
                       (outs GRC:$dst), (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD;

    // Memory destination.
    let mayStore = 1 in {
      def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                         (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                         !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(store (extractelt (_.VT _.RC:$src1), imm:$src2),
                                 addr:$dst)]>,
               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
    }
  }
}
7358
// Element extracts from 128-bit vectors; the qword form sets VEX_W.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
7363
// Memory-source element insert: OpNode applied to the vector $src1, a scalar
// loaded through LdFrag, and the immediate $src3.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2),
                                         imm:$src3)))]>,
           EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
7373
// Byte/word element insert from a GR32orGR64 register or from memory.  Gated
// on HasBWI.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    // GPR source.
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2,
                                     imm:$src3))]>,
             EVEX_4V;

    // Memory source.
    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
7386
// Dword/qword element insert from the GPR class GRC or from memory, matching
// the generic insertelt node.  Gated on HasDQI.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    // GPR source.
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2,
                                              imm:$src3)))]>,
             EVEX_4V, TAPD;

    // Memory source, loaded through the type's scalar load fragment.
    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>,
                TAPD;
  }
}
7401
// Element inserts into 128-bit vectors.  The byte/word forms load through
// extloadi8 / extloadi16; the qword form sets VEX_W.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>,
                TAPD;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>,
                PD;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
7408//===----------------------------------------------------------------------===//
7409// VSHUFPS - VSHUFPD Operations
7410//===----------------------------------------------------------------------===//
// VSHUFP (opcode 0xC6): the main records are defined over the FP types and a
// codegen-only duplicate (suffix _I) is defined over the integer types.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP> {
  // FP-typed records.
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              AVX512AIi8Base, EVEX_4V;

  // Integer-typed records, marked isCodeGenOnly.
  let isCodeGenOnly = 1 in
    defm NAME#_I : avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0xC6, X86Shufp>,
                   EVEX_CD8<VTInfo_I.info512.EltSize, CD8VF>,
                   AVX512AIi8Base, EVEX_4V;
}
7422
// Single-precision form uses the PS modifier; double-precision uses PD plus
// VEX_W.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>,
               PS;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>,
               PD, VEX_W;
7425//===----------------------------------------------------------------------===//
7426// AVX-512 - Byte shift Left/Right
7427//===----------------------------------------------------------------------===//
7428
// Unmasked shift-by-immediate with a register-source form (format MRMr) and a
// memory-source form (format MRMm).
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86VectorVTInfo _> {
  // Register source.
  def rr : AVX512<opc, MRMr, (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;

  // Memory source.
  let mayLoad = 1 in {
    def rm : AVX512<opc, MRMm, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, u8imm:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.RC:$dst,
                          (_.VT (OpNode (_.LdFrag addr:$src1),
                                        (i8 imm:$src2))))]>;
  }
}
7442
// Instantiates avx512_shift_packed at 512/256/128 bits using the i64 vector
// type infos.  The 512-bit records are named Z512 here; the narrower ones
// additionally require HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   Predicate prd> {
  let Predicates = [prd] in {
    defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    v8i64_info>,
                EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    v4i64x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    v2i64x_info>,
                EVEX_V128;
  }
}
// Both share opcode 0x73 and differ in the ModRM formats: MRM7r/MRM7m for the
// left shift, MRM3r/MRM3m for the right shift.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", HasBWI>,
               AVX512PDIi8Base, EVEX_4V;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", HasBWI>,
               AVX512PDIi8Base, EVEX_4V;
7459
7460
// Unmasked two-source operation whose result type (_dst) differs from its
// source type (_src).  Provides register-register and register-memory forms.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86VectorVTInfo _dst,
                                X86VectorVTInfo _src> {
  // Both sources in registers.
  def rr : AVX512BI<opc, MRMSrcReg, (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.RC:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT _src.RC:$src2))))]>;

  // Second source loaded from memory and bitconverted to the source type.
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs _dst.RC:$dst),
                      (ins _src.RC:$src1, _src.MemOp:$src2),
                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dst.RC:$dst,
                            (_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                                      (_src.VT (bitconvert
                                                 (_src.LdFrag
                                                   addr:$src2))))))]>;
  }
}
7479
// Instantiates avx512_psadbw_packed at 512/256/128 bits with i64 result
// vectors and i8 source vectors.  The narrower forms also require HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info,
                                     v64i8_info>,
                EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info,
                                     v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info,
                                     v16i8x_info>,
                EVEX_V128;
  }
}
7492
// VPSADBW: opcode 0xf6, gated on HasBWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", HasBWI>,
               EVEX_4V;
7495
// VPTERNLOG: three-source operation controlled by an imm8, where $src1 is
// tied to $dst.  Three forms are defined:
//   rri  - all sources in registers.
//   rmi  - third source is a full-vector memory operand.
//   rmbi - third source is a scalar memory operand that is broadcast.
//
// The two operand strings passed to AVX512_maskable_3src are the AT&T and
// Intel orderings respectively.  The Intel string must end with the immediate
// ($src4); it previously repeated $src3 for the rri and rmi forms, which
// dropped the immediate from the Intel-syntax assembly string.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst" in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4))>, AVX512AIi8Base, EVEX_4V;
  let mayLoad = 1 in {
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 imm:$src4))>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i8 imm:$src4))>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
  }
  }// Constraints = "$src1 = $dst"
}
7527
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) at 512 bits
// under HasAVX512 and at 128/256 bits under HasAVX512 + HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>,
                EVEX_V256;
  }
}
7536
// Dword and qword element flavours; the qword form sets VEX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>,
                  VEX_W;
7539
7540