1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
// Bundles every property that can be derived from a vector type described as
// (NumElts x EltVT) -- the write-mask register class, the memory operand, the
// load fragments and so on -- so that multiclasses can take a single record as
// a template argument instead of a long list of individual arguments.
// Scalar types reuse this template with numelts set to 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Mask register class matching the element count: VK<NumElts>.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask variant of the mask register class: VK<NumElts>WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // Mask value type: i1 when NumElts is 1, v<NumElts>i1 otherwise.
  ValueType KVT =
      !cast<ValueType>(!if(!eq(NumElts, 1), "i1", "v" # NumElts # "i1"));

  // GPR register class that can hold the write mask; GR8 is used whenever
  // there are fewer than 8 elements.  TableGen lacks !lt, so "NumElts < 8" is
  // written as "(NumElts >> 3) == 0".
  RegisterClass MRC =
      !cast<RegisterClass>("GR" # !if(!eq(!srl(NumElts, 3), 0), 8, NumElts));

  // Mnemonic suffix of the instruction.
  string Suffix = suffix;

  // Name of the vector VT as a string.  For real vectors this is simply
  // v # NumElts # EltVT (e.g. v8i32 for eight i32 elements).  For scalars
  // (NumElts = 1) the element count is replaced so that a 32-bit element
  // yields v4<EltVT> and a 64-bit element yields v2<EltVT>, e.g. v4f32 or
  // v2f64.
  string VTName = "v" #
                  !if(!eq(NumElts, 1),
                      !if(!eq(EltVT.Size, 32), 4,
                          !if(!eq(EltVT.Size, 64), 2, NumElts)),
                      NumElts) #
                  EltVT;

  // The vector VT itself.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Element size in bits as a string (e.g. "32" for v16i32), obtained by
  // stripping the "i"/"f" letter from the element type name.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer element types, "f" for floating-point element types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Width of the vector VT in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // Matching memory operands: the full-width one (e.g. i512mem for VR512) and
  // the single-element one.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load fragments.
  // Note: 128/256-bit integer VTs use loadv2i64/loadv4i64 because loads are
  //       promoted to those types during legalization.
  PatFrag LdFrag = !cast<PatFrag>(
      "load" # !if(!eq(TypeVariantName, "i"),
                   !if(!eq(Size, 128), "v2i64",
                       !if(!eq(Size, 256), "v4i64", VTName)),
                   VTName));

  // Aligned counterpart.  512-bit integer VTs additionally collapse to v8i64
  // (64-bit elements) or v16i32 (every other element size).
  PatFrag AlignedLdFrag = !cast<PatFrag>(
      "alignedload" #
      !if(!eq(TypeVariantName, "i"),
          !if(!eq(Size, 128), "v2i64",
              !if(!eq(Size, 256), "v4i64",
                  !if(!eq(Size, 512),
                      !if(!eq(EltSize, 64), "v8i64", "v16i32"),
                      VTName))),
          VTName));

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Floating-point VT of the same shape, e.g. v16f32 for v16i32.
  // Note: for EltSize < 32 such a VT is illegal and TableGen would fail to
  //       compile, so FloatVT falls back to VT in that case.
  ValueType FloatVT = !cast<ValueType>(
      !if(!eq(!srl(EltSize, 5), 0),
          VTName,
          !if(!eq(TypeVariantName, "i"),
              "v" # NumElts # "f" # EltSize,
              VTName)));

  // Integer VT of the same shape, with the same EltSize < 32 fallback to VT.
  ValueType IntVT = !cast<ValueType>(
      !if(!eq(!srl(EltSize, 5), 0),
          VTName,
          !if(!eq(TypeVariantName, "f"),
              "v" # NumElts # "i" # EltSize,
              VTName)));

  // Assembly spelling of the embedded-broadcast decorator.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format (CD8VT<NumElts>).
  // Only set while (NumElts >> 4) == 0; left uninitialized (?) otherwise.
  CD8VForm CD8TupleForm =
      !if(!eq(!srl(NumElts, 4), 0), !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) vectors;
  // uninitialized for any other size.
  SubRegIndex SubRegIdx =
      !if(!eq(Size, 128), sub_xmm, !if(!eq(Size, 256), sub_ymm, ?));

  // Execution domain chosen from the element type.
  Domain ExeDomain =
      !if(!eq(EltTypeName, "f32"), SSEPackedSingle,
          !if(!eq(EltTypeName, "f64"), SSEPackedDouble, SSEPackedInt));

  // FR32X for f32 elements, FR64X for everything else.
  RegisterClass FRC = !if(!eq(EltTypeName, "f32"), FR32X, FR64X);

  // Same-width vector type with i32 elements, used to build the canonical
  // all-zeros constant node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Instruction-name suffix selected by vector width.
  string ZSuffix =
      !if(!eq(Size, 128), "Z128", !if(!eq(Size, 256), "Z256", "Z"));
}
133
// 512-bit vector types (register class VR512).
def v64i8_info  : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// 256-bit vector types.  The trailing "x" in names such as v32i8x_info marks
// the extended register class, here RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// Scalar types.  Each one is mapped to the smallest (128-bit) vector type
// with the matching element type so that the same masking logic can be reused.
def i32x_info : X86VectorVTInfo<1, i32, GR32,   "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64,   "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
162
// Groups the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so a single template argument can carry all three widths.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512,
                           X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;   // 512-bit variant
  X86VectorVTInfo info256 = i256;   // 256-bit variant
  X86VectorVTInfo info128 = i128;   // 128-bit variant
}
169
// One width triple (512 / 256 / 128 bits) per element type.
def avx512vl_i8_info :
    AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, v16i8x_info>;
def avx512vl_i16_info :
    AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info :
    AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, v4i32x_info>;
def avx512vl_i64_info :
    AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, v2i64x_info>;
def avx512vl_f32_info :
    AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, v4f32x_info>;
def avx512vl_f64_info :
    AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, v2f64x_info>;
182
// Emits the three flavours of an AVX-512 instruction: the plain (unmasked)
// form NAME, the merge-masking form NAME#k and the zero-masking form NAME#kz.
// Only the assembly strings are synthesized here; the ISel patterns for each
// flavour are custom and must be supplied by the caller through the Pattern,
// MaskingPattern and ZeroMaskingPattern template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  InstrItinClass itin = NoItinerary,
                                  bit IsCommutable = 0> {
  // Unmasked form; the only one that receives the isCommutable setting.
  let isCommutable = IsCommutable in {
    def NAME : AVX512<O, F, Outs, Ins,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                          "$dst, " # IntelSrcAsm # "}",
                      Pattern, itin>;
  }

  // Merge-masking form.  The added complexity makes it preferred over the
  // VMOV*rrk Pat<>.
  let AddedComplexity = 20 in {
    def NAME#k : AVX512<O, F, Outs, MaskingIns,
                        OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                            "$dst {${mask}}, " # IntelSrcAsm # "}",
                        MaskingPattern, itin>,
                 EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }
  }

  // Zero-masking form.  The added complexity makes it preferred over the
  // VMOV*rrkz Pat<>.
  let AddedComplexity = 30 in {
    def NAME#kz : AVX512<O, F, Outs, ZeroMaskingIns,
                         OpcodeStr # "\t{" # AttSrcAsm #
                             ", $dst {${mask}} {z}|" #
                             "$dst {${mask}} {z}, " # IntelSrcAsm # "}",
                         ZeroMaskingPattern, itin>,
                  EVEX_KZ;
  }
}
222
223
// Common base class of AVX512_maskable and AVX512_maskable_3src.
//
// Builds the three ISel patterns from the supplied dags and hands them to
// AVX512_maskable_custom:
//   * unmasked:     (set _.RC:$dst, RHS)
//   * merge-masked: (set _.RC:$dst, MaskingRHS)
//   * zero-masked:  (set _.RC:$dst, (Select mask, RHS, all-zeros))
//
// Select is the node used for the zero-masking select (vselect by default).
// MaskingConstraint, itin and IsCommutable are forwarded unchanged to
// AVX512_maskable_custom.  The itinerary used to be replaced by a hard-coded
// NoItinerary here, which silently discarded whatever the caller passed.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  InstrItinClass itin = NoItinerary,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, itin, IsCommutable>;
242
// Generates the unconditional (non-masking), the masking and the zero-masking
// variant of a vector instruction.  For the masking variant the preserved
// vector elements come from a new dummy input operand, $src0, which is tied
// to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           InstrItinClass itin = NoItinerary,
                           bit IsCommutable = 0, SDNode Select = vselect> :
  AVX512_maskable_common<
      O, F, _, Outs,
      // Inputs of the unmasked, merge-masked and zero-masked forms.
      Ins,
      !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
      !con((ins _.KRCWM:$mask), Ins),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      // Merge-masking: lanes with a clear mask bit keep the value of $src0.
      (Select _.KRCWM:$mask, RHS, _.RC:$src0),
      Select, "$src0 = $dst", itin, IsCommutable>;
258
// Scalar counterpart of AVX512_maskable: generates the unconditional
// (non-masking), the masking and the zero-masking variant of a scalar
// instruction, selecting with X86selects instead of vselect.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS,
                                  InstrItinClass itin = NoItinerary,
                                  bit IsCommutable = 0> :
  AVX512_maskable_common<
      O, F, _, Outs,
      // Inputs of the unmasked, merge-masked and zero-masked forms.
      Ins,
      !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
      !con((ins _.KRCWM:$mask), Ins),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      // Merge-masking pattern; $src0 is tied to $dst below.
      (X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
      X86selects, "$src0 = $dst", itin, IsCommutable>;
273
// Variant of AVX512_maskable for instructions where one source operand
// ($src1) is already tied to $dst; that operand supplies the preserved vector
// elements, so no extra dummy input is added.
// NOTE: NonTiedIns (the ins dag) must not contain $src1; it is prepended
// here.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS> :
  AVX512_maskable_common<
      O, F, _, Outs,
      // Inputs of the unmasked form.
      !con((ins _.RC:$src1), NonTiedIns),
      // The merge-masked and zero-masked forms take identical inputs.
      !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
      !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
288
// Variant of AVX512_maskable_3src for the case where the VT of the tied input
// operand (InVT) differs from the output VT (OutVT).  The preserved vector
// therefore goes through a bitconvert before it is fed to the vselect.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS> :
  AVX512_maskable_common<
      O, F, OutVT, Outs,
      // Inputs of the unmasked form.
      !con((ins InVT.RC:$src1), NonTiedIns),
      // The merge-masked and zero-masked forms take identical inputs.
      !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
      !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      (vselect InVT.KRCWM:$mask, RHS, (bitconvert InVT.RC:$src1))>;
304
// Scalar counterpart of AVX512_maskable_3src: $src1 is prepended to
// NonTiedIns and the masked patterns select with X86selects.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS> :
  AVX512_maskable_common<
      O, F, _, Outs,
      // Inputs of the unmasked form.
      !con((ins _.RC:$src1), NonTiedIns),
      // The merge-masked and zero-masked forms take identical inputs.
      !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
      !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      (X86selects _.KRCWM:$mask, RHS, _.RC:$src1),
      X86selects>;
316
// Generates the unmasked, merge-masked and zero-masked forms, but attaches
// the caller's Pattern to the unmasked form only; both masked forms get empty
// pattern lists.  $src0 is tied to $dst in the merge-masked form.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<
      O, F, Outs,
      // Inputs of the unmasked, merge-masked and zero-masked forms.
      Ins,
      !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
      !con((ins _.KRCWM:$mask), Ins),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      Pattern, [], [],
      "$src0 = $dst">;
327
328
// Masked instructions whose result lands in a mask register, such as
// "compare" and "vptest".  Two forms are emitted: the unmasked NAME and the
// masked NAME#k.  No zero-masking form is generated.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern> {
  // Unmasked form.
  def NAME : AVX512<O, F, Outs, Ins,
                    OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                        "$dst, " # IntelSrcAsm # "}",
                    Pattern, NoItinerary>;

  // Masked form.
  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                          "$dst {${mask}}, " # IntelSrcAsm # "}",
                      MaskingPattern, NoItinerary>,
               EVEX_K;
}
348
// Wraps RHS and MaskingRHS into patterns that set the mask register
// _.KRC:$dst and forwards everything to AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS> :
  AVX512_maskable_custom_cmp<
      O, F, Outs, Ins, MaskingIns,
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      [(set _.KRC:$dst, RHS)],
      [(set _.KRC:$dst, MaskingRHS)]>;
359
// Mask-producing instruction with an unmasked and a masked form.  The masked
// form takes an additional leading $mask input and its pattern is the `and`
// of that mask with RHS.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS> :
  AVX512_maskable_common_cmp<
      O, F, _, Outs,
      Ins,
      !con((ins _.KRCWM:$mask), Ins),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      (and _.KRCWM:$mask, RHS)>;
368
// Pattern-less flavour of AVX512_maskable_cmp: emits the unmasked and masked
// forms with empty pattern lists.
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                                   dag Outs, dag Ins, string OpcodeStr,
                                   string AttSrcAsm, string IntelSrcAsm> :
  AVX512_maskable_custom_cmp<
      O, F, Outs,
      Ins,
      !con((ins _.KRCWM:$mask), Ins),
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      [], []>;
375
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
// Every ordered pair of distinct types among v8f64, v16f32, v8i64, v16i32,
// v32i16 and v64i8 appears exactly once (6 x 5 = 30 patterns).  A second,
// identical copy of the v32i16 <- v16f32 pattern used to be listed; it was
// redundant and has been dropped.

// To v8f64.
def : Pat<(v8f64  (bitconvert (v8i64  VR512:$src))), (v8f64  VR512:$src)>;
def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64  VR512:$src)>;
def : Pat<(v8f64  (bitconvert (v32i16 VR512:$src))), (v8f64  VR512:$src)>;
def : Pat<(v8f64  (bitconvert (v64i8  VR512:$src))), (v8f64  VR512:$src)>;
def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64  VR512:$src)>;
// To v16f32.
def : Pat<(v16f32 (bitconvert (v8i64  VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8  VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64  VR512:$src))), (v16f32 VR512:$src)>;
// To v8i64.
def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64  VR512:$src)>;
def : Pat<(v8i64  (bitconvert (v32i16 VR512:$src))), (v8i64  VR512:$src)>;
def : Pat<(v8i64  (bitconvert (v64i8  VR512:$src))), (v8i64  VR512:$src)>;
def : Pat<(v8i64  (bitconvert (v8f64  VR512:$src))), (v8i64  VR512:$src)>;
def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64  VR512:$src)>;
// To v16i32.
def : Pat<(v16i32 (bitconvert (v8i64  VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8  VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64  VR512:$src))), (v16i32 VR512:$src)>;
// To v32i16.
def : Pat<(v32i16 (bitconvert (v8i64  VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8  VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64  VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
// To v64i8.
def : Pat<(v64i8  (bitconvert (v8i64  VR512:$src))), (v64i8  VR512:$src)>;
def : Pat<(v64i8  (bitconvert (v16i32 VR512:$src))), (v64i8  VR512:$src)>;
def : Pat<(v64i8  (bitconvert (v32i16 VR512:$src))), (v64i8  VR512:$src)>;
def : Pat<(v64i8  (bitconvert (v8f64  VR512:$src))), (v64i8  VR512:$src)>;
def : Pat<(v64i8  (bitconvert (v16f32 VR512:$src))), (v64i8  VR512:$src)>;
409
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// ExpandPostRAPseudos expands it to an xorps / vxorps, which
// ExecutionDepsFix then swizzles to pxor.
// canFoldAsLoad is set because the pseudo can be turned into a constant-pool
// load of an all-zeros value when folding it is beneficial.
// The all-ones pseudo is defined under the same settings.
let isReMaterializable = 1,
    isAsCheapAsAMove = 1,
    canFoldAsLoad = 1,
    isPseudo = 1,
    Predicates = [HasAVX512],
    SchedRW = [WriteZero] in {
  def AVX512_512_SET0 :
      I<0, Pseudo, (outs VR512:$dst), (ins), "",
        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
  def AVX512_512_SETALLONES :
      I<0, Pseudo, (outs VR512:$dst), (ins), "",
        [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
422
// 128-bit and 256-bit all-zeros pseudos, guarded by HasVLX.
let isReMaterializable = 1,
    isAsCheapAsAMove = 1,
    canFoldAsLoad = 1,
    isPseudo = 1,
    Predicates = [HasVLX],
    SchedRW = [WriteZero] in {
  def AVX512_128_SET0 :
      I<0, Pseudo, (outs VR128X:$dst), (ins), "",
        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
  def AVX512_256_SET0 :
      I<0, Pseudo, (outs VR256X:$dst), (ins), "",
        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
430
431//===----------------------------------------------------------------------===//
432// AVX-512 - VECTOR INSERT
433//
// Defines the register (rr) and memory (rm) forms of a subvector insert, each
// going through AVX512_maskable to get its unmasked, merge-masked and
// zero-masked variants.  `From` describes the subvector being inserted, `To`
// the vector that receives it, and $src3 is the immediate operand.
multiclass vinsert_for_size<int Opcode,
                            X86VectorVTInfo From, X86VectorVTInfo To,
                            PatFrag vinsert_insert> {
  let ExeDomain = To.ExeDomain in {
    // Subvector taken from a register.
    defm rr : AVX512_maskable<
                  Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT From.RC:$src2),
                                        (iPTR imm))>,
              AVX512AIi8Base, EVEX_4V;

    // Subvector loaded from memory; the load result is bitconverted to
    // From.VT.
    defm rm : AVX512_maskable<
                  Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3
                      (To.VT To.RC:$src1),
                      (From.VT (bitconvert (From.LdFrag addr:$src2))),
                      (iPTR imm))>,
              AVX512AIi8Base, EVEX_4V,
              EVEX_CD8<From.EltSize, From.CD8TupleForm>;
  }
}
455
// Adds selection patterns that map a subvector insert of From.VT into To.VT
// onto the already-defined instructions InstrStr#"rr" and InstrStr#"rm".
// INSERT_get_vinsert_imm turns the matched node ($ins) into the instruction's
// immediate, and the patterns are only active under the predicates in `p`.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To,
                                     PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                         To.RC:$src1, From.RC:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rm")
                         To.RC:$src1, addr:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
475
// Instantiates vinsert_for_size for every supported subvector/vector size
// combination of one element-type family.  EltVT32/EltVT64 are the 32- and
// 64-bit element types; Opcode128/Opcode256 are the opcodes used when
// inserting a 128-bit or a 256-bit subvector respectively.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256> {

  // 32x4 into a 256-bit vector: requires VLX.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" :
        vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo< 8, EltVT32, VR256X>,
                         vinsert128_insert>,
        EVEX_V256;
  }

  // 32x4 into a 512-bit vector.
  defm NAME # "32x4Z" :
      vinsert_for_size<Opcode128,
                       X86VectorVTInfo< 4, EltVT32, VR128X>,
                       X86VectorVTInfo<16, EltVT32, VR512>,
                       vinsert128_insert>,
      EVEX_V512;

  // 64x4 into a 512-bit vector.
  defm NAME # "64x4Z" :
      vinsert_for_size<Opcode256,
                       X86VectorVTInfo< 4, EltVT64, VR256X>,
                       X86VectorVTInfo< 8, EltVT64, VR512>,
                       vinsert256_insert>,
      VEX_W, EVEX_V512;

  // 64x2 into a 256-bit vector: requires VLX and DQI.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" :
        vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 2, EltVT64, VR128X>,
                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                         vinsert128_insert>,
        VEX_W, EVEX_V256;
  }

  // 64x2 and 32x8 into a 512-bit vector: require DQI.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" :
        vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 2, EltVT64, VR128X>,
                         X86VectorVTInfo< 8, EltVT64, VR512>,
                         vinsert128_insert>,
        VEX_W, EVEX_V512;

    defm NAME # "32x8Z" :
        vinsert_for_size<Opcode256,
                         X86VectorVTInfo< 8, EltVT32, VR256X>,
                         X86VectorVTInfo<16, EltVT32, VR512>,
                         vinsert256_insert>,
        EVEX_V512;
  }
}
513
// Floating-point (VINSERTF*) and integer (VINSERTI*) insert families.
// Arguments: <32-bit elt type, 128-bit opcode, 64-bit elt type, 256-bit opcode>.
defm VINSERTF : vinsert_for_type<f32, 0x18,
                                 f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38,
                                 i64, 0x3a>;
516
// Codegen patterns with the alternative element types.  They are only added
// when 64x2 and its friends are not supported natively via AVX512DQ (NoDQI).

// 128-bit into 256-bit, 64-bit elements lowered onto the 32x4 forms.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX, NoDQI]>;

// 128-bit into 512-bit, 64-bit elements lowered onto the 32x4 forms.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512, NoDQI]>;

// 256-bit into 512-bit, 32-bit elements lowered onto the 64x4 forms.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512, NoDQI]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
549
// vinsertps - insert f32 to XMM
// Register-source form: the second source is a VR128X register.
def VINSERTPSzrr :
    AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
               "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               [(set VR128X:$dst,
                     (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
    EVEX_4V;

// Memory-source form: the second source is an f32 load that is widened to
// v4f32 with scalar_to_vector.
def VINSERTPSzrm :
    AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
               (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
               "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               [(set VR128X:$dst,
                     (X86insertps
                         VR128X:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         imm:$src3))]>,
    EVEX_4V, EVEX_CD8<32, CD8VT1>;
562
563//===----------------------------------------------------------------------===//
564// AVX-512 VECTOR EXTRACT
565//---
566
// Defines the instruction forms for one subvector-extract shape.
//   Opcode           - opcode shared by the rr, mr and mrk forms.
//   From             - type info of the source vector.
//   To               - type info of the extracted subvector.
//   vextract_extract - PatFrag matched for the extract; its $idx is bound to
//                      the instruction's immediate operand.
// Also emits patterns mapping the int_x86_avx512_mask_vextract* intrinsic to
// the rrk (pass-through $src0), rrkz (ImmAllZerosV pass-through) and rr
// (mask of i8 -1) forms.
567multiclass vextract_for_size<int Opcode,
568                                    X86VectorVTInfo From, X86VectorVTInfo To,
569                                    PatFrag vextract_extract> {
570
571  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
572    // Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
573    // vextract_extract); we are only interested in patterns without a mask.
574    // The intrinsic patterns are matched below.
575    defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
576                (ins From.RC:$src1, i32u8imm:$idx),
577                "vextract" # To.EltTypeName # "x" # To.NumElts,
578                "$idx, $src1", "$src1, $idx",
579                [(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
580                                                         (iPTR imm)))]>,
581              AVX512AIi8Base, EVEX;
    // Store form: the extracted subvector is written to To.MemOp:$dst.
582    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
583                    (ins To.MemOp:$dst, From.RC:$src1, i32u8imm:$idx),
584                    "vextract" # To.EltTypeName # "x" # To.NumElts #
585                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
586                    [(store (To.VT (vextract_extract:$idx
587                                    (From.VT From.RC:$src1), (iPTR imm))),
588                             addr:$dst)]>, EVEX;
589
    // Masked store form. It has no selection pattern, so mayStore is set
    // explicitly.
590    let mayStore = 1, hasSideEffects = 0 in
591    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
592                    (ins To.MemOp:$dst, To.KRCWM:$mask,
593                                        From.RC:$src1, i32u8imm:$idx),
594                     "vextract" # To.EltTypeName # "x" # To.NumElts #
595                          "\t{$idx, $src1, $dst {${mask}}|"
596                          "$dst {${mask}}, $src1, $idx}",
597                    []>, EVEX_K, EVEX;
598  }
599
600  // Intrinsic call with masking.
  // The GPR mask (To.MRC) is copied into the write-mask class To.KRCWM.
601  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
602                              "x" # To.NumElts # "_" # From.Size)
603                From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
604            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
605                                From.ZSuffix # "rrk")
606                To.RC:$src0,
607                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
608                From.RC:$src1, imm:$idx)>;
609
610  // Intrinsic call with zero-masking.
611  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
612                              "x" # To.NumElts # "_" # From.Size)
613                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
614            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
615                                From.ZSuffix # "rrkz")
616                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
617                From.RC:$src1, imm:$idx)>;
618
619  // Intrinsic call without masking.
  // Matched when the mask operand is the constant (i8 -1).
620  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
621                              "x" # To.NumElts # "_" # From.Size)
622                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
623            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
624                                From.ZSuffix # "rr")
625                From.RC:$src1, imm:$idx)>;
626}
627
628// Codegen pattern for the alternative types
// Selects the existing instruction named by InstrStr (its "rr" and "mr"
// forms) for an extract from From.VT to To.VT, guarded by the predicates in
// p. EXTRACT_get_vextract_imm is applied to the matched node ($ext) to
// produce the instruction's immediate operand.
629multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
630                X86VectorVTInfo To, PatFrag vextract_extract,
631                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
632  let Predicates = p in {
     // Register destination.
633     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
634               (To.VT (!cast<Instruction>(InstrStr#"rr")
635                          From.RC:$src1,
636                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract folded into a store.
637     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
638                              (iPTR imm))), addr:$dst),
639               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
640                (EXTRACT_get_vextract_imm To.RC:$ext))>;
641  }
642}
643
// Instantiates vextract_for_size for each extract shape of one element
// family (EltVT32 / EltVT64):
//   32x4Z, 64x4Z  - VR512 source, no extra predicate added here.
//   32x4Z256      - VR256X source, requires HasVLX.
//   64x2Z256      - VR256X source, requires HasVLX and HasDQI.
//   64x2Z, 32x8Z  - VR512 source, requires HasDQI.
// Opcode128 is used when the result class is VR128X and Opcode256 when it is
// VR256X.
644multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
645                                             ValueType EltVT64, int Opcode256> {
646  defm NAME # "32x4Z" : vextract_for_size<Opcode128,
647                                 X86VectorVTInfo<16, EltVT32, VR512>,
648                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
649                                 vextract128_extract>,
650                                     EVEX_V512, EVEX_CD8<32, CD8VT4>;
651  defm NAME # "64x4Z" : vextract_for_size<Opcode256,
652                                 X86VectorVTInfo< 8, EltVT64, VR512>,
653                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
654                                 vextract256_extract>,
655                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
656  let Predicates = [HasVLX] in
657    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
658                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
659                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
660                                 vextract128_extract>,
661                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;
662  let Predicates = [HasVLX, HasDQI] in
663    defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
664                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
665                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
666                                 vextract128_extract>,
667                                     VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
668  let Predicates = [HasDQI] in {
669    defm NAME # "64x2Z" : vextract_for_size<Opcode128,
670                                 X86VectorVTInfo< 8, EltVT64, VR512>,
671                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
672                                 vextract128_extract>,
673                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
674    defm NAME # "32x8Z" : vextract_for_size<Opcode256,
675                                 X86VectorVTInfo<16, EltVT32, VR512>,
676                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
677                                 vextract256_extract>,
678                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
679  }
680}
681
// Floating-point (f32/f64) and integer (i32/i64) extract families. The
// first opcode of each pair is used for 128-bit results, the second for
// 256-bit results.
682defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
683defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
684
685// extract_subvector codegen patterns with the alternative types.
686// Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
// 128-bit extracts of 64-bit-element vectors from 512-bit sources use the
// 32x4 instructions.
687defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
688          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
689defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
690          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
691
// 256-bit extracts of 32-bit-element vectors from 512-bit sources use the
// 64x4 instructions.
692defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
693          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
694defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
695          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
696
// 128-bit extracts of 64-bit-element vectors from 256-bit sources use the
// 32x4 Z256 instructions (HasVLX without DQI).
697defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
698          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
699defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
700          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
701
702// Codegen pattern with the alternative types extract VEC128 from VEC256
703defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
704          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
705defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
706          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
707
708// Codegen pattern with the alternative types extract VEC128 from VEC512
709defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
710                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
711defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
712                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
713// Codegen pattern with the alternative types extract VEC256 from VEC512
714defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
715                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
716defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
717                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
718
719// A 128-bit subvector extract from the lowest position (index 0) of a
720// 512-bit vector is a subregister copy (sub_xmm) that needs no instruction.
721def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
722          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
723def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
724          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
725def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
726          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
727def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
728          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
729def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
730          (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm))>;
731def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
732          (v16i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_xmm))>;
733
734// A 256-bit subvector extract from the lowest position (index 0) of a
735// 512-bit vector is a subregister copy (sub_ymm) that needs no instruction.
736def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
737          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
738def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
739          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
740def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
741          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
742def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
743          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
744def : Pat<(v16i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
745          (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm))>;
746def : Pat<(v32i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
747          (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm))>;
748
749let AddedComplexity = 25 in { // to give priority over vinsertf128rm
750// A 128-bit subvector insert into the lowest position (index 0) of an undef
751// 512-bit vector is a subregister copy (sub_xmm) that needs no instruction.
752def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
753          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
754def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
755          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
756def : Pat<(v16i32 (insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0))),
757          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
758def : Pat<(v16f32 (insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0))),
759          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
760def : Pat<(v32i16 (insert_subvector undef, (v8i16 VR128X:$src), (iPTR 0))),
761          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
762def : Pat<(v64i8 (insert_subvector undef, (v16i8 VR128X:$src), (iPTR 0))),
763          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
764
765// A 256-bit subvector insert into the lowest position (index 0) of an undef
766// 512-bit vector is a subregister copy (sub_ymm) that needs no instruction.
767def : Pat<(v8i64 (insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0))),
768          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
769def : Pat<(v8f64 (insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0))),
770          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
771def : Pat<(v16i32 (insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0))),
772          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
773def : Pat<(v16f32 (insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0))),
774          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
775def : Pat<(v32i16 (insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0))),
776          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
777def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
778          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
779}
780
781// vextractps - extract 32 bits from XMM
// Register form: the v4f32 source is bitcast to v4i32 and the element
// selected by the immediate $src2 is written to a GR32.
782def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
783      (ins VR128X:$src1, u8imm:$src2),
784      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
785      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
786      EVEX;
787
// Memory form: the extracted element is stored to f32mem:$dst. Tagged
// EVEX_CD8<32, CD8VT1>.
788def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
789      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
790      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
791      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
792                          addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
793
794//===---------------------------------------------------------------------===//
795// AVX-512 BROADCAST
796//---
797// broadcast with a scalar argument.
// Defines codegen-only (isCodeGenOnly) register forms that apply
// X86VBroadcast to a scalar register of class SrcInfo.FRC, producing
// DestInfo.VT:
//   r_s   - unmasked.
//   rk_s  - vselect on $mask between the broadcast and $src0, with $src0
//           tied to $dst.
//   rkz_s - vselect on $mask between the broadcast and DestInfo.ImmAllZerosV.
// All three forms require HasAVX512.
798multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
799                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
800
801  let isCodeGenOnly = 1 in {
802  def r_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
803               (ins SrcInfo.FRC:$src), OpcodeStr#"\t{$src, $dst|$dst, $src}",
804               [(set DestInfo.RC:$dst, (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)))]>,
805               Requires<[HasAVX512]>, T8PD, EVEX;
806
807  let Constraints = "$src0 = $dst" in
808  def rk_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
809                (ins DestInfo.RC:$src0, DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
810                OpcodeStr#"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
811                [(set DestInfo.RC:$dst,
812                     (vselect DestInfo.KRCWM:$mask,
813                              (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
814                              DestInfo.RC:$src0))]>,
815              Requires<[HasAVX512]>, T8PD, EVEX, EVEX_K;
816
817  def rkz_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
818                (ins DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
819                OpcodeStr#"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
820                [(set DestInfo.RC:$dst,
821                     (vselect DestInfo.KRCWM:$mask,
822                              (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
823                              DestInfo.ImmAllZerosV))]>,
824                Requires<[HasAVX512]>, T8PD, EVEX, EVEX_KZ;
825  } // let isCodeGenOnly = 1 in
826}
827
// Maskable broadcast (X86VBroadcast) into DestInfo.VT:
//   r     - source is a SrcInfo.RC vector register.
//   m     - source is a scalar load (SrcInfo.ScalarLdFrag).
//   m_Int - codegen-only variant of m that matches the scalar load wrapped
//           in scalar_to_vector.
// Both memory forms are tagged EVEX_CD8<SrcInfo.EltSize, CD8VT1>.
828multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
829                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
830
831  defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
832                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
833                   (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
834                   T8PD, EVEX;
835  defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
836                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
837                   (DestInfo.VT (X86VBroadcast
838                                   (SrcInfo.ScalarLdFrag addr:$src)))>,
839                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
840
841  let isCodeGenOnly = 1 in
842  defm m_Int : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
843                 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
844                 (DestInfo.VT
845                   (X86VBroadcast
846                     (SrcInfo.VT (scalar_to_vector
847                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
848                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
849}
850
// Instantiates the floating-point broadcast for the 512-bit (Z) and, under
// HasVLX, the 256-bit (Z256) destination types. Each combines
// avx512_broadcast_rm and avx512_broadcast_scalar with _.info128 as the
// source type info. No 128-bit destination form is defined here.
851multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr,
852                                                       AVX512VLVectorVTInfo _> {
853  defm Z  : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
854            avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
855                             EVEX_V512;
856
857  let Predicates = [HasVLX] in {
858    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
859                 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
860                             EVEX_V256;
861  }
862}
863
// vbroadcastss: Z and Z256 come from avx512_fp_broadcast_vl; the 128-bit
// destination form (Z128) is defined separately here under HasVLX.
864let ExeDomain = SSEPackedSingle in {
865  defm VBROADCASTSS  : avx512_fp_broadcast_vl<0x18, "vbroadcastss",
866                                         avx512vl_f32_info>;
867   let Predicates = [HasVLX] in {
868     defm VBROADCASTSSZ128  :
869           avx512_broadcast_rm<0x18, "vbroadcastss", v4f32x_info, v4f32x_info>,
870           avx512_broadcast_scalar<0x18, "vbroadcastss", v4f32x_info, v4f32x_info>,
871                                        EVEX_V128;
872   }
873}
874
// vbroadcastsd: only the Z and Z256 forms from avx512_fp_broadcast_vl are
// defined; VEX_W is set.
875let ExeDomain = SSEPackedDouble in {
876  defm VBROADCASTSD  : avx512_fp_broadcast_vl<0x19, "vbroadcastsd",
877                                         avx512vl_f64_info>, VEX_W;
878}
879
// Select the memory-source broadcast instructions for the 512-bit
// vbroadcast_ss / vbroadcast_sd intrinsics, which take an address operand.
880def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
881          (VBROADCASTSSZm addr:$src)>;
882def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
883          (VBROADCASTSDZm addr:$src)>;
884
// Maskable integer broadcast whose source is a register of class SrcRC.
//   opc   - opcode of the instruction.
//   _     - type info of the destination vector; _.Suffix is appended to
//           "vpbroadcast" to form the mnemonic.
//   SrcRC - register class of the source operand.
// The paste operator is spelled '#' (not the legacy '##') to match the rest
// of this file.
885multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
886                                    RegisterClass SrcRC> {
887  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
888                         (ins SrcRC:$src),
889                         "vpbroadcast" # _.Suffix, "$src", "$src",
890                         (_.VT (X86VBroadcast SrcRC:$src))>, T8PD, EVEX;
891}
892
// Instantiates avx512_int_broadcast_reg for all three vector lengths.
// The 512-bit form (Z) requires prd; the 256-bit (Z256) and 128-bit (Z128)
// forms additionally require HasVLX.
893multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
894                                       RegisterClass SrcRC, Predicate prd> {
895  let Predicates = [prd] in
896    defm Z : avx512_int_broadcast_reg<opc, _.info512, SrcRC>, EVEX_V512;
897  let Predicates = [prd, HasVLX] in {
898    defm Z256 : avx512_int_broadcast_reg<opc, _.info256, SrcRC>, EVEX_V256;
899    defm Z128 : avx512_int_broadcast_reg<opc, _.info128, SrcRC>, EVEX_V128;
900  }
901}
902
// Byte/word broadcasts from GR8/GR16 are codegen-only; the _Alt variants
// with a GR32 source are asm-parser-only. Both sets require HasBWI.
903let isCodeGenOnly = 1 in {
904defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR8,
905                                                 HasBWI>;
906defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR16,
907                                                 HasBWI>;
908}
909let isAsmParserOnly = 1 in {
910  defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
911                                                       GR32, HasBWI>;
912  defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
913                                                       GR32, HasBWI>;
914}
// Dword/qword broadcasts share opcode 0x7C; the qword form sets VEX_W.
915defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32,
916                                                 HasAVX512>;
917defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64,
918                                                 HasAVX512>, VEX_W;
919
// Lower X86vzext of a mask register to the zero-masking (rkz) register
// broadcast of the constant 1, materialized with MOV32ri / MOV64ri.
920def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
921           (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
922def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
923           (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
924
925// Provide aliases for broadcast from the same register class that
926// automatically does the extract.
// The pattern matches X86VBroadcast of a SrcInfo.VT register and selects the
// NAME#DestInfo.ZSuffix#"r" instruction on the source's sub_xmm subregister.
927multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
928                                            X86VectorVTInfo SrcInfo> {
929  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
930            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
931                (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
932}
933
// Integer broadcast (register/memory source) for all vector lengths, plus
// lowering patterns for broadcasts whose source is a wider register:
//   Z     - 512-bit destination; lowering from a 256-bit source. Requires prd.
//   Z_Alt - lowering from a 512-bit source to the 512-bit destination.
//   Z256  - 256-bit destination; lowering from a 256-bit source.
//   Z128  - 128-bit destination.
// Z256 and Z128 require prd and HasVLX.
934multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
935                                        AVX512VLVectorVTInfo _, Predicate prd> {
936  let Predicates = [prd] in {
937    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
938               avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
939                                  EVEX_V512;
940    // Defined separately to avoid redefinition.
941    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
942  }
943  let Predicates = [prd, HasVLX] in {
944    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
945                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
946                                 EVEX_V256;
947    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
948                                 EVEX_V128;
949  }
950}
951
// Integer element broadcasts with a vector-register or memory source.
// Byte/word forms require HasBWI; dword/qword forms require HasAVX512, and
// the qword form sets VEX_W.
952defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
953                                           avx512vl_i8_info, HasBWI>;
954defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
955                                           avx512vl_i16_info, HasBWI>;
956defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
957                                           avx512vl_i32_info, HasAVX512>;
958defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
959                                           avx512vl_i64_info, HasAVX512>, VEX_W;
960
// Maskable subvector broadcast from memory: loads a _Src subvector
// (_Src.LdFrag, bitconverted to _Src.VT) and applies X86SubVBroadcast to
// produce _Dst.VT. Only a memory-source form (rm) is defined.
961multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
962                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
963  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
964                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
965                           (_Dst.VT (X86SubVBroadcast
966                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
967                            AVX5128IBase, EVEX;
968}
969
// 512-bit destination subvector broadcasts that carry no extra predicate
// here: 32x4 and 64x4 (the 64-bit-element forms set VEX_W).
970defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
971                       v16i32_info, v4i32x_info>,
972                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
973defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
974                       v16f32_info, v4f32x_info>,
975                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
976defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
977                       v8i64_info, v4i64x_info>, VEX_W,
978                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
979defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
980                       v8f64_info, v4f64x_info>, VEX_W,
981                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
982
// 256-bit destination 32x4 broadcasts; require HasVLX.
983let Predicates = [HasVLX] in {
984defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
985                           v8i32x_info, v4i32x_info>,
986                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
987defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
988                           v8f32x_info, v4f32x_info>,
989                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
990}
// 64x2 broadcasts with a v4i64x/v4f64x destination; require HasVLX and
// HasDQI.
// NOTE(review): these records are named ...Z128 but are EVEX_V256 forms,
// unlike the ...Z256 records above - confirm whether the name is intentional
// before relying on it.
991let Predicates = [HasVLX, HasDQI] in {
992defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
993                           v4i64x_info, v2i64x_info>, VEX_W,
994                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
995defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
996                           v4f64x_info, v2f64x_info>, VEX_W,
997                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
998}
// 512-bit destination 64x2 and 32x8 broadcasts; require HasDQI.
999let Predicates = [HasDQI] in {
1000defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
1001                       v8i64_info, v2i64x_info>, VEX_W,
1002                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1003defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
1004                       v16i32_info, v8i32x_info>,
1005                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1006defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
1007                       v8f64_info, v2f64x_info>, VEX_W,
1008                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1009defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
1010                       v16f32_info, v8f32x_info>,
1011                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1012}
1013
// 32x2 broadcast forms common to the integer and floating-point variants:
// a 512-bit destination (Z, HasDQI) and a 256-bit destination (Z256, HasDQI
// and HasVLX). The source type info is _Src.info128 in both cases.
1014multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1015                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1016  let Predicates = [HasDQI] in
1017    defm Z :    avx512_broadcast_rm<opc, OpcodeStr, _Dst.info512, _Src.info128>,
1018                                  EVEX_V512;
1019  let Predicates = [HasDQI, HasVLX] in
1020    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info256, _Src.info128>,
1021                                  EVEX_V256;
1022}
1023
// Extends avx512_common_broadcast_32x2 with a 128-bit destination form
// (Z128), which requires HasDQI and HasVLX.
1024multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1025                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1026  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1027
1028  let Predicates = [HasDQI, HasVLX] in
1029    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info128, _Src.info128>,
1030                                      EVEX_V128;
1031}
1032
// vbroadcasti32x2 uses the i32x2 multiclass and therefore also gets a Z128
// form; vbroadcastf32x2 uses the common multiclass and gets only Z and Z256.
// Destination element type is 32-bit; source type info is the 64-bit family.
1033defm VPBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1034                                           avx512vl_i32_info, avx512vl_i64_info>;
1035defm VPBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1036                                           avx512vl_f32_info, avx512vl_f64_info>;
1037
// Floating-point broadcasts to a 512-bit vector whose source is a 512-bit or
// 256-bit register: select the register-source broadcast on the source's
// sub_xmm subregister.
1038def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1039          (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1040def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1041          (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1042
1043def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1044          (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1045def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1046          (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1047
1048//===----------------------------------------------------------------------===//
1049// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1050//---
// Defines the register form (rr) of a mask-to-vector broadcast: the source
// is a mask register of class KRC and the result is X86VBroadcastm of that
// mask, typed _.VT.
1051multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1052                                  X86VectorVTInfo _, RegisterClass KRC> {
1053  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1054                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1055                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
1056}
1057
// Instantiates avx512_mask_broadcastm for all three vector lengths. The
// 512-bit form (Z) requires HasCDI; the 256-bit and 128-bit forms also
// require HasVLX. The same mask register class KRC is used for every length.
1058multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1059                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1060  let Predicates = [HasCDI] in
1061    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1062  let Predicates = [HasCDI, HasVLX] in {
1063    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1064    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1065  }
1066}
1067
// vpbroadcastmw2d: VK16 mask source, 32-bit-element destination.
// vpbroadcastmb2q: VK8 mask source, 64-bit-element destination, VEX_W set.
1068defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1069                                               avx512vl_i32_info, VK16>;
1070defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1071                                               avx512vl_i64_info, VK8>, VEX_W;
1072
1073//===----------------------------------------------------------------------===//
1074// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of VPERMI2.
//   _     - type info of the data vectors ($src2, $src3) and of the result.
//   IdxVT - type info of the first operand ($src1) of X86VPermi2X, which is
//           tied to $dst.
// Uses AVX512_maskable_3src_cast because $src1 has type IdxVT rather than _.
1075multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1076                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1077let Constraints = "$src1 = $dst" in {
1078  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1079          (ins _.RC:$src2, _.RC:$src3),
1080          OpcodeStr, "$src3, $src2", "$src2, $src3",
1081          (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
1082         AVX5128IBase;
1083
1084  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1085            (ins _.RC:$src2, _.MemOp:$src3),
1086            OpcodeStr, "$src3, $src2", "$src2, $src3",
1087            (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
1088                   (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
1089            EVEX_4V, AVX5128IBase;
1090  }
1091}
// Broadcast-memory form (rmb) of VPERMI2: $src3 is a scalar load that is
// broadcast (X86VBroadcast) to _.VT. The assembly operand carries
// _.BroadcastStr and the instruction sets EVEX_B. $src1 is tied to $dst.
1092multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1093                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1094  let Constraints = "$src1 = $dst" in
1095  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1096              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1097              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1098              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1099              (_.VT (X86VPermi2X IdxVT.RC:$src1,
1100               _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
1101              AVX5128IBase, EVEX_4V, EVEX_B;
1102}
1103
// Instantiates VPERMI2 (rr, rm and rmb forms) for all vector lengths.
// The 512-bit form carries no extra predicate here; the 128-bit and 256-bit
// forms require HasVLX. ShuffleMask supplies the IdxVT type info for each
// length.
1104multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1105                               AVX512VLVectorVTInfo VTInfo,
1106                               AVX512VLVectorVTInfo ShuffleMask> {
1107  defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
1108                           ShuffleMask.info512>,
1109            avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
1110                             ShuffleMask.info512>, EVEX_V512;
1111  let Predicates = [HasVLX] in {
1112  defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
1113                               ShuffleMask.info128>,
1114                 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
1115                                  ShuffleMask.info128>, EVEX_V128;
1116  defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
1117                               ShuffleMask.info256>,
1118                 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
1119                                  ShuffleMask.info256>,  EVEX_V256;
1120  }
1121}
1122
// Instantiates VPERMI2 for all vector lengths under predicate Prd, with only
// the rr and rm forms (no broadcast-memory form). The 128-bit and 256-bit
// forms additionally require HasVLX.
1123multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1124                                 AVX512VLVectorVTInfo VTInfo,
1125                                 AVX512VLVectorVTInfo Idx,
1126                                 Predicate Prd> {
1127  let Predicates = [Prd] in
1128  defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
1129                           Idx.info512>, EVEX_V512;
1130  let Predicates = [Prd, HasVLX] in {
1131  defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
1132                               Idx.info128>, EVEX_V128;
1133  defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
1134                               Idx.info256>,  EVEX_V256;
1135  }
1136}
1137
// VPERMI2 instantiations. D/Q and PS/PD include the broadcast-memory form;
// W (HasBWI) and B (HasVBMI) do not. The floating-point variants take their
// index type info from the integer family of the same element width. Q, W
// and PD set VEX_W.
1138defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d",
1139                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1140defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q",
1141                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1142defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
1143                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1144                  VEX_W, EVEX_CD8<16, CD8VF>;
1145defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
1146                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1147                  EVEX_CD8<8, CD8VF>;
1148defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
1149                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1150defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
1151                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1152
1153// VPERMT2
// Register (rr) and full-vector memory (rm) forms of a VPERMT2 instruction,
// both built with AVX512_maskable_3src and selected from X86VPermt2.
//   _      type info for the data operands ($dst/$src1/$src3).
//   IdxVT  type info whose register class is used for $src2.
// $src1 is tied to $dst in both forms.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst" in {
    // $src3 comes from a register.
    defm rr: AVX512_maskable_3src<
               opc, MRMSrcReg, _,
               (outs _.RC:$dst),
               (ins IdxVT.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3))>,
             EVEX_4V, AVX5128IBase;

    // $src3 is loaded through _.LdFrag and bitconverted to _.VT.
    defm rm: AVX512_maskable_3src<
               opc, MRMSrcMem, _,
               (outs _.RC:$dst),
               (ins IdxVT.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                                 (bitconvert (_.LdFrag addr:$src3))))>,
             EVEX_4V, AVX5128IBase;
  }
}
// Broadcast-memory (rmb) form of a VPERMT2 instruction: $src3 is a scalar
// loaded through _.ScalarLdFrag and wrapped in X86VBroadcast.  The assembly
// operand string carries _.BroadcastStr after the memory operand, and the
// instruction is marked EVEX_B.  $src1 is tied to $dst.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst" in {
    defm rmb: AVX512_maskable_3src<
                opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                !strconcat("$src2, ${src3}", _.BroadcastStr),
                (_.VT (X86VPermt2
                         _.RC:$src1,
                         IdxVT.RC:$src2,
                         (_.VT (X86VBroadcast
                                  (_.ScalarLdFrag addr:$src3)))))>,
              AVX5128IBase, EVEX_4V, EVEX_B;
  }
}
1182
// Instantiates avx512_perm_t (rr, rm) together with avx512_perm_t_mb (rmb) at
// 512, 128 and 256 bits.  The 512-bit form sets no predicate of its own here;
// the 128-bit and 256-bit forms require HasVLX.
//   VTInfo       per-width data type info.
//   ShuffleMask  per-width type info passed as the IdxVT argument.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  // 512-bit form.
  defm NAME: avx512_perm_t<opc, OpcodeStr,
                           VTInfo.info512, ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr,
                              VTInfo.info512, ShuffleMask.info512>,
             EVEX_V512;

  // 128-bit and 256-bit forms.
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr,
                                 VTInfo.info128, ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr,
                                    VTInfo.info128, ShuffleMask.info128>,
                   EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr,
                                 VTInfo.info256, ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr,
                                    VTInfo.info256, ShuffleMask.info256>,
                   EVEX_V256;
  }
}
1201
// Instantiates avx512_perm_t (rr, rm) at every vector width for a VPERMT2
// variant gated on the feature predicate Prd.  The 512-bit form requires Prd;
// the 128-bit and 256-bit forms require Prd and HasVLX.  The broadcast form
// (avx512_perm_t_mb) is not instantiated here.
//   VTInfo  per-width data type info.
//   Idx     per-width type info passed as the IdxVT argument.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  // 512-bit form.
  let Predicates = [Prd] in {
    defm NAME: avx512_perm_t<opc, OpcodeStr,
                             VTInfo.info512, Idx.info512>,
               EVEX_V512;
  }

  // 128-bit and 256-bit forms.
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr,
                                 VTInfo.info128, Idx.info128>,
                   EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr,
                                 VTInfo.info256, Idx.info256>,
                   EVEX_V256;
  }
}
1216
// VPERMT2 instantiations.
// The dword/qword and PS/PD variants go through avx512_perm_t_sizes (rr, rm
// and rmb forms); the word and byte variants go through
// avx512_perm_t_sizes_bw (rr and rm only), gated on HasBWI and HasVBMI
// respectively.  The PS/PD variants use integer type info of the same element
// width for the index operand.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", avx512vl_i32_info,
                                     avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", avx512vl_i64_info,
                                     avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", avx512vl_i16_info,
                                        avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", avx512vl_i8_info,
                                        avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", avx512vl_f32_info,
                                     avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", avx512vl_f64_info,
                                     avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
1231
1232//===----------------------------------------------------------------------===//
1233// AVX-512 - BLEND using mask
1234//
// Register and full-vector memory forms of a mask blend instruction.
//   rr   / rm     unmasked; no pattern.
//   rrk  / rmk    merge-masked; selected from
//                 (vselect $mask, $src2, $src1).
//   rrkz / rmkz   zero-masked; no pattern.
// Forms without a pattern declare hasSideEffects = 0 (and mayLoad = 1 for the
// memory forms) explicitly.  All memory forms add
// EVEX_CD8<_.EltSize, CD8VF>.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // ---- Register source ----
    let hasSideEffects = 0 in {
      def rr : AVX5128I<opc, MRMSrcReg,
                        (outs _.RC:$dst),
                        (ins _.RC:$src1, _.RC:$src2),
                        !strconcat(OpcodeStr,
                          "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
                        []>,
               EVEX_4V;
    }

    def rrk : AVX5128I<opc, MRMSrcReg,
                       (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                       [(set _.RC:$dst,
                             (vselect _.KRCWM:$mask,
                                      (_.VT _.RC:$src2),
                                      (_.VT _.RC:$src1)))]>,
              EVEX_4V, EVEX_K;

    let hasSideEffects = 0 in {
      def rrkz : AVX5128I<opc, MRMSrcReg,
                          (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                          !strconcat(OpcodeStr,
                            "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                          []>,
                 EVEX_4V, EVEX_KZ;
    }

    // ---- Full-vector memory source ----
    let mayLoad = 1, hasSideEffects = 0 in {
      def rm : AVX5128I<opc, MRMSrcMem,
                        (outs _.RC:$dst),
                        (ins _.RC:$src1, _.MemOp:$src2),
                        !strconcat(OpcodeStr,
                          "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
                        []>,
               EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
    }

    def rmk : AVX5128I<opc, MRMSrcMem,
                       (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                       [(set _.RC:$dst,
                             (vselect _.KRCWM:$mask,
                                      (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                      (_.VT _.RC:$src1)))]>,
              EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;

    let mayLoad = 1, hasSideEffects = 0 in {
      def rmkz : AVX5128I<opc, MRMSrcMem,
                          (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                          !strconcat(OpcodeStr,
                            "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                          []>,
                 EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
    }
  }
}
// Broadcast-memory forms of a mask blend instruction.
//   rmbk  merge-masked; selected from
//         (vselect $mask, (X86VBroadcast <scalar load>), $src1).
//   rmb   unmasked; no pattern, so mayLoad/hasSideEffects are set explicitly.
// Both forms print _.BroadcastStr after the memory operand and are marked
// EVEX_B with EVEX_CD8<_.EltSize, CD8VF>.
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
  def rmbk : AVX5128I<opc, MRMSrcMem,
                      (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                      [(set _.RC:$dst,
                            (vselect _.KRCWM:$mask,
                                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                     (_.VT _.RC:$src1)))]>,
             EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;

  let mayLoad = 1, hasSideEffects = 0 in {
    def rmb : AVX5128I<opc, MRMSrcMem,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.ScalarMemOp:$src2),
                       !strconcat(OpcodeStr,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                       []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
1299
// Mask blend at all three vector widths, including the broadcast-memory
// forms (avx512_blendmask_rmb).  The 512-bit form (Z) sets no predicate of
// its own here; Z256 and Z128 require HasVLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_blendmask<opc, OpcodeStr, VTInfo.info512>,
           avx512_blendmask_rmb<opc, OpcodeStr, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
                avx512_blendmask_rmb<opc, OpcodeStr, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
                avx512_blendmask_rmb<opc, OpcodeStr, VTInfo.info128>,
                EVEX_V128;
  }
}
1312
// Mask blend at all three vector widths without broadcast-memory forms.
// The 512-bit form (Z) requires HasBWI; Z256 and Z128 require HasBWI and
// HasVLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_blendmask<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
  }
}
1323
1324
// Mask blend instantiations.  PS/PD and D/Q use blendmask_dq (with broadcast
// forms); B/W use blendmask_bw.  The PD, Q and W variants add VEX_W.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
1331
1332
// 256-bit vselect on targets with AVX512 but without VLX: place each 256-bit
// operand in a 512-bit register (SUBREG_TO_REG ... sub_ymm), copy the 8-bit
// mask into VK16WM, use the 512-bit merge-masked blend, and extract sub_ymm
// from the result.  The Pat operands are passed to the blend as
// ($src2, $src1) because the rrk pattern is (vselect $mask, $src2, $src1).
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                            (v8f32 VR256X:$src1),
                            (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
               (v16f32
                  (VBLENDMPSZrrk
                     (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                     (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                     (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
               sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                            (v8i32 VR256X:$src1),
                            (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
               (v16i32
                  (VPBLENDMDZrrk
                     (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                     (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                     (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
               sub_ymm)>;
}
1348//===----------------------------------------------------------------------===//
1349// Compare Instructions
1350//===----------------------------------------------------------------------===//
1351
1352// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
1353
// Scalar floating-point compares (opcode 0xC2) that write a mask register.
//
//   _          type info for the scalar type: register classes (_.RC, _.FRC,
//              _.KRC), memory operand, scalar load fragment and the mnemonic
//              suffix.
//   OpNode     compare node used by the plain register/memory patterns.
//   OpNodeRnd  compare node used by the {sae} pattern; it takes an additional
//              i32 operand, which is always FROUND_NO_EXC here.
//
// Forms:
//   rr_Int, rm_Int, rrb_Int    operands in _.RC, built with
//                              AVX512_maskable_cmp; condition code is part of
//                              the mnemonic (vcmp${cc}).
//   rri_alt, rmi_alt, rrb_alt  assembler-only forms that take the condition
//                              as an explicit u8 immediate; no patterns.
//   rr, rm                     codegen-only forms operating on _.FRC.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{

  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc)>, EVEX_4V;
  // The scalar load is wrapped in scalar_to_vector to match the vector-typed
  // compare node.
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1),
                        (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
                        imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;

  // {sae} form: register operands only, marked EVEX_B.
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // Uses _.KRC like every other form in this multiclass (the original
    // spelled the class out as VK1).
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
    // This memory form has no pattern from which the load could be inferred,
    // so mayLoad must be stated explicitly (as avx512_vcmp_common does for
    // its rmi_alt).
    let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
                       EVEX_4V, EVEX_B;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  // Forms used only by instruction selection; operands live in _.FRC.
  let isCodeGenOnly = 1 in {
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))],
                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
    // NOTE(review): rr uses the scalar itinerary IIC_SSE_ALU_F32S_RR while
    // this form uses the packed IIC_SSE_ALU_F32P_RM - confirm this is
    // intended.
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
  }
}
1424
// Scalar compare instantiations, available with HasAVX512.  Both use
// X86cmpms / X86cmpmsRnd; the double-precision variant adds VEX_W.
let Predicates = [HasAVX512] in {
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
                 AVX512XSIi8Base;
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
                 AVX512XDIi8Base, VEX_W;
}
1431
// Packed integer compare writing a mask register (_.KRC).
//   rr  / rm    unmasked register / full-vector memory forms.
//   rrk / rmk   masked forms; the pattern ANDs the compare result with $mask.
// OpNode is the two-operand compare node; _ supplies register classes, the
// memory operand and the load fragment.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  // ---- Unmasked ----
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1),
                                  (_.VT _.RC:$src2)))],
                    IIC_SSE_ALU_F32P_RR>,
           EVEX_4V;

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1),
                                  (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
                    IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  // ---- Masked ----
  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2))))],
                     IIC_SSE_ALU_F32P_RR>,
            EVEX_4V, EVEX_K;

  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT (bitconvert
                                                 (_.LdFrag addr:$src2))))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_K;
}
1462
// Extends avx512_icmp_packed (rr, rm, rrk, rmk) with broadcast-memory forms:
//   rmb   unmasked; second operand is (X86VBroadcast <scalar load>).
//   rmbk  masked; the pattern ANDs the compare result with $mask.
// Both print _.BroadcastStr after the memory operand and are marked EVEX_B.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _>
    : avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     [(set _.KRC:$dst,
                           (OpNode (_.VT _.RC:$src1),
                                   (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src2))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_B;

  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr,
                                 ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}",
                                 _.BroadcastStr, "}"),
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (OpNode (_.VT _.RC:$src1),
                                         (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))],
                      IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_K, EVEX_B;
}
1485
// Instantiates avx512_icmp_packed (no broadcast forms) at all three vector
// widths.  Z requires prd; Z256 and Z128 require prd and HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1499
// Instantiates avx512_icmp_packed_rmb (including broadcast forms) at all
// three vector widths.  Z requires prd; Z256 and Z128 require prd and HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info128>,
                EVEX_V128;
  }
}
1514
// Packed integer equality (X86pcmpeqm) and greater-than (X86pcmpgtm)
// compares.  The byte and word variants use avx512_icmp_packed_vl (no
// broadcast forms) gated on HasBWI; the dword and qword variants use
// avx512_icmp_packed_rmb_vl gated on HasAVX512.  The qword variants add
// T8PD and VEX_W.

// ---- Equality ----
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// ---- Greater-than ----
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1546
// v8i32 compares on targets with AVX512 but without VLX: place each 256-bit
// operand in a 512-bit register (SUBREG_TO_REG ... sub_ymm), run the 512-bit
// compare and copy the resulting mask to VK8.
//
// The patterns are restricted to [HasAVX512, NoVLX], matching the 256-bit
// vselect patterns earlier in this file.  The native Z256 compare forms are
// defined under [prd, HasVLX], so unguarded widening patterns would overlap
// them when VLX is available.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (COPY_TO_REGCLASS (VPCMPGTDZrr
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (COPY_TO_REGCLASS (VPCMPEQDZrr
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
}
1556
// Packed integer compare with a condition-code operand, writing a mask
// register (_.KRC).  The mnemonic is "vpcmp" + condition + Suffix.
//   rri  / rmi     unmasked register / full-vector memory forms.
//   rrik / rmik    masked forms; the pattern ANDs the result with $mask.
//   *_alt          assembler-only forms taking the condition as an explicit
//                  u8 immediate; they carry no patterns.
// OpNode is the three-operand compare node (lhs, rhs, imm:$cc).
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          X86VectorVTInfo _> {
  // ---- Unmasked ----
  def rri : AVX512AIi8<opc, MRMSrcReg,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
                       !strconcat("vpcmp${cc}", Suffix,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set _.KRC:$dst,
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     imm:$cc))],
                       IIC_SSE_ALU_F32P_RR>,
            EVEX_4V;

  def rmi : AVX512AIi8<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
                       !strconcat("vpcmp${cc}", Suffix,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set _.KRC:$dst,
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                     imm:$cc))],
                       IIC_SSE_ALU_F32P_RM>,
            EVEX_4V;

  // ---- Masked ----
  def rrik : AVX512AIi8<opc, MRMSrcReg,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                             AVX512ICC:$cc),
                        !strconcat("vpcmp${cc}", Suffix,
                                   "\t{$src2, $src1, $dst {${mask}}|",
                                   "$dst {${mask}}, $src1, $src2}"),
                        [(set _.KRC:$dst,
                              (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.VT _.RC:$src2),
                                           imm:$cc)))],
                        IIC_SSE_ALU_F32P_RR>,
             EVEX_4V, EVEX_K;

  def rmik : AVX512AIi8<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                             AVX512ICC:$cc),
                        !strconcat("vpcmp${cc}", Suffix,
                                   "\t{$src2, $src1, $dst {${mask}}|",
                                   "$dst {${mask}}, $src1, $src2}"),
                        [(set _.KRC:$dst,
                              (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.VT (bitconvert
                                                    (_.LdFrag addr:$src2))),
                                           imm:$cc)))],
                        IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_K;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                             !strconcat("vpcmp", Suffix,
                                        "\t{$cc, $src2, $src1, $dst|",
                                        "$dst, $src1, $src2, $cc}"),
                             [], IIC_SSE_ALU_F32P_RR>,
                  EVEX_4V;

    // No pattern, so the load is declared explicitly.
    let mayLoad = 1 in {
      def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                               (outs _.KRC:$dst),
                               (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                               !strconcat("vpcmp", Suffix,
                                          "\t{$cc, $src2, $src1, $dst|",
                                          "$dst, $src1, $src2, $cc}"),
                               [], IIC_SSE_ALU_F32P_RM>,
                    EVEX_4V;
    }

    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                              (outs _.KRC:$dst),
                              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                   u8imm:$cc),
                              !strconcat("vpcmp", Suffix,
                                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                                         "$dst {${mask}}, $src1, $src2, $cc}"),
                              [], IIC_SSE_ALU_F32P_RR>,
                   EVEX_4V, EVEX_K;

    // No pattern, so the load is declared explicitly.
    let mayLoad = 1 in {
      def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                                (outs _.KRC:$dst),
                                (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                     u8imm:$cc),
                                !strconcat("vpcmp", Suffix,
                                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
                                           "$dst {${mask}}, $src1, $src2, $cc}"),
                                [], IIC_SSE_ALU_F32P_RM>,
                     EVEX_4V, EVEX_K;
    }
  }
}
1626
// Extends avx512_icmp_cc with broadcast-memory forms:
//   rmib  / rmibk          condition code in the mnemonic; second operand is
//                          (X86VBroadcast <scalar load>); the masked form
//                          ANDs the result with $mask.
//   rmib_alt / rmibk_alt   assembler-only forms taking the condition as an
//                          explicit u8 immediate; no patterns.
// All four print _.BroadcastStr after the memory operand and are marked
// EVEX_B.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              X86VectorVTInfo _>
    : avx512_icmp_cc<opc, Suffix, OpNode, _> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, AVX512ICC:$cc),
                        !strconcat("vpcmp${cc}", Suffix,
                                   "\t{${src2}", _.BroadcastStr,
                                   ", $src1, $dst|",
                                   "$dst, $src1, ${src2}", _.BroadcastStr,
                                   "}"),
                        [(set _.KRC:$dst,
                              (OpNode (_.VT _.RC:$src1),
                                      (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)),
                                      imm:$cc))],
                        IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_B;

  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                         (outs _.KRC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2,
                              AVX512ICC:$cc),
                         !strconcat("vpcmp${cc}", Suffix,
                                    "\t{${src2}", _.BroadcastStr,
                                    ", $src1, $dst {${mask}}|",
                                    "$dst {${mask}}, $src1, ${src2}",
                                    _.BroadcastStr, "}"),
                         [(set _.KRC:$dst,
                               (and _.KRCWM:$mask,
                                    (OpNode (_.VT _.RC:$src1),
                                            (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2)),
                                            imm:$cc)))],
                         IIC_SSE_ALU_F32P_RM>,
              EVEX_4V, EVEX_K, EVEX_B;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
                              (outs _.KRC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                              !strconcat("vpcmp", Suffix,
                                         "\t{$cc, ${src2}", _.BroadcastStr,
                                         ", $src1, $dst|",
                                         "$dst, $src1, ${src2}",
                                         _.BroadcastStr, ", $cc}"),
                              [], IIC_SSE_ALU_F32P_RM>,
                   EVEX_4V, EVEX_B;

    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
                               (outs _.KRC:$dst),
                               (ins _.KRCWM:$mask, _.RC:$src1,
                                    _.ScalarMemOp:$src2, u8imm:$cc),
                               !strconcat("vpcmp", Suffix,
                                          "\t{$cc, ${src2}", _.BroadcastStr,
                                          ", $src1, $dst {${mask}}|",
                                          "$dst {${mask}}, $src1, ${src2}",
                                          _.BroadcastStr, ", $cc}"),
                               [], IIC_SSE_ALU_F32P_RM>,
                    EVEX_4V, EVEX_K, EVEX_B;
  }
}
1670
// Instantiates avx512_icmp_cc (no broadcast forms) at all three vector
// widths.  Z requires prd; Z256 and Z128 require prd and HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1681
// Instantiates avx512_icmp_cc_rmb (including broadcast forms) at all three
// vector widths.  Z requires prd; Z256 and Z128 require prd and HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1695
// Condition-code integer compares.  Mnemonics with a "u" suffix use
// X86cmpmu, the others X86cmpm.  Byte and word variants use
// avx512_icmp_cc_vl gated on HasBWI; dword and qword variants use
// avx512_icmp_cc_rmb_vl (with broadcast forms) gated on HasAVX512.  The word
// and qword variants add VEX_W.

// ---- Byte ----
defm VPCMPB  : avx512_icmp_cc_vl<0x3F, "b", X86cmpm,
                                 avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu,
                                 avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

// ---- Word ----
defm VPCMPW  : avx512_icmp_cc_vl<0x3F, "w", X86cmpm,
                                 avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu,
                                 avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

// ---- Dword ----
defm VPCMPD  : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm,
                                     avx512vl_i32_info, HasAVX512>,
               EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu,
                                     avx512vl_i32_info, HasAVX512>,
               EVEX_CD8<32, CD8VF>;

// ---- Qword ----
defm VPCMPQ  : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm,
                                     avx512vl_i64_info, HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu,
                                     avx512vl_i64_info, HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
1715
// Packed floating-point compares (opcode 0xC2) writing a mask register,
// selected from X86cmpm.
//   rri / rmi / rmbi               register, full-vector memory and
//                                  broadcast-memory forms with the condition
//                                  code in the mnemonic (vcmp${cc}).
//   rri_alt / rmi_alt / rmbi_alt   assembler-only forms taking the condition
//                                  as an explicit u8 immediate; no patterns,
//                                  so the memory forms set mayLoad.
// The broadcast forms are marked EVEX_B.
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
  // ---- Condition code in the mnemonic ----
  defm rri : AVX512_maskable_cmp<
               0xC2, MRMSrcReg, _,
               (outs _.KRC:$dst),
               (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
               "vcmp${cc}"#_.Suffix,
               "$src2, $src1", "$src1, $src2",
               (X86cmpm (_.VT _.RC:$src1),
                        (_.VT _.RC:$src2),
                        imm:$cc)>;

  defm rmi : AVX512_maskable_cmp<
               0xC2, MRMSrcMem, _,
               (outs _.KRC:$dst),
               (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
               "vcmp${cc}"#_.Suffix,
               "$src2, $src1", "$src1, $src2",
               (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (bitconvert (_.LdFrag addr:$src2))),
                        imm:$cc)>;

  defm rmbi : AVX512_maskable_cmp<
                0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr,
                (X86cmpm (_.VT _.RC:$src1),
                         (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                         imm:$cc)>,
              EVEX_B;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rri_alt : AVX512_maskable_cmp_alt<
                     0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, $src2, $src1", "$src1, $src2, $cc">;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<
                       0xC2, MRMSrcMem, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, $src2, $src1", "$src1, $src2, $cc">;

      defm rmbi_alt : AVX512_maskable_cmp_alt<
                        0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, ${src2}"##_.BroadcastStr##", $src1",
                        "$src1, ${src2}"##_.BroadcastStr##", $cc">,
                      EVEX_B;
    }
  }
}
1767
// Register/register vcmp forms that carry the "{sae}" operand in their
// assembly string.  Both defs set EVEX_B; the pattern form matches X86cmpmRnd
// with FROUND_NO_EXC as its rounding operand.
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...])
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (X86cmpmRnd (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    imm:$cc,
                                (i32 FROUND_NO_EXC))>, EVEX_B;

  // Explicit-immediate spelling of the same instruction; assembler-parser
  // only and without a selection pattern.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, {sae}, $src2, $src1",
                         "$src1, $src2, {sae}, $cc">, EVEX_B;
   }
}
1788
// Instantiates the vcmp forms for each vector length of the given type set:
//   Z         - info512, EVEX_V512, requires HasAVX512; gets the common
//               forms and the {sae} forms.
//   Z128/Z256 - info128/info256, EVEX_V128/EVEX_V256, require HasAVX512 and
//               HasVLX; get the common forms only.
multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<_.info512>,
             avx512_vcmp_sae<_.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
  }
}
1800
// Packed vcmp instantiations.  VCMPPD is built from avx512vl_f64_info with
// EVEX_CD8<64, CD8VF> and VEX_W; VCMPPS is built from avx512vl_f32_info with
// EVEX_CD8<32, CD8VF>.
defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
1805
// Select 8-element compares on VR256X operands through the 512-bit
// instructions: both sources are placed in a 512-bit register with
// SUBREG_TO_REG (sub_ymm), the Z-form compare is executed, and the resulting
// mask is copied into VK8.
// NOTE(review): no Predicates are attached to these patterns; presumably they
// are meant for targets without VLX - confirm.

// v8f32 compare through VCMPPSZrri.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VCMPPSZrri
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
// v8i32 X86cmpm compare through VPCMPDZrri.
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VPCMPDZrri
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
// v8i32 X86cmpmu compare through VPCMPUDZrri.
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VPCMPUDZrri
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
1821
1822// ----------------------------------------------------------------
1823// FPClass
1824//handle fpclass instruction  mask =  op(reg_scalar,imm)
1825//                                    op(mem_scalar,imm)
// Defs produced for one scalar fpclass instruction (all guarded by prd):
//   rr  / rm  - unmasked register and memory forms
//   rrk / rmk - write-masked register and memory forms (EVEX_K)
// opc is the opcode byte, OpcodeStr the mnemonic stem (the type suffix comes
// from _.Suffix) and OpNode the DAG node that is matched.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)))], NoItinerary>;
      // Write-masking a mask destination zeroes every bit whose mask bit is
      // clear, so the masked result is (and mask, fpclass), not (or ...).
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))], NoItinerary>, EVEX_K;
    // The load-folding patterns get extra complexity so they are tried
    // before the register forms.
    let AddedComplexity = 20 in {
      def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,
                            (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i32 imm:$src2)))], NoItinerary>;
      // Masked memory form; same AND-with-mask semantics as rrk.
      def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                          (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2))))], NoItinerary>, EVEX_K;
    }
  }
}
1859
//handle fpclass instruction mask = fpclass(reg_vec, imm)
//                                  fpclass(mem_vec, imm)
//                                  fpclass(broadcast(eltVt), imm)
// Defs produced for one vector length of a vector fpclass instruction:
//   rr  / rrk  - register source, unmasked / write-masked
//   rm  / rmk  - full-vector memory source; `mem` is appended to the mnemonic
//   rmb / rmbk - broadcast scalar memory source (EVEX_B); `broadcast` is
//                appended to the mnemonic
// The 'k' variants take a _.KRCWM write mask and set EVEX_K.  Write-masking a
// mask destination zeroes every bit whose mask bit is clear, so the masked
// patterns combine the mask and the classification result with `and`.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string mem, string broadcast>{
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))], NoItinerary>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))], NoItinerary>, EVEX_K;
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2)))], NoItinerary>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))], NoItinerary>, EVEX_K;
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))], NoItinerary>,EVEX_B;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))], NoItinerary>,
                                                          EVEX_B, EVEX_K;
}
1911
// Instantiates avx512_vector_fpclass for each vector length.  The memory
// forms get a per-length mnemonic suffix:
//   Z    - info512, "{z}", EVEX_V512, requires prd
//   Z128 - info128, "{x}", EVEX_V128, requires prd and HasVLX
//   Z256 - info256, "{y}", EVEX_V256, requires prd and HasVLX
// `broadcast` is forwarded unchanged to every instantiation.
multiclass avx512_vector_fpclass_all<string OpcodeStr,
                                     AVX512VLVectorVTInfo _,
                                     bits<8> opc,
                                     SDNode OpNode,
                                     Predicate prd,
                                     string broadcast> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode,
                                   _.info512, "{z}", broadcast>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode,
                                      _.info128, "{x}", broadcast>,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode,
                                      _.info256, "{y}", broadcast>,
                EVEX_V256;
  }
}
1926
// Instantiates all four fpclass flavours from one mnemonic stem:
//   PS / PD - vector forms (opcVec, VecOpNode) over avx512vl_f32_info /
//             avx512vl_f64_info, with broadcast suffix "{l}" / "{q}"
//   SS / SD - scalar forms (opcScalar, ScalarOpNode) over f32x_info /
//             f64x_info
// The PD and SD variants additionally set VEX_W.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
             bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                      f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                      f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
}
1938
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, matched from
// X86Vfpclass / X86Vfpclasss and guarded by HasDQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
1941
1942//-----------------------------------------------------------------
1943// Mask register copy, including
1944// - copy between mask registers
1945// - load/store mask registers
1946// - copy from GPR to mask register and vice versa
1947//
// Mask-register move forms that do not involve a GPR:
//   kk - mask register to mask register (opc_kk); no selection pattern
//   km - load a mask register of type vvt from memory (opc_km)
//   mk - store a mask register to memory (opc_mk)
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  // Only the pattern-less register copy carries the explicit annotation.
  let hasSideEffects = 0 in {
    def kk : I<opc_kk, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               []>;
  }

  def km : I<opc_km, MRMSrcMem,
             (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (load addr:$src)))]>;

  def mk : I<opc_mk, MRMDestMem,
             (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(store KRC:$src, addr:$dst)]>;
}
1961
// Mask-register moves to and from a general-purpose register:
//   kr - GPR (GRC) to mask register (KRC), opcode opc_kr
//   rk - mask register (KRC) to GPR (GRC), opcode opc_rk
// Neither def has a selection pattern, so hasSideEffects is cleared by hand.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg,
               (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               []>;

    def rk : I<opc_rk, MRMSrcReg,
               (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               []>;
  }
}
1972
// KMOVB (HasDQI): 8-bit mask moves.  The GPR forms use GR32.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

// KMOVW (HasAVX512): 16-bit mask moves.  The GPR forms use GR32.
let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

// KMOVD / KMOVQ (HasBWI): the mask/memory forms and the GPR forms are emitted
// by separate defm statements because they need different prefix classes
// (PD or PS with VEX_W for mask/memory, XD for the GPR forms).
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
1993
1994// GR from/to mask register
// 8-bit masks (DQI).  KMOVB works on GR32, so a GR8 source is first placed in
// a 32-bit register with SUBREG_TO_REG and a GR8 result is taken out with
// EXTRACT_SUBREG.  A zext/anyext to i32 of the bitconverted mask uses the
// KMOVBrk result directly.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;

  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;

  def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
            (KMOVBrk VK8:$src)>;

  def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
            (KMOVBrk VK8:$src)>;
}

// 16-bit masks (AVX512).  Same scheme as above, using KMOVW and sub_16bit.
let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;

  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;

  def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
            (KMOVWrk VK16:$src)>;

  def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
            (KMOVWrk VK16:$src)>;
}

// 32- and 64-bit masks (BWI).  The GPR width equals the mask width, so the
// bitconverts map straight onto KMOVD / KMOVQ.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
            (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
            (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
            (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
            (KMOVQrk VK64:$src)>;
}
2023
2024// Load/store kreg
// With DQI, byte-sized mask loads and stores go through KMOVB.
let Predicates = [HasDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (KMOVBkm addr:$src)>;

  // Masks narrower than 8 bits are copied into VK8 and stored as a byte.
  def : Pat<(store VK4:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  // v2i1 / v4i1 loads read a byte with KMOVB and reclassify the result.
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
// Without DQI, masks of up to 8 bits are stored by moving the mask to a GPR
// with KMOVW (after copying it into VK16) and storing the low byte with
// MOV8mr; they are loaded with MOVZX32rm8 and copied into the mask class.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store VK4:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store VK8:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit))>;

  def : Pat<(v8i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}

// 16-bit mask loads/stores use KMOVW.  An i1 load reads a byte with
// MOVZX32rm8, keeps only bit 0 (AND32ri8 with 1) and copies it into VK1.
let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
  def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
            (KMOVWkm addr:$src)>;
}
// 32- and 64-bit mask loads/stores use KMOVD / KMOVQ.
let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;
  def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
            (KMOVDkm addr:$src)>;
  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
  def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
            (KMOVQkm addr:$src)>;
}
2087
// Matches an assertzext node whose asserted value type (operand 1, a
// VTSDNode) is MVT::i1.
def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{
  return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
}]>;
2091
// Conversions between integer GPR values and i1 held in VK1.
//
// trunc to i1: the source is ANDed with 1 and its 16-bit view is copied into
// VK1.  When the source is an assertzext_i1 node the AND is skipped.
// zext/anyext from i1: VK1 is copied to GR16 and then widened or narrowed
// with SUBREG_TO_REG / EXTRACT_SUBREG as the result type requires.
let Predicates = [HasAVX512] in {
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND64ri8 $src, (i64 1)),
                                    sub_16bit)), VK1)>;

  def : Pat<(i1 (trunc (i64 (assertzext_i1 GR64:$src)))),
            (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND32ri8 $src, (i32 1)),
                                    sub_16bit)), VK1)>;

  def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
            (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;

  // An 8-bit source is widened to 16 bits with SUBREG_TO_REG instead of
  // being narrowed.
  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri8 $src, (i8 1)),
                                    sub_8bit)), VK1)>;

  def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))),
            (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), $src, sub_8bit)), VK1)>;

  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS (AND16ri GR16:$src, (i16 1)), VK1)>;

  def : Pat<(i1 (trunc (i16 (assertzext_i1 GR16:$src)))),
            (COPY_TO_REGCLASS $src, VK1)>;

  // Extensions from VK1.  zext and anyext select the same output here.
  def : Pat<(i32 (zext VK1:$src)),
            (i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
                  sub_16bit))>;

  def : Pat<(i32 (anyext VK1:$src)),
            (i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
                  sub_16bit))>;

  def : Pat<(i8 (zext VK1:$src)),
            (i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS VK1:$src, GR16)), sub_8bit))>;

  def : Pat<(i8 (anyext VK1:$src)),
            (i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS $src, GR16)), sub_8bit))>;

  def : Pat<(i64 (zext VK1:$src)),
            (i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
                  sub_16bit))>;

  def : Pat<(i64 (anyext VK1:$src)),
            (i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
                  sub_16bit))>;

  def : Pat<(i16 (zext VK1:$src)),
            (COPY_TO_REGCLASS $src, GR16)>;

  def : Pat<(i16 (anyext VK1:$src)),
            (i16 (COPY_TO_REGCLASS $src, GR16))>;
}
// scalar_to_vector from an i1 in VK1 to any mask vector type is a plain
// register-class copy.
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK16)>;
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK8)>;
def : Pat<(v4i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK4)>;
def : Pat<(v2i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK2)>;
def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK32)>;
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK64)>;

// Storing a constant i1 writes a byte immediate.  Both -1 and 1 are stored
// as the byte value 1.
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1  1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1  0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
2164
2165// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Without DQI the v8i1 <-> GPR conversions go through the 16-bit KMOVW
// instructions, with the mask copied between VK8 and VK16 as needed.
let Predicates = [HasAVX512, NoDQI] in {
  // GR from/to 8-bit mask without native support
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
             (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), VK8)>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;
  def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
            (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
  def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
            (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
}
2180
// Extracting element 0 of a mask vector as an i1 is a register-class copy
// into VK1.  The 32- and 64-bit mask variants require BWI.
let Predicates = [HasAVX512] in {
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
2193
2194// Mask unary operation
2195// - KNOT
// One-operand mask instruction: a single `rr` def that applies OpNode to a
// KRC source and writes a KRC destination, guarded by predicate prd.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
2204
// Instantiates a unary mask instruction for all four mask widths.  The
// mnemonic suffix, register class and predicate per width are:
//   B - "b", VK8,  HasDQI      W - "w", VK16, HasAVX512
//   D - "d", VK32, HasBWI      Q - "q", VK64, HasBWI
// B and D use the PD prefix class, W and Q use PS; D and Q also set VEX_W.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            HasBWI>, VEX, PS, VEX_W;
}
2216
// KNOT{B,W,D,Q}: opcode 0x44, matched from `not`.
defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
2218
// Maps the i16 intrinsic int_x86_avx512_<IntName>_w onto the <InstName>Wrr
// instruction: the GR16 argument is copied into VK16, the instruction is
// applied, and the result is copied back to GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
              (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
}
// int_x86_avx512_knot_w -> KNOTWrr.
defm : avx512_mask_unop_int<"knot", "KNOT">;
2227
// xor with an all-ones mask vector is selected as KNOT of the matching width.
let Predicates = [HasDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (KNOTBrr VK8:$src1)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)),
            (KNOTWrr VK16:$src1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)),
            (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)),
            (KNOTQrr VK64:$src1)>;
}

// Without DQI an 8-bit mask is inverted through the 16-bit KNOTW: the source
// is copied into VK16 and the result is copied back into VK8.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}

// 4- and 2-bit masks always take the KNOTW route.
def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>;
def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>;
2249
2250// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR, KADD
// Two-operand mask instruction: a single `rr` def that applies OpNode to two
// KRC sources and writes a KRC destination.  prd guards the def and
// IsCommutable is forwarded to the instruction's isCommutable flag.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           Predicate prd, bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
}
2261
// Instantiates a binary mask instruction for all four mask widths:
//   B - VK8,  HasDQI           W - VK16, prdW (defaults to HasAVX512)
//   D - VK32, HasBWI           Q - VK64, HasBWI
// B and D use the PD prefix class, W and Q use PS; D and Q also set VEX_W.
// Every width is encoded with VEX_4V and VEX_L.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode, bit IsCommutable,
                               Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
2274
// andn(a, b) = (not a) and b.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
// xnor(a, b) = not (a xor b).
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2277
// Binary mask instructions.  KANDN is the only one instantiated as
// non-commutable.  KADD overrides the W-form predicate with HasDQI instead of
// the default HasAVX512.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,  1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,   1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,  1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,  1, HasDQI>;
2284
// Maps the two-operand i16 intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction: both GR16 arguments are copied into VK16, the
// instruction is applied, and the result is copied back to GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
              (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
              (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
}

// Intrinsic mappings for the 16-bit binary mask operations.
defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
2299
// Selects OpNode on mask types narrower than 16 bits by running the 16-bit
// instruction Inst: both operands are copied into VK16, Inst is applied, and
// the result is copied back into the register class of the original type.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(OpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // The result class must match the operand class (VK2 here, VK4 below);
  // copying into VK1 would give the result the wrong register class.
  def : Pat<(OpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(OpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2323
// Narrow-mask selection patterns for each logical operation, all routed
// through the 16-bit (W) instruction.
defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
2329
// (xor (xor a, b), all-ones) is selected as KXNOR of the matching width.
def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
          (KXNORWrr VK16:$src1, VK16:$src2)>;
def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
          (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>;
def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
          (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>;
def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
          (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>;

// Without DQI the 8-bit case goes through KXNORW, copying through VK16.
let Predicates = [NoDQI] in
def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
          (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16),
                             (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

// The 4-, 2- and 1-bit cases always go through KXNORW.  The result is copied
// back into the register class of the operands.
def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)),
          (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16),
                             (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;

def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)),
          (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
                             (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;

// For i1 the all-ones constant is written as (i1 1).
def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
          (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                             (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2355
2356// Mask unpacking
// kunpck<Suffix>: opcode 0x4b, two KRC sources, one KRC destination.
//   KRC    - register class of the instruction operands and result
//   VT     - result type of the concat_vectors that is matched
//   KRCSrc - register class of the two half-width concat_vectors operands
// The instruction def has no pattern.  The separate Pat selects
// concat_vectors and passes its operands to the instruction in swapped order
// ($src2 first, then $src1).
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L;

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                        (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                        (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
2372
// Unpack instantiations: two VK8 -> v16i1 (HasAVX512), two VK16 -> v32i1
// (HasBWI), two VK32 -> v64i1 (HasBWI).
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
2376
2377// Mask bit testing
// Mask test instruction: a single `rr` def with two KRC sources and no
// register output.  It is declared to define EFLAGS, and its pattern sets
// EFLAGS from OpNode applied to both sources.  prd guards the def.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg,
               (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
2385
// Instantiates the four width variants of a mask test instruction:
//   B (VK8,  requires HasDQI), W (VK16, requires prdW),
//   Q (VK64, requires HasBWI), D (VK32, requires HasBWI).
// The variants differ in their prefix (PD vs PS) and VEX_W settings as
// listed on each defm. prdW defaults to HasAVX512.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
                                                                VEX, PD, VEX_W;
}

// KORTEST uses the default prdW (HasAVX512) for its W form; KTEST overrides
// prdW with HasDQI.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
2400
// Mask shift
//
// Defines the register/immediate form `ri` of a mask shift: $dst is
// (OpNode $src, imm8) on mask class KRC. The definition is guarded by
// HasAVX512.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                             SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr # "\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}
2410
// Instantiates the four width variants of a mask shift-by-immediate:
//   W (VK16) and B (VK8) use opc1; Q (VK64) and D (VK32) use opc2.
//   W and Q additionally set VEX_W.
// B is wrapped in Predicates = [HasDQI]; Q and D in Predicates = [HasBWI].
// NOTE(review): avx512_mask_shiftop also sets Predicates = [HasAVX512] on its
// `ri` def; confirm that the outer `let`s here take effect as intended.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                               SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
                               VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
                               VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
                               VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
                               VEX, TAPD;
  }
}

// Left and right mask shifts, selected from X86vshli / X86vsrli.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
2428
// Mask setting all 0s or 1s
//
// Defines one pseudo-instruction (format Pseudo, opcode 0, empty asm string,
// isPseudo = 1) named after the enclosing defm. It has no inputs and its
// pattern produces the constant `Val` of type VT in mask class KRC. The
// pseudo is marked rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}

// Instantiates the constant-mask pseudo for 8-, 16-, 32- and 64-element
// masks (suffixes B, W, D, Q).
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,   v8i1, Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

// KSET0{B,W,D,Q}: all-zeros masks; KSET1{B,W,D,Q}: all-ones masks.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
2446
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant masks of 8 or fewer elements are materialized with the 16-bit
// KSET0W/KSET1W pseudos and then copied into the narrower register class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  // Both (i1 1) and (i1 -1) are built by shifting the 16-bit all-ones mask
  // right by 15 before copying it into VK1.
  def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
}
2457
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
//
// For a (subRC, subVT) mask nested in a wider (RC, VT) mask, both operations
// at index 0 are selected as plain register-class copies:
//   - extract_subvector at index 0 copies RC -> subRC;
//   - insert_subvector into undef at index 0 copies subRC -> RC.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}

// Every (narrow, wide) pair among the v2i1 .. v64i1 mask types.
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
2487
// Extracting the upper half of a mask: shift the source right by the number
// of elements in the result, then copy into the narrower register class.
// The v4i1 and v8i1 sources are first copied into VK16 so that the 16-bit
// KSHIFTRWri can be used.
def : Pat<(v2i1 (extract_subvector (v4i1 VK4:$src), (iPTR 2))),
          (v2i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), (i8 2)),
                   VK2))>;
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 4))),
          (v4i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (i8 4)),
                   VK4))>;
// The wider sources shift directly in their own width (W, D, Q forms).
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
          (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
          (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
2502
// Left shift of a v8i1 mask when DQI is not available (NoDQI): widen to
// VK16, shift with the 16-bit KSHIFTLWri, and copy the result back to VK8.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                 (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
                  (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;

// Left shift of a v4i1 mask: same widening approach, guarded by HasAVX512
// only.
def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
          (v4i1 (COPY_TO_REGCLASS
                 (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16),
                  (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
2512//===----------------------------------------------------------------------===//
2513// AVX-512 - Aligned and unaligned load and store
2514//
2515
2516
// Defines the EVEX move/load instruction forms for one vector type `_`:
//   rr    - plain register-to-register move (no isel pattern).
//   rrkz  - zero-masked register move.
//   rm    - load from memory.
//   rrk   - merge-masked register move ($src0 tied to $dst).
//   rmk   - merge-masked load ($src0 tied to $dst).
//   rmkz  - zero-masked load.
// followed by patterns that map the masked-load fragment `mload` onto the
// rmkz form (undef or all-zeros pass-through) and the rmk form (register
// pass-through).
//
// Parameters:
//   opc, OpcodeStr     - opcode byte and mnemonic shared by all forms.
//   _                  - X86VectorVTInfo describing the vector type.
//   ld_frag            - load fragment used by the rm/rmk/rmkz patterns.
//   mload              - masked-load fragment used by the trailing Pat<>s.
//   IsReMaterializable - value assigned to isReMaterializable on `rm`.
//   SelectOprr         - select operator used in the patterns of BOTH masked
//                        register-register forms (rrk and rrkz). It defaults
//                        to vselect; instantiations may pass a different
//                        operator (e.g. null_frag) to keep those two forms
//                        from matching vselect.
multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         PatFrag ld_frag, PatFrag mload,
                         bit IsReMaterializable = 1,
                         SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX;
  // Uses SelectOprr (not a hard-coded vselect) so that rrkz follows the same
  // caller-selected operator as rrk below; otherwise an instantiation that
  // overrides SelectOprr would still emit a vselect pattern for rrkz.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ;

  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
                    _.ExeDomain>, EVEX;

  // Merge-masking forms: the pass-through operand $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in {
  def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                    !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                    "${dst} {${mask}}, $src1}"),
                    [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                        (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K;
    let SchedRW = [WriteLoad] in
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (bitconvert (ld_frag addr:$src1))),
                           (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
  }
  let SchedRW = [WriteLoad] in
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ;
  }
  // Masked load with an undef pass-through: use the zero-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with an all-zeros pass-through: zero-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a register pass-through: merge-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
2579
// Instantiates avx512_load for the 512-, 256- and 128-bit members of the
// AVX512VLVectorVTInfo `_`, using each width's AlignedLdFrag and the
// width-specific masked_load_aligned{512,256,128} fragment.
// The 512-bit form (Z) requires `prd`; the 256/128-bit forms (Z256/Z128)
// additionally require HasVLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _,
                                  Predicate prd,
                                  bit IsReMaterializable = 1> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
                       masked_load_aligned512, IsReMaterializable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
                          masked_load_aligned256, IsReMaterializable>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
                          masked_load_aligned128, IsReMaterializable>, EVEX_V128;
  }
}
2595
// Unaligned counterpart of avx512_alignedload_vl: instantiates avx512_load
// for the 512-, 256- and 128-bit members of `_`, using each width's LdFrag
// and the masked_load_unaligned fragment for all three widths.
// SelectOprr is forwarded unchanged to avx512_load.
// The 512-bit form (Z) requires `prd`; Z256/Z128 additionally require HasVLX.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _,
                                  Predicate prd,
                                  bit IsReMaterializable = 1,
                                  SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, IsReMaterializable,
                       SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
                         masked_load_unaligned, IsReMaterializable,
                         SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
                         masked_load_unaligned, IsReMaterializable,
                         SelectOprr>, EVEX_V128;
  }
}
2615
// Defines the store-direction forms of an EVEX move for vector type `_`:
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms using the
//       MRMDestReg encoding; they have no isel patterns and their mnemonic
//       carries a ".s" suffix.
//   mr   - plain store, selected from st_frag.
//   mrk  - masked store (no pattern on the instruction itself).
// plus a pattern mapping the masked-store fragment `mstore` onto mrk.
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag, PatFrag mstore> {

  let hasSideEffects = 0 in {
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ;
  }

  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K;

  // Masked store: select the mrk form with (address, mask, value) operands.
  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
                                                    _.KRCWM:$mask, _.RC:$src)>;
}
2647
2648
// Instantiates avx512_store for the 512-, 256- and 128-bit members of `_`
// using the generic `store` fragment and masked_store_unaligned.
// The 512-bit form (Z) requires `prd`; Z256/Z128 additionally require HasVLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
                        masked_store_unaligned>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
                             masked_store_unaligned>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
                             masked_store_unaligned>, EVEX_V128;
  }
}
2662
// Aligned counterpart of avx512_store_vl: instantiates avx512_store for the
// 512-, 256- and 128-bit members of `_` using the width-specific aligned
// store fragments (alignedstore512 / alignedstore256 / alignedstore) and
// masked_store_aligned{512,256,128}.
// The 512-bit form (Z) requires `prd`; Z256/Z128 additionally require HasVLX.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _,  Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
                        masked_store_aligned512>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
                             masked_store_aligned256>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
                             masked_store_aligned128>, EVEX_V128;
  }
}
2676
// Instantiations of the packed move families. Each defm combines a load
// multiclass with the matching store multiclass under one name, so the
// resulting records share the prefix/VEX_W/EVEX_CD8 modifiers listed at the
// end of the defm.

// Aligned packed-single moves (load 0x28, store 0x29).
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512>,  PS, EVEX_CD8<32, CD8VF>;

// Aligned packed-double moves (load 0x28, store 0x29).
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                     HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned packed-single moves (load 0x10, store 0x11). null_frag is passed
// as SelectOprr.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              1, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
                              PS, EVEX_CD8<32, CD8VF>;

// Unaligned packed-double moves (load 0x10, store 0x11). null_frag is passed
// as SelectOprr.
// NOTE(review): this is the only instantiation here that passes
// IsReMaterializable = 0 - confirm that this is intended.
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0,
                              null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves with 32-bit elements (load 0x6F, store 0x7F).
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512>, PD, EVEX_CD8<32, CD8VF>;

// Aligned integer moves with 64-bit elements.
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                    HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves with 8-bit elements; require HasBWI. SelectOprr is
// left at its default (vselect).
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
                                 HasBWI>, XD, EVEX_CD8<8, CD8VF>;

// Unaligned integer moves with 16-bit elements; require HasBWI. SelectOprr
// is left at its default (vselect).
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
                                 HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned integer moves with 32-bit elements. null_frag is passed as
// SelectOprr.
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
                                 HasAVX512>, XS, EVEX_CD8<32, CD8VF>;

// Unaligned integer moves with 64-bit elements. null_frag is passed as
// SelectOprr.
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
                                 HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
2724
// vselect with zeros in the "true" arm and $src in the "false" arm: select
// it as a zero-masking move of $src under the inverted mask. For v8i64 the
// 8-bit mask is copied into VK16 so the 16-bit KNOTWrr can invert it, and the
// result is copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

// Same for v16i32; the 16-bit mask can be inverted directly.
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// When the select condition is already (xor mask, all-ones), the original
// un-inverted mask is used directly as the zero-masking write mask.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
2744
// Stores of the low subvector (extract_subvector at index 0) of a wider
// vector register. Each pattern stores the corresponding sub-register
// (sub_xmm or sub_ymm) with an ordinary 128/256-bit EVEX move instead of an
// extract-to-memory instruction. `alignedstore` selects the aligned move
// (VMOVAP*/VMOVDQA*), plain `store` the unaligned one (VMOVUP*/VMOVDQU*).
// The i16 and i8 element types reuse the 32-bit-element integer moves.
// All patterns require HasVLX.
let Predicates = [HasVLX] in {
  // Special patterns for storing subvector extracts of lower 128-bits of 256.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;

  def : Pat<(store (v2f64 (extract_subvector
                           (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector
                           (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;

  // Special patterns for storing subvector extracts of lower 128-bits of 512.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;

  def : Pat<(store (v2f64 (extract_subvector
                           (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector
                           (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;

  // Special patterns for storing subvector extracts of lower 256-bits of 512.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  def : Pat<(alignedstore (v4f64 (extract_subvector
                                  (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(alignedstore (v8f32 (extract_subvector
                                  (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(alignedstore (v4i64 (extract_subvector
                                  (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(alignedstore (v8i32 (extract_subvector
                                  (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(alignedstore (v16i16 (extract_subvector
                                   (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(alignedstore (v32i8 (extract_subvector
                                  (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;

  def : Pat<(store (v4f64 (extract_subvector
                           (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVUPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(store (v8f32 (extract_subvector
                           (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVUPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(store (v4i64 (extract_subvector
                           (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(store (v8i32 (extract_subvector
                           (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(store (v16i16 (extract_subvector
                            (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
  def : Pat<(store (v32i8 (extract_subvector
                           (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
     (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
}
2866
2867
// Move Int Doubleword to Packed Double Int
//
// vmovd from a 32-bit GPR into a 128-bit vector register; selected from
// scalar_to_vector producing v4i32.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                        EVEX;
// Memory form of the above: the 32-bit scalar is loaded with loadi32.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
                        IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
// vmovq from a 64-bit GPR into a 128-bit vector register; selected from
// scalar_to_vector producing v2i64.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))],
                          IIC_SSE_MOVDQ>, EVEX, VEX_W;
// Memory form with no isel pattern. It is code-gen only but still forced
// into the disassembler tables, and mayLoad is set explicitly since there is
// no pattern on the instruction.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>;
// Code-gen-only vmovq forms that move a 64-bit value between a GPR (or
// memory) and the scalar double register class FR64X; each is selected from
// a bitconvert between i64 and the FR64X value.
let isCodeGenOnly = 1 in {
// GR64 -> FR64X.
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))],
                       IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// FR64X -> GR64.
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))],
                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// FR64X -> 64-bit memory, stored as the i64 bit pattern.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
                         EVEX_CD8<64, CD8VT1>;
}
2905
// Move Int Doubleword to Single Scalar
//
// Code-gen-only vmovd forms that place a 32-bit integer (from a GPR or from
// memory via loadi32) into the scalar single register class FR32X; selected
// from a bitconvert of the i32 value.
let isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))],
                      IIC_SSE_MOVDQ>, EVEX;

def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
}
2919
// Move doubleword from xmm register to r/m32
//
// Register form: selected from extracting element 0 of a v4i32 into a GR32.
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
                       EVEX;
// Memory form: selected from storing element 0 of a v4i32 as an i32.
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
                       EVEX, EVEX_CD8<32, CD8VT1>;
2933
// vmovq from the low quadword of an XMM register to a 64-bit GPR: selects
// element 0 of a v2i64 value.  Predicated on AVX-512 and 64-bit mode.
//
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst,
                             (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_W, Requires<[HasAVX512, In64BitMode]>;
2942
// Memory-destination form of the 0x7E vmovq encoding.  It carries no
// selection pattern (codegen-only, ForceDisassemble set), so its store
// behaviour and lack of side effects are stated explicitly.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayStore = 1 in {
  def VMOVPQIto64Zmr : I<0x7E, MRMDestMem,
                         (outs), (ins i64mem:$dst, VR128X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [], IIC_SSE_MOVD_ToGP>,
                       PD, EVEX, VEX_W, Requires<[HasAVX512, In64BitMode]>;
}
2948
// Store the low quadword of an XMM register to memory (opcode 0xD6 vmovq):
// matches a store of element 0 of a v2i64 value.
// The only predicate is HasAVX512.  The operands are an XMM register and a
// 64-bit memory location, so no 64-bit GPR is involved and the instruction
// does not need to be restricted to 64-bit mode.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)], IIC_SSE_MOVDQ>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteStore]>, Requires<[HasAVX512]>;
2956
// Register-to-register vmovq using the 0xD6 (MRMDestReg) encoding, printed
// as "vmovq.s".  It has no selection pattern, so hasSideEffects is cleared
// explicitly.
let hasSideEffects = 0 in {
  def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg,
                               (outs VR128X:$dst), (ins VR128X:$src),
                               "vmovq.s\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W;
}
2962
// Bit-preserving moves of a scalar single-precision register (FR32X) to a
// 32-bit integer destination (GR32 register or i32 memory).  Codegen-only;
// both print as "vmovd".
//
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                              (outs GR32:$dst), (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>,
                     EVEX;

  def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                              (outs), (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)),
                                      addr:$dst)],
                              IIC_SSE_MOVDQ>,
                     EVEX, EVEX_CD8<32, CD8VT1>;
}
2977
// vmovq load: a 64-bit integer loaded from memory becomes element 0 of a
// v2i64 value (scalar_to_vector) in an XMM register.
//
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
                              (outs VR128X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector
                                             (loadi64 addr:$src))))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
2986
2987//===----------------------------------------------------------------------===//
2988// AVX-512  MOVSS, MOVSD
2989//===----------------------------------------------------------------------===//
2990
// Scalar move instruction family (instantiated below for vmovss / vmovsd).
//   asm    - mnemonic.
//   OpNode - SDNode matched by the register and load-merge forms.
//   _      - type info providing the vector/scalar register classes, the
//            value type, the scalar memory operand and the load fragment.
// Register and load forms use opcode 0x10; store forms use opcode 0x11.
multiclass avx512_move_scalar<string asm, SDNode OpNode, X86VectorVTInfo _> {
  // Maskable vector-register form: OpNode applied to $src1 and $src2.
  defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _,
                    (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    asm, "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
                    IIC_SSE_MOV_S_RR>,
                EVEX_4V;

  // Maskable load form: OpNode merges the loaded scalar into $src1, which is
  // tied to the destination.
  let Constraints = "$src1 = $dst" in {
    defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
                      (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                      asm, "$src", "$src",
                      (_.VT (OpNode (_.VT _.RC:$src1),
                                    (_.VT (scalar_to_vector
                                            (_.ScalarLdFrag addr:$src)))))>,
                  EVEX;
  }

  // Unmasked codegen-only forms operating on the scalar register class.
  let isCodeGenOnly = 1 in {
    def rr : AVX512PI<0x10, MRMSrcReg,
                      (outs _.RC:$dst), (ins _.RC:$src1, _.FRC:$src2),
                      asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.RC:$dst,
                            (_.VT (OpNode _.RC:$src1,
                                          (scalar_to_vector _.FRC:$src2))))],
                      _.ExeDomain, IIC_SSE_MOV_S_RR>,
             EVEX_4V;

    def rm : AVX512PI<0x10, MRMSrcMem,
                      (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                      _.ExeDomain, IIC_SSE_MOV_S_RM>,
             EVEX;
  }

  // Plain scalar store.
  def mr: AVX512PI<0x11, MRMDestMem,
                   (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
                   asm # "\t{$src, $dst|$dst, $src}",
                   [(store _.FRC:$src, addr:$dst)],
                   _.ExeDomain, IIC_SSE_MOV_S_MR>,
          EVEX;

  // Write-masked scalar store.  It has no pattern here, so mayStore is set
  // by hand.
  let mayStore = 1 in {
    def mrk: AVX512PI<0x11, MRMDestMem,
                      (outs),
                      (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
                      asm # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                      [], _.ExeDomain, IIC_SSE_MOV_S_MR>,
             EVEX, EVEX_K;
  }
}
3029
// Single-precision scalar move: XS prefix, 32-bit compressed-disp8 element.
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

// Double-precision scalar move: XD prefix, VEX_W set, 64-bit compressed-disp8
// element.
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3035
// Scalar select under a one-bit mask, implemented with the write-masked
// register move.  Scalars are copied into VR128X for the instruction and the
// result is copied back to the scalar class.  Operand order passed to the
// "_Intk" instruction: $src2, the mask, an IMPLICIT_DEF vector, then $src1.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                            VK1WM:$mask,
                            (v4f32 (IMPLICIT_DEF)),
                            (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                            VK1WM:$mask,
                            (v2f64 (IMPLICIT_DEF)),
                            (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;
3043
// Masked scalar-store intrinsic: the GR8 mask is copied into the one-bit
// write-mask class and the vector source into FR32X, then the masked store
// form of vmovss is used.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3047
// Register-register scalar moves using the store-form opcode (0x11,
// MRMDestReg), printed with a ".s" suffix.  They are assembler-level
// definitions with empty patterns, so hasSideEffects is cleared explicitly.
// NOTE(review): "VMOVSSDrr_REV" looks like a typo for "VMOVSDZrr_REV" (compare
// VMOVSSZrr_REV); left unchanged because the record name is visible outside
// this file.
let hasSideEffects = 0 in {
  defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info,
                           (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss.s", "$src2, $src1", "$src1, $src2", []>,
                       XS, EVEX_4V, VEX_LIG;

  defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
                           (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
                       XD, EVEX_4V, VEX_LIG, VEX_W;
}
3059
3060let Predicates = [HasAVX512] in {
3061  let AddedComplexity = 15 in {
3062  // Move scalar to XMM zero-extended, zeroing a VR128X then do a
3063  // MOVS{S,D} to the lower bits.
3064  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
3065            (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
3066  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
3067            (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3068  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
3069            (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3070  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
3071            (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
3072
3073  // Move low f32 and clear high bits.
3074  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3075            (SUBREG_TO_REG (i32 0),
3076             (VMOVSSZrr (v4f32 (V_SET0)),
3077              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3078  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3079            (SUBREG_TO_REG (i32 0),
3080             (VMOVSSZrr (v4i32 (V_SET0)),
3081                       (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
3082  }
3083
3084  let AddedComplexity = 20 in {
3085  // MOVSSrm zeros the high parts of the register; represent this
3086  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3087  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
3088            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3089  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3090            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3091  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
3092            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3093
3094  // MOVSDrm zeros the high parts of the register; represent this
3095  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3096  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
3097            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3098  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
3099            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3100  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
3101            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3102  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
3103            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3104  def : Pat<(v2f64 (X86vzload addr:$src)),
3105            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3106
3107  // Represent the same patterns above but in the form they appear for
3108  // 256-bit types
3109  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
3110                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
3111            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
3112  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
3113                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
3114            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3115  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
3116                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
3117            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3118  def : Pat<(v4f64 (X86vzload addr:$src)),
3119            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3120
3121  // Represent the same patterns above but in the form they appear for
3122  // 512-bit types
3123  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
3124                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
3125            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
3126  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
3127                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
3128            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3129  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
3130                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
3131            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3132  def : Pat<(v8f64 (X86vzload addr:$src)),
3133            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3134  }
3135  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
3136                   (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
3137            (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
3138                                            FR32X:$src)), sub_xmm)>;
3139  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
3140                   (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
3141            (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
3142                                     FR64X:$src)), sub_xmm)>;
3143  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
3144                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
3145            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
3146
3147  // Move low f64 and clear high bits.
3148  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
3149            (SUBREG_TO_REG (i32 0),
3150             (VMOVSDZrr (v2f64 (V_SET0)),
3151                       (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
3152
3153  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
3154            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
3155                       (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
3156
3157  // Extract and store.
3158  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
3159                   addr:$dst),
3160            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
3161
3162  // Shuffle with VMOVSS
3163  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
3164            (VMOVSSZrr (v4i32 VR128X:$src1),
3165                      (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
3166  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
3167            (VMOVSSZrr (v4f32 VR128X:$src1),
3168                      (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
3169
3170  // 256-bit variants
3171  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
3172            (SUBREG_TO_REG (i32 0),
3173              (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
3174                        (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
3175              sub_xmm)>;
3176  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
3177            (SUBREG_TO_REG (i32 0),
3178              (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
3179                        (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
3180              sub_xmm)>;
3181
3182  // Shuffle with VMOVSD
3183  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
3184            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3185  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
3186            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3187  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
3188            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3189  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
3190            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3191
3192  // 256-bit variants
3193  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
3194            (SUBREG_TO_REG (i32 0),
3195              (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
3196                        (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
3197              sub_xmm)>;
3198  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
3199            (SUBREG_TO_REG (i32 0),
3200              (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
3201                        (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
3202              sub_xmm)>;
3203
3204  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
3205            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3206  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
3207            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3208  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
3209            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3210  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
3211            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
3212}
3213
// Register vmovq selected for X86vzmovl of a v2i64 value (keep the low
// quadword, zero the upper one).
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg,
                                    (outs VR128X:$dst), (ins VR128X:$src),
                                    "vmovq\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst,
                                          (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))],
                                    IIC_SSE_MOVQ_RR>,
                          EVEX, VEX_W;
}
3221
// Codegen-only load form: X86vzmovl of a loaded v2i64 value.  The operand is
// declared as i128mem to match the loadv2i64 fragment in the pattern.
let AddedComplexity = 20, isCodeGenOnly = 1 in {
  def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
                                    (outs VR128X:$dst), (ins i128mem:$src),
                                    "vmovq\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst,
                                          (v2i64 (X86vzmovl
                                                   (loadv2i64 addr:$src))))],
                                    IIC_SSE_MOVDQ>,
                          EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
3230
// Integer zero-extending moves selected to the EVEX vmovd / vmovq forms.
let Predicates = [HasAVX512] in {
  // GPR sources.
  let AddedComplexity = 15 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;

    def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                         (v2i64 (scalar_to_vector GR64:$src)),
                         (iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
  }

  // The 128-bit movd/movq instructions write zeros to the upper part of the
  // destination, so these zero-extending forms fold into a single
  // instruction.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;

    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v4i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0),
                (VMOVZPQILo2PQIZrm addr:$src),
                sub_xmm)>;
  }

  // 256-bit scalar_to_vector + zero-extend is done with the 128-bit
  // instruction placed in the XMM sub-register.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector undef,
                       (v4i32 (scalar_to_vector GR32:$src)),
                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;

  // 512-bit zero-extending load is likewise done with the 128-bit
  // instruction.
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0),
              (VMOVZPQILo2PQIZrm addr:$src),
              sub_xmm)>;
}
3271
// Inserting a GPR into element 0 of an all-zero 512-bit vector is a 128-bit
// vmovd/vmovq from the GPR placed in the XMM sub-register.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)),
                             GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// The same instructions are used when the vector being inserted into is
// undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
3283
3284//===----------------------------------------------------------------------===//
3285// AVX-512 - Non-temporals
3286//===----------------------------------------------------------------------===//
// Non-temporal loads (vmovntdqa, opcode 0x2A).  Each width is selected from
// its movntdqa intrinsic; the 256- and 128-bit forms additionally require
// VLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem,
                              (outs VR512:$dst), (ins i512mem:$src),
                              "vmovntdqa\t{$src, $dst|$dst, $src}",
                              [(set VR512:$dst,
                                    (int_x86_avx512_movntdqa addr:$src))],
                              SSEPackedInt>,
                     EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasVLX] in {
    def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem,
                                   (outs VR256X:$dst), (ins i256mem:$src),
                                   "vmovntdqa\t{$src, $dst|$dst, $src}",
                                   [(set VR256X:$dst,
                                         (int_x86_avx2_movntdqa addr:$src))],
                                   SSEPackedInt>,
                          EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem,
                                   (outs VR128X:$dst), (ins i128mem:$src),
                                   "vmovntdqa\t{$src, $dst|$dst, $src}",
                                   [(set VR128X:$dst,
                                         (int_x86_sse41_movntdqa addr:$src))],
                                   SSEPackedInt>,
                          EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
3310
// One non-temporal store instruction ("mr" form) for the vector type "_".
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   st_frag   - store fragment matched by the pattern (defaults to the
//               aligned non-temporal store).
//   itin      - itinerary class.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag = alignednontemporalstore,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins _.MemOp:$dst, _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (_.VT _.RC:$src), addr:$dst)],
                      _.ExeDomain, itin>,
             EVEX, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
3320
// Instantiates avx512_movnt for all three vector lengths of VTInfo: the
// 512-bit form needs AVX-512 only, the 256- and 128-bit forms also need VLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
  }
}
3331
// Non-temporal stores: integer (i64 element info), packed double (VEX_W set)
// and packed single.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>,
                PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>,
                PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>,
                PS;
3335
// 512-bit non-temporal patterns for the element types the instruction
// definitions do not match directly: the remaining integer stores go through
// vmovntdq, and aligned non-temporal loads of every 512-bit type go through
// vmovntdqa.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  // Stores.
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  // Loads.
  def : Pat<(v8f64  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
3357
// 256- and 128-bit counterparts of the non-temporal patterns, available with
// VLX: remaining integer stores use vmovntdq, all aligned non-temporal loads
// use vmovntdqa.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores.
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  // 256-bit loads.
  def : Pat<(v4f64  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8  (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  // 128-bit stores.
  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  // 128-bit loads.
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
3399
3400//===----------------------------------------------------------------------===//
3401// AVX-512 - Integer arithmetic
3402//
// Maskable integer binary operation on vector type "_": a register-register
// form (rr) and a register-memory form (rm) whose second operand is a full
// vector load.  IsCommutable is forwarded to the rr form only.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (bitconvert
                                            (_.LdFrag addr:$src2)))),
                            itins.rm>,
            AVX512BIBase, EVEX_4V;
}
3421
// avx512_binop_rm plus an embedded-broadcast memory form (rmb): the second
// operand is a scalar load broadcast to the full vector, and the assembly
// operand is suffixed with the type's broadcast string.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}" # _.BroadcastStr # ", $src1",
                             "$src1, ${src2}" # _.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)))),
                             itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B;
}
3436
// Instantiates avx512_binop_rm (no broadcast form) at 512, 256 and 128 bits.
// The 512-bit form is gated on "prd"; the narrower forms also need VLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                              Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512,
                             itins, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                itins, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                itins, IsCommutable>,
                EVEX_V128;
  }
}
3451
// Instantiates avx512_binop_rmb (with broadcast form) at 512, 256 and 128
// bits.  The 512-bit form is gated on "prd"; the narrower forms also need
// VLX.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                               Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
                              itins, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 itins, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 itins, IsCommutable>,
                EVEX_V128;
  }
}
3466
// Quadword (i64 element) binary op at all vector lengths, with broadcast
// forms; sets VEX_W and a 64-bit compressed-disp8 element size.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode,
                                  avx512vl_i64_info, itins, prd,
                                  IsCommutable>,
              VEX_W, EVEX_CD8<64, CD8VF>;
}
3474
// Doubleword (i32 element) binary op at all vector lengths, with broadcast
// forms; uses a 32-bit compressed-disp8 element size.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode,
                                  avx512vl_i32_info, itins, prd,
                                  IsCommutable>,
              EVEX_CD8<32, CD8VF>;
}
3481
// Word (i16 element) binary op at all vector lengths.  Built on
// avx512_binop_rm_vl, so no broadcast form is generated.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode,
                                 avx512vl_i16_info, itins, prd,
                                 IsCommutable>,
              EVEX_CD8<16, CD8VF>;
}
3488
// Byte (i8 element) binary op at all vector lengths.  Built on
// avx512_binop_rm_vl, so no broadcast form is generated.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode,
                                 avx512vl_i8_info, itins, prd,
                                 IsCommutable>,
              EVEX_CD8<8, CD8VF>;
}
3495
// Quadword and doubleword variants of one operation.  The mnemonic gets a
// "q" / "d" suffix and the defs get a Q / D name suffix; each element size
// has its own opcode.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, !strconcat(OpcodeStr, "q"), OpNode,
                                itins, prd, IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, !strconcat(OpcodeStr, "d"), OpNode,
                                itins, prd, IsCommutable>;
}
3505
// Word and byte variants of one operation.  The mnemonic gets a "w" / "b"
// suffix and the defs get a W / B name suffix; each element size has its own
// opcode.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, !strconcat(OpcodeStr, "w"), OpNode,
                                itins, prd, IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, !strconcat(OpcodeStr, "b"), OpNode,
                                itins, prd, IsCommutable>;
}
3515
// All four element sizes of one operation.  The dword/qword variants are
// gated on HasAVX512 and the byte/word variants on HasBWI.  Opcodes are given
// in the order byte, word, dword, qword.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                                    HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                                    HasBWI, IsCommutable>;
}
3525
// Maskable binary operation whose source and destination vector types
// differ.
//   _Src   - type of both source operands.
//   _Dst   - type of the result (also the type the masking is built on).
//   _Brdct - type used for the embedded-broadcast memory form.
// Three forms are produced: register (rr), full-vector load (rm) and
// broadcast load (rmb).  IsCommutable is forwarded to the rr form only.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
                            AVX512BIBase, EVEX_4V;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2)))),
                        itins.rm>,
                        AVX512BIBase, EVEX_4V;

  // The broadcast form loads one _Brdct element and broadcasts it, so the
  // memory operand and the broadcast suffix in BOTH assembly syntaxes must
  // come from _Brdct.  Taking them from _Dst would give a different operand
  // size and a different Intel-syntax suffix whenever the two types differ.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2)))))),
                    itins.rm>,
                    AVX512BIBase, EVEX_4V, EVEX_B;
}
3557
// Packed-integer arithmetic instantiations.
//
// Argument order: opcode(s), mnemonic stem, selection DAG node, itinerary set,
// then (for every helper except avx512_binop_rm_vl_all, which picks its own
// predicates) the guarding predicate, and finally the IsCommutable flag.
// A trailing T8PD overrides the encoding class for that instruction; it is
// defined outside this section.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SSE_INTALU_ITINS_P, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SSE_INTALU_ITINS_P, 0>;
// Saturating add/subtract: byte and word forms only.
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
// Low-half multiplies.  Each element size is guarded by a different predicate
// (HasAVX512 for dword, HasBWI for word, HasDQI for qword).
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
// High-half word multiplies (mulhs / mulhu / X86mulhrs nodes).
// NOTE(review): VPMULHW uses SSE_INTALU_ITINS_P while VPMULHUW and VPMULHRSW
// use SSE_INTMUL_ITINS_P - confirm whether the difference is intentional.
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
                                      HasBWI, 1>, T8PD;
// Byte/word average.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SSE_INTALU_ITINS_P, HasBWI, 1>;
3584
// Instantiates avx512_binop_rm2 at 512, 256 and 128 bits for an operation
// whose source and destination vector types may differ.  The broadcast type is
// always a vector of i64 elements, and every width is tagged with
// EVEX_CD8<64, CD8VF> and VEX_W.  The 512-bit form requires only `prd`; the
// narrower forms additionally require HasVLX.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,
                            bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  }

  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                     EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;

    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
3604
// Instantiations of avx512_binop_all.  Arguments: opcode, mnemonic, itinerary
// set, source VT family, destination VT family, DAG node, predicate and the
// IsCommutable flag.
//
// VPMULDQ / VPMULUDQ take i32-element sources and produce i64-element results.
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
                                avx512vl_i32_info, avx512vl_i64_info,
                                X86pmuldq, HasAVX512, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
                                avx512vl_i32_info, avx512vl_i64_info,
                                X86pmuludq, HasAVX512, 1>;
// VPMULTISHIFTQB uses i8-element vectors for both source and destination,
// while avx512_binop_all still broadcasts through an i64-element vector.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
3614
// Broadcast-memory ("rmb") form of a pack-style operation.  The second source
// is a scalar load splatted with X86VBroadcast to _Src.VT; the result has type
// _Dst.VT.  EVEX_CD8 is parameterised by the source element size.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                             (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT
                               (OpNode (_Src.VT _Src.RC:$src1),
                                       (bitconvert
                                         (_Src.VT
                                           (X86VBroadcast
                                             (_Src.ScalarLdFrag addr:$src2))))))>,
             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
}
3627
// Register-register ("rr") and register-memory ("rm") forms of an operation
// that consumes two _Src vectors and produces a _Dst vector.  Both forms are
// maskable and use EVEX_CD8 keyed on the source element size.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.VT _Src.RC:$src2)))>,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;

  // The loaded value is bitconverted before being used as the second operand.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert
                                               (_Src.LdFrag addr:$src2))))>,
            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
}
3645
// Pack of i32 elements into i16 elements at 512/256/128 bits.  Each width gets
// the rr/rm forms from avx512_packs_rm plus the broadcast form from
// avx512_packs_rmb.  All widths require HasBWI; the 256- and 128-bit widths
// also require HasVLX.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  v16i32_info, v32i16_info>,
                  avx512_packs_rmb<opc, OpcodeStr, OpNode,
                                   v16i32_info, v32i16_info>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v8i32x_info, v16i16x_info>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode,
                                      v8i32x_info, v16i16x_info>,
                     EVEX_V256;

    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v4i32x_info, v8i16x_info>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode,
                                      v4i32x_info, v8i16x_info>,
                     EVEX_V128;
  }
}
// Pack of i16 elements into i8 elements at 512/256/128 bits.  Only the rr/rm
// forms from avx512_packs_rm are instantiated; no broadcast form is defined
// here.  All widths require HasBWI; the 256- and 128-bit widths also require
// HasVLX.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  v32i16_info, v64i8_info>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v16i16x_info, v32i8x_info>,
                     EVEX_V256;

    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v8i16x_info, v16i8x_info>,
                     EVEX_V128;
  }
}
3676
// Multiply-add style operation taking _Src vectors and producing _Dst vectors.
// It reuses avx512_packs_rm (rr and rm forms only) for each vector width.  All
// widths require HasBWI; the 256- and 128-bit widths also require HasVLX.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  _Src.info512, _Dst.info512>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     _Src.info256, _Dst.info256>,
                     EVEX_V256;

    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     _Src.info128, _Dst.info128>,
                     EVEX_V128;
  }
}
3690
// Pack instructions.  The dword->word packs also get a broadcast form (see
// avx512_packs_all_i32_i16); the word->byte packs do not.
// VPACKUSDW is the only one using AVX5128IBase rather than AVX512BIBase; both
// classes are defined outside this section.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// Multiply-add instructions: i8 -> i16 and i16 -> i32 element widening.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase;
3700
// Packed-integer min/max.  Byte and word forms are guarded by HasBWI; the
// combined dword/qword forms are guarded by HasAVX512 and use the same opcode
// for both element sizes.  Every instantiation passes IsCommutable = 1.

// Signed maximum.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned maximum.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Signed minimum.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned minimum.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
3728//===----------------------------------------------------------------------===//
3729// AVX-512  Logical Instructions
3730//===----------------------------------------------------------------------===//
3731
// Bitwise logic on packed integers, defined in dword and qword flavours that
// share one opcode.  AND/OR/XOR are marked commutable; VPANDN is not.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                  SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                  SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                  SSE_INTALU_ITINS_P, HasAVX512, 0>;
3740
3741//===----------------------------------------------------------------------===//
3742// AVX-512  FP arithmetic
3743//===----------------------------------------------------------------------===//
// Scalar FP binary operation.  Four records are produced:
//   rr_Int / rm_Int - maskable forms operating on the vector register class
//                     (_.RC), matched through VecNode with FROUND_CURRENT.
//   rr / rm         - isCodeGenOnly forms operating on the scalar FP register
//                     class (_.FRC), matched through OpNode and guarded by
//                     HasAVX512.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode, OpndItins itins,
                            bit IsCommutable> {

  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (VecNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (i32 FROUND_CURRENT)),
                    itins.rr, IsCommutable>;

  // The memory operand is a scalar load placed into a vector with
  // scalar_to_vector.
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (VecNode (_.VT _.RC:$src1),
                             (_.VT (scalar_to_vector
                                     (_.ScalarLdFrag addr:$src2))),
                             (i32 FROUND_CURRENT)),
                    itins.rm, IsCommutable>;

  let isCodeGenOnly = 1, isCommutable = IsCommutable,
      Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg,
               (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
               itins.rr>;

    def rm : I<opc, MRMSrcMem,
               (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst,
                     (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))],
               itins.rm>;
  }
}
3776
// Register-register scalar FP form that takes an explicit rounding-control
// operand ($rc, of class AVX512RC) and passes it to VecNode as an i32
// immediate.  Tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, SDNode VecNode,
                                  OpndItins itins, bit IsCommutable = 0> {

  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                 OpcodeStr,
                 "$rc, $src2, $src1", "$src1, $src2, $rc",
                 (VecNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (i32 imm:$rc)),
                 itins.rr, IsCommutable>,
             EVEX_B, EVEX_RC;
}
// Register-register scalar FP form with the "{sae}" modifier.  VecNode is
// matched with FROUND_NO_EXC as its rounding argument and the record is tagged
// EVEX_B.
//
// itins.rr and IsCommutable are forwarded to AVX512_maskable_scalar, matching
// the rrb form in avx512_fp_scalar_round; without this the {sae} form would be
// defined with no itinerary even though the caller supplied one.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode VecNode, OpndItins itins, bit IsCommutable> {

  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC)), itins.rr, IsCommutable>,
                            EVEX_B;
}
3796
// Scalar FP binary operation with an embedded-rounding variant.  For each of
// single ("ss") and double ("sd") precision it combines avx512_fp_scalar with
// avx512_fp_scalar_round and attaches the precision-specific encoding classes.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SizeItins itins,
                                bit IsCommutable> {
  // Single precision.
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                                    itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Double precision.
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                                    itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
3811
// Scalar FP binary operation with a "{sae}" variant.  For each of single
// ("ss") and double ("sd") precision it combines avx512_fp_scalar with
// avx512_fp_scalar_sae and attaches the precision-specific encoding classes.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SizeItins itins,
                              bit IsCommutable> {
  // Single precision.
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              itins.s, IsCommutable>,
             avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
                                  itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Double precision.
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              itins.d, IsCommutable>,
             avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
                                  itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic.  Arguments: opcode, mnemonic stem, scalar DAG node,
// vector DAG node used by the *_Int and rounding/{sae} forms, itinerary set,
// and the IsCommutable flag.  ADD/MUL/SUB/DIV get an embedded-rounding form;
// MIN/MAX get a {sae} form instead and are not marked commutable.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae  <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae  <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
3832
// Under "unsafe-fp-math" the MIN/MAX nodes may be commuted.  In that mode the
// X86fminc and X86fmaxc nodes are used in place of X86fmin and X86fmax, and
// this multiclass provides the matching scalar instructions: codegen-only
// rr/rm forms on the scalar FP register class, always marked commutable and
// guarded by HasAVX512.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    OpndItins itins> {
  let isCodeGenOnly = 1, isCommutable =1, Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg,
               (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
               itins.rr>;

    def rm : I<opc, MRMSrcMem,
               (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst,
                     (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))],
               itins.rm>;
  }
}
// Commutable scalar min/max variants built on X86fminc / X86fmaxc.  They use
// the same opcodes and mnemonics as the VMIN/VMAX scalar instructions (0x5D /
// 0x5F, "vminss" etc.) and are codegen-only per avx512_comutable_binop_s.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
                                EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
                                EVEX_CD8<64, CD8VT1>;
3865
// Packed FP binary operation on a single vector type `_`.  Three maskable
// forms are defined:
//   rr  - both sources in registers
//   rm  - second source is a full-width vector load
//   rmb - second source is a scalar load broadcast with X86VBroadcast (EVEX_B)
// The mnemonic is OpcodeStr with the vector type's suffix appended.
//
// IsCommutable is forwarded to the register-register form only.  It follows
// the itinerary argument in AVX512_maskable's parameter list, so NoItinerary
// is passed explicitly to keep the instruction without an itinerary, exactly
// as when the argument is omitted.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, bit IsCommutable> {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  NoItinerary, IsCommutable>, EVEX_4V;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))>,
                   EVEX_4V, EVEX_B;
}
3884
// Register-register packed FP form with an explicit rounding-control operand
// ($rc, of class AVX512RC), handed to OpNodeRnd as an i32 immediate.  Tagged
// EVEX_B and EVEX_RC.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd, X86VectorVTInfo _> {
  defm rb: AVX512_maskable<opc, MRMSrcReg, _,
                           (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                           OpcodeStr##_.Suffix,
                           "$rc, $src2, $src1", "$src1, $src2, $rc",
                           (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                            (i32 imm:$rc)))>,
           EVEX_4V, EVEX_B, EVEX_RC;
}
3893
3894
// Register-register packed FP form with the "{sae}" modifier.  OpNodeRnd is
// matched with FROUND_NO_EXC as its rounding argument; tagged EVEX_B.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDNode OpNodeRnd, X86VectorVTInfo _> {
  defm rb: AVX512_maskable<opc, MRMSrcReg, _,
                           (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2),
                           OpcodeStr##_.Suffix,
                           "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                           (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                            (i32 FROUND_NO_EXC)))>,
           EVEX_4V, EVEX_B;
}
3903
// Packed FP binary operation for single and double precision at every vector
// width.  The 512-bit forms are guarded by `prd`; the 128- and 256-bit forms
// additionally require HasVLX.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             Predicate prd, bit IsCommutable = 0> {
  // 512-bit forms.
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                                IsCommutable>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                                IsCommutable>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   IsCommutable>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   IsCommutable>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   IsCommutable>,
                  EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   IsCommutable>,
                  EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
3931
// Embedded-rounding variants of a packed FP binary operation.  Only the
// 512-bit single- and double-precision forms are defined.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
             EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
3938
// "{sae}" variants of a packed FP binary operation.  Only the 512-bit single-
// and double-precision forms are defined.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
             EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
3945
// Packed FP arithmetic.  avx512_fp_binop_p takes: opcode, mnemonic stem, DAG
// node, predicate and an optional IsCommutable flag (default 0).
// ADD/MUL/SUB/DIV additionally get 512-bit embedded-rounding forms; MIN/MAX
// get 512-bit {sae} forms.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
// Commutable min/max variants (X86fminc / X86fmaxc).  They share the opcode
// and mnemonic of VMIN/VMAX and are therefore marked codegen-only.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, 1>;
}
// FP-domain bitwise logic; these are guarded by HasDQI rather than HasAVX512.
defm VAND  : avx512_fp_binop_p<0x54, "vand", X86fand, HasDQI, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, HasDQI, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI, 1>;
3966
// Packed SCALEF-style operation: rr, rm and broadcast (rmb) forms, each
// matching OpNode with FROUND_CURRENT as the trailing rounding argument.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _,
                           (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2),
                           OpcodeStr##_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                         (i32 FROUND_CURRENT)))>,
           EVEX_4V;

  defm rm: AVX512_maskable<opc, MRMSrcMem, _,
                           (outs _.RC:$dst),
                           (ins _.RC:$src1, _.MemOp:$src2),
                           OpcodeStr##_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                                   (i32 FROUND_CURRENT))>,
           EVEX_4V;

  defm rmb: AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.ScalarMemOp:$src2),
                            OpcodeStr##_.Suffix,
                            "${src2}"##_.BroadcastStr##", $src1",
                            "$src1, ${src2}"##_.BroadcastStr,
                            (OpNode _.RC:$src1,
                                    (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))),
                                    (i32 FROUND_CURRENT))>,
            EVEX_4V, EVEX_B;
}
3985
// Scalar SCALEF-style operation: rr and rm forms defined through
// AVX512_maskable_scalar, each matching OpNode with FROUND_CURRENT.  The
// memory form loads a scalar and places it in a vector with scalar_to_vector.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86VectorVTInfo _> {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                  (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2),
                                  OpcodeStr##_.Suffix,
                                  "$src2, $src1", "$src1, $src2",
                                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                                (i32 FROUND_CURRENT)))>;

  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _,
                                  (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                  OpcodeStr##_.Suffix,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode _.RC:$src1,
                                          (_.VT (scalar_to_vector
                                                  (_.ScalarLdFrag addr:$src2))),
                                          (i32 FROUND_CURRENT))>;
}
3999
// All SCALEF-style forms.  `opc` encodes the packed instructions and
// `opcScaler` the scalar ones; OpNode / OpNodeScal are the corresponding DAG
// nodes.  The 512-bit packed and the scalar forms also get an
// embedded-rounding variant; the 128/256-bit packed forms require HasVLX.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeScal> {
  // 512-bit packed forms.
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Scalar forms.
  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.s>,
                EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF: opcode 0x2C is used for the packed forms and 0x2D for the scalar
// forms (first and second arguments of avx512_fp_scalef_all respectively).
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
4027
4028//===----------------------------------------------------------------------===//
4029// AVX-512  VPTESTM instructions
4030//===----------------------------------------------------------------------===//
4031
// Register-register and register-memory forms of a vector test that writes a
// mask register (_.KRC).  Only the rr form is marked commutable, and only the
// rm form carries EVEX_CD8.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86VectorVTInfo _> {
  let isCommutable = 1 in {
    defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _,
                                  (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2),
                                  OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2))>,
              EVEX_4V;
  }

  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                                (outs _.KRC:$dst),
                                (ins _.RC:$src1, _.MemOp:$src2),
                                OpcodeStr,
                                "$src2, $src1", "$src1, $src2",
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT (bitconvert
                                                (_.LdFrag addr:$src2))))>,
            EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
4048
// Broadcast-memory form of a vector test: the second operand is a scalar load
// splatted with X86VBroadcast, and the result is written to a mask register.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                                 (outs _.KRC:$dst),
                                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                 OpcodeStr,
                                 "${src2}"##_.BroadcastStr##", $src1",
                                 "$src1, ${src2}"##_.BroadcastStr,
                                 (OpNode (_.VT _.RC:$src1),
                                         (_.VT (X86VBroadcast
                                                 (_.ScalarLdFrag addr:$src2))))>,
             EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
4059
// When VLX is unavailable, implement the 128/256-bit test with the 512-bit
// instruction: each narrow source is inserted into an otherwise undefined
// ExtendInfo-typed register (INSERT_SUBREG over IMPLICIT_DEF), the wide
// "<NAME><Suffix>Zrr" instruction is applied, and the resulting mask is copied
// to the narrow type's mask register class.
multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Suffix> {
  def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
            (_.KVT
              (COPY_TO_REGCLASS
                (!cast<Instruction>(NAME # Suffix # "Zrr")
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src1, _.SubRegIdx),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src2, _.SubRegIdx)),
                _.KRC))>;
}
4072
// Instantiates one element size of the VPTEST-style compare at every vector
// width:
//   Z           - 512-bit forms, guarded by HasAVX512.
//   Z256, Z128  - 256/128-bit forms, guarded by HasAVX512 and HasVLX.
//   *_Alt       - NoVLX patterns that go through avx512_vptest_lowering, using
//                 Suffix to name the 512-bit instruction.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  AVX512VLVectorVTInfo _, string Suffix> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
    defm Z256_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info256,
                                           Suffix>;
    defm Z128_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info128,
                                           Suffix>;
  }
}
4090
// Dword (D) and qword (Q) variants of the VPTEST-style compare. The mnemonic
// gets a "d" or "q" suffix, and the Q variant additionally carries VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
                                  avx512vl_i32_info, "D">;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
                                  avx512vl_i64_info, "Q">,
           VEX_W;
}
4097
// Word (W) and byte (B) variants of the VPTEST-style compare.
//   WZ, BZ               - 512-bit forms, guarded by HasBWI.
//   WZ256/128, BZ256/128 - 256/128-bit forms, guarded by HasVLX and HasBWI.
//   *_Alt                - NoVLX patterns that widen 128/256-bit operands and
//                          select the 512-bit BZrr/WZrr instruction.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
              EVEX_V512, VEX_W;
    defm BZ : avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
              EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
                 EVEX_V128, VEX_W;
    defm BZ256 : avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
                 EVEX_V256;
    defm BZ128 : avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
                 EVEX_V128;
  }

  // These patterns select the 512-bit BZrr/WZrr instructions defined above
  // under HasBWI, so they must be guarded by HasBWI as well (not merely
  // HasAVX512); otherwise a BWI-only instruction could be selected on a
  // subtarget that lacks it.
  let Predicates = [HasBWI, NoVLX] in {
    defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info,
                                            "B">;
    defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info,
                                            "B">;
    defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info,
                                            "W">;
    defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info,
                                            "W">;
  }
}
4126
// Combines the byte/word forms (opcode opc_wb) and the dword/qword forms
// (opcode opc_dq) of a VPTEST-style compare under a single defm.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, SDNode OpNode>
  : avx512_vptest_wb<opc_wb, OpcodeStr, OpNode>,
    avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
4131
// VPTESTM and VPTESTNM use the same opcode bytes (0x26 for b/w, 0x27 for d/q);
// the two defms differ in the matched node and in the prefix class
// (T8PD vs. T8XS).
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>,
                T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>,
                T8XS;
4134
4135
4136//===----------------------------------------------------------------------===//
4137// AVX-512  Shift instructions
4138//===----------------------------------------------------------------------===//
// Vector-source / 8-bit-immediate forms built on AVX512_maskable:
//   ri - source vector in a register (format ImmFormR).
//   mi - source vector loaded from memory (format ImmFormM).
// $src2 is a u8imm operand, matched as (i8 imm) in the patterns.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  defm ri : AVX512_maskable<opc, ImmFormR, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, u8imm:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                            SSE_INTSHIFT_ITINS_P.rr>;

  defm mi : AVX512_maskable<opc, ImmFormM, _,
                            (outs _.RC:$dst),
                            (ins _.MemOp:$src1, u8imm:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode
                                    (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i8 imm:$src2))),
                            SSE_INTSHIFT_ITINS_P.rm>;
}
4153
// Broadcast-memory / 8-bit-immediate form (mbi). The vector source is a
// scalar load wrapped in X86VBroadcast, and the instruction is tagged EVEX_B.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _> {
  defm mbi : AVX512_maskable<opc, ImmFormM, _,
                             (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1, u8imm:$src2),
                             OpcodeStr,
                             "$src2, ${src1}"##_.BroadcastStr,
                             "${src1}"##_.BroadcastStr##", $src2",
                             (_.VT (OpNode
                                     (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src1)),
                                     (i8 imm:$src2))),
                             SSE_INTSHIFT_ITINS_P.rm>,
             EVEX_B;
}
4162
// Forms whose second operand is a 128-bit vector, independent of the width of
// the first operand:
//   rr - second operand in VR128X, typed as SrcVT.
//   rm - second operand loaded with loadv2i64 and converted by bc_frag.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, VR128X:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (SrcVT VR128X:$src2))),
                            SSE_INTSHIFT_ITINS_P.rr>,
            AVX512BIBase, EVEX_4V;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, i128mem:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (bc_frag (loadv2i64 addr:$src2)))),
                            SSE_INTSHIFT_ITINS_P.rm>,
            AVX512BIBase, EVEX_4V;
}
4178
// Instantiates avx512_shift_rrm at 512, 256 and 128 bits. The 512-bit form is
// guarded by prd; the narrower forms additionally require HasVLX. The
// compressed-displacement form differs per width (CD8VQ, CD8VH, CD8VF).
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              ValueType SrcVT, PatFrag bc_frag,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
                            VTInfo.info512>,
           EVEX_V512,
           EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
                                 VTInfo.info256>,
                EVEX_V256,
                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
                                 VTInfo.info128>,
                EVEX_V128,
                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
4195
// Dword, qword and word variants of avx512_shift_sizes, each with its own
// opcode. D and Q are guarded by HasAVX512, W by HasBWI; Q also carries VEX_W.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
                              avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
                              avx512vl_i64_info, HasAVX512>,
           VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
                              avx512vl_i16_info, HasBWI>;
}
4205
// Instantiates the immediate forms (ri, mi, mbi) at 512, 256 and 128 bits.
// The 512-bit forms are guarded by HasAVX512; the 256/128-bit forms
// additionally require HasVLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR,
                                  Format ImmFormM, string OpcodeStr,
                                  SDNode OpNode,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            VTInfo.info512>,
           avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                             VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 VTInfo.info128>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  VTInfo.info128>,
                EVEX_V128;
  }
}
4225
// Word-element immediate forms (ri, mi only; no broadcast form is defined
// here). WZ is guarded by HasBWI; WZ256 and WZ128 by HasVLX and HasBWI.
multiclass avx512_shift_rmi_w<bits<8> opcw,
                              Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm WZ : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                             v32i16_info>,
            EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  v16i16x_info>,
                 EVEX_V256;
    defm WZ128 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  v8i16x_info>,
                 EVEX_V128;
  }
}
4239
// Dword and qword immediate forms, each with its own opcode. The mnemonic
// gets a "d" or "q" suffix; Q additionally carries VEX_W.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode> {
  defm D : avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d",
                                  OpNode, avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;
  defm Q : avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q",
                                  OpNode, avx512vl_i64_info>,
           EVEX_CD8<64, CD8VF>, VEX_W;
}
4248
// Shifts by an 8-bit immediate: d/q forms plus the word form.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>,
             AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>,
             AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>,
             AVX512BIi8Base, EVEX_4V;

// Rotates by an 8-bit immediate: d/q forms only.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror",
                                 X86vrotri>,
             AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol",
                                 X86vrotli>,
             AVX512BIi8Base, EVEX_4V;

// Shifts whose second operand is a 128-bit vector (register or memory).
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
4264
4265//===-------------------------------------------------------------------===//
4266// Variable Bit Shifts
4267//===-------------------------------------------------------------------===//
// Two-source vector operation where both sources have type _.VT:
//   rr - second source in a register.
//   rm - second source loaded with _.LdFrag and bitconverted to _.VT.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
                            SSE_INTSHIFT_ITINS_P.rr>,
            AVX5128IBase, EVEX_4V;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode
                                    _.RC:$src1,
                                    (_.VT (bitconvert
                                            (_.LdFrag addr:$src2))))),
                            SSE_INTSHIFT_ITINS_P.rm>,
            AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
4283
// Broadcast-memory (rmb) companion to avx512_var_shift. The second source is a
// scalar load wrapped in X86VBroadcast, and the instruction is tagged EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode
                                     _.RC:$src1,
                                     (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))))),
                             SSE_INTSHIFT_ITINS_P.rm>,
             AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
// Instantiates the rr/rm/rmb forms at 512, 256 and 128 bits. The 512-bit forms
// are guarded by HasAVX512; the narrower ones additionally require HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
  }
}
4308
// Dword and qword variants sharing a single opcode. The mnemonic gets a "d"
// or "q" suffix; Q additionally carries VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
                                  avx512vl_i64_info>,
           VEX_W;
}
4316
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Each narrow operand is inserted into an IMPLICIT_DEF 512-bit vector, the
// instruction named NAME#"WZrr" is applied, and the narrow result is taken
// back out with EXTRACT_SUBREG.
multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _,
                                       SDNode OpNode> {
  let Predicates = [HasBWI, NoVLX] in {
    // 256-bit operands, via sub_ymm.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                    (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(NAME#"WZrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src1, sub_ymm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src2, sub_ymm)),
                sub_ymm)>;

    // 128-bit operands, via sub_xmm.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                    (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(NAME#"WZrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src1, sub_xmm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}
4337
// Word-element rr/rm forms, all carrying VEX_W. WZ is guarded by HasBWI;
// WZ256 and WZ128 by HasVLX and HasBWI.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm WZ : avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
            EVEX_V512, VEX_W;

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
                 EVEX_V128, VEX_W;
  }
}
4351
// Shifts matching the generic shl/sra/srl nodes: d/q forms, word forms, and
// the NoVLX word patterns.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
              avx512_var_shift_w<0x12, "vpsllvw", shl>,
              avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
              avx512_var_shift_w<0x11, "vpsravw", sra>,
              avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;

// Codegen-only copies of VPSRAV that match X86vsrav instead of sra.
let isCodeGenOnly = 1 in
defm VPSRAV_Int : avx512_var_shift_types<0x46, "vpsrav", X86vsrav>,
                  avx512_var_shift_w<0x11, "vpsravw", X86vsrav>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl>,
              avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;

// Rotates matching rotr/rotl: d/q forms only.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
4368
4369//===-------------------------------------------------------------------===//
4370// 1-src variable permutation VPERMW/D/Q
4371//===-------------------------------------------------------------------===//
// Reuses the avx512_var_shift rr/rm/rmb forms for permutes at 512 and 256
// bits. No 128-bit form is instantiated here.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>,
              EVEX_V256;
}
4382
// Reuses the avx512_shift_rmi / avx512_shift_rmbi immediate forms for permutes
// at 512 and 256 bits. No 128-bit form is instantiated here.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR,
                                  Format ImmFormM, string OpcodeStr,
                                  SDNode OpNode,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            VTInfo.info512>,
           avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                             VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               VTInfo.info256>,
              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                VTInfo.info256>,
              EVEX_V256;
}
4397
// Byte/word permutes built from the avx512_var_shift rr/rm forms at all three
// widths. The 512-bit form is guarded by prd; the narrower forms additionally
// require HasVLX.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
           EVEX_V512;

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
  }
}
4411
// Word and byte permutes share opcode 0x8D; VPERMW carries VEX_W and is
// guarded by HasBWI, VPERMB by HasVBMI.
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              avx512vl_i16_info>,
              VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              avx512vl_i8_info>;

// Variable permutes (X86VPermv) on dword/qword and single/double elements.
defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     avx512vl_i64_info>,
               VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     avx512vl_f64_info>,
               VEX_W;

// Immediate-controlled permutes (X86VPermi) on 64-bit elements.
defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                      X86VPermi, avx512vl_i64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
4432//===----------------------------------------------------------------------===//
4433// AVX-512 - VPERMIL
4434//===----------------------------------------------------------------------===//
4435
// Variable-control VPERMIL forms. The data operand uses the types from `_`;
// the control operand uses the types from Ctrl.
//   rr  - control in a register.
//   rm  - control loaded with Ctrl.LdFrag and bitconverted to Ctrl.VT.
//   rmb - control is a scalar load wrapped in X86VBroadcast (EVEX_B).
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
  defm rr : AVX512_maskable<OpcVar, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT Ctrl.RC:$src2)))>,
            T8PD, EVEX_4V;

  defm rm : AVX512_maskable<OpcVar, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1",
                            "$src1, $src2",
                            (_.VT (OpNode
                                    _.RC:$src1,
                                    (Ctrl.VT (bitconvert
                                               (Ctrl.LdFrag addr:$src2)))))>,
            T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;

  defm rmb : AVX512_maskable<OpcVar, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode
                                     _.RC:$src1,
                                     (Ctrl.VT (X86VBroadcast
                                                (Ctrl.ScalarLdFrag
                                                  addr:$src2)))))>,
             T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
4461
// Instantiates avx512_permil_vec with X86VPermilpv at 512, 128 and 256 bits.
// The 512-bit form is guarded by HasAVX512; the narrower forms additionally
// require HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv,
                               _.info512, Ctrl.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv,
                                  _.info128, Ctrl.info128>,
                EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv,
                                  _.info256, Ctrl.info256>,
                EVEX_V256;
  }
}
4475
// Complete VPERMIL family under one name: the variable-control forms (opcode
// OpcVar, via avx512_permil_vec_common) and the immediate-control forms
// (opcode OpcImm, X86VPermilpi, via avx512_shift_rmi_sizes).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _,
                         AVX512VLVectorVTInfo Ctrl> {
  defm NAME : avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;

  defm NAME : avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                     X86VPermilpi, _>,
              EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
4484
// Data uses the FP type info; the control operand uses the integer type info
// of the same element width. VPERMILPD additionally carries VEX_W.
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C,
                               avx512vl_f32_info, avx512vl_i32_info>;
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D,
                               avx512vl_f64_info, avx512vl_i64_info>,
                 VEX_W;
4489//===----------------------------------------------------------------------===//
4490// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
4491//===----------------------------------------------------------------------===//
4492
// All three shuffles use opcode 0x70 and are told apart by their prefix class
// (AVX512BIi8Base, AVX512XSIi8Base, AVX512XDIi8Base).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw>,
               EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw>,
               EVEX, AVX512XDIi8Base;
4500
// Byte shuffle built from the avx512_var_shift rr/rm forms on byte vectors.
// Z is guarded by HasBWI; Z256 and Z128 by HasVLX and HasBWI.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr,
                               SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>,
           EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>,
                EVEX_V128;
  }
}
4510
// vpshufb at all three widths, matching X86pshufb.
defm VPSHUFB : avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
4512
4513//===----------------------------------------------------------------------===//
4514// Move Low to High and High to Low packed FP Instructions
4515//===----------------------------------------------------------------------===//
// vmovlhps, register-register: matches X86Movlhps with a v4f32 result.
def VMOVLHPSZrr
    : AVX512PSI<0x16, MRMSrcReg,
                (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2),
                "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128X:$dst,
                      (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
                IIC_SSE_MOV_LH>,
      EVEX_4V;
// vmovhlps, register-register: matches X86Movhlps with a v4f32 result.
def VMOVHLPSZrr
    : AVX512PSI<0x12, MRMSrcReg,
                (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2),
                "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128X:$dst,
                      (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
                IIC_SSE_MOV_LH>,
      EVEX_4V;
4526
// Integer-typed X86Movlhps / X86Movhlps nodes select the same register-register
// instructions as the v4f32 forms.
let Predicates = [HasAVX512] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128X:$src1,
                               VR128X:$src2)),
            (VMOVLHPSZrr VR128X:$src1,
                         VR128X:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128X:$src1,
                               VR128X:$src2)),
            (VMOVLHPSZrr (v2i64 VR128X:$src1),
                         VR128X:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128X:$src1,
                               VR128X:$src2)),
            (VMOVHLPSZrr VR128X:$src1,
                         VR128X:$src2)>;
}
4538
4539//===----------------------------------------------------------------------===//
4540// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
4542//===----------------------------------------------------------------------===//
// Register + 64-bit memory form (rm) shared by the VMOVH*/VMOVL* loads. The
// memory operand is an f64 load placed in a v2f64 via scalar_to_vector and
// bitconverted to _.VT before being combined with $src1 by OpNode.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _> {
  def rm : AVX512<opc, MRMSrcMem,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode
                          _.RC:$src1,
                          (_.VT (bitconvert
                                  (v2f64 (scalar_to_vector
                                           (loadf64 addr:$src2)))))))],
                  IIC_SSE_MOV_LH>,
           EVEX_4V;
}
4555
// 128-bit load forms. The high-half moves use opcode 0x16 and the low-half
// moves use 0x12; the PD variants additionally carry VEX_W.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
4564
// Additional DAG shapes that fold a load into the VMOVH*/VMOVL* rm
// instructions.
let Predicates = [HasAVX512] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps
              VR128X:$src1,
              (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps
              VR128X:$src1,
              (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl
                     VR128X:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl
                     VR128X:$src1,
                     (bc_v2f64 (v2i64 (scalar_to_vector
                                        (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPS patterns
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
            (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
            (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd
                     VR128X:$src1,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
4594
// vmovhps store: matches a store of f64 element 0 of X86Unpckh applied to the
// source (viewed as v2f64) with itself.
def VMOVHPSZ128mr
    : AVX512PSI<0x17, MRMDestMem,
                (outs),
                (ins f64mem:$dst, VR128X:$src),
                "vmovhps\t{$src, $dst|$dst, $src}",
                [(store
                   (f64 (extractelt
                          (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                     (bc_v2f64 (v4f32 VR128X:$src))),
                          (iPTR 0))),
                   addr:$dst)],
                IIC_SSE_MOV_LH>,
      EVEX, EVEX_CD8<32, CD8VT2>;
// vmovhpd store: matches a store of f64 element 0 of the v2f64 X86Unpckh of
// the source with itself.
def VMOVHPDZ128mr
    : AVX512PDI<0x17, MRMDestMem,
                (outs),
                (ins f64mem:$dst, VR128X:$src),
                "vmovhpd\t{$src, $dst|$dst, $src}",
                [(store
                   (f64 (extractelt
                          (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                          (iPTR 0))),
                   addr:$dst)],
                IIC_SSE_MOV_LH>,
      EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// vmovlps store: matches a store of f64 element 0 of the v4f32 source viewed
// as v2f64.
def VMOVLPSZ128mr
    : AVX512PSI<0x13, MRMDestMem,
                (outs),
                (ins f64mem:$dst, VR128X:$src),
                "vmovlps\t{$src, $dst|$dst, $src}",
                [(store
                   (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                    (iPTR 0))),
                   addr:$dst)],
                IIC_SSE_MOV_LH>,
      EVEX, EVEX_CD8<32, CD8VT2>;
// vmovlpd store: matches a store of f64 element 0 of the v2f64 source.
def VMOVLPDZ128mr
    : AVX512PDI<0x13, MRMDestMem,
                (outs),
                (ins f64mem:$dst, VR128X:$src),
                "vmovlpd\t{$src, $dst|$dst, $src}",
                [(store
                   (f64 (extractelt (v2f64 VR128X:$src),
                                    (iPTR 0))),
                   addr:$dst)],
                IIC_SSE_MOV_LH>,
      EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
4624
// Additional DAG shapes that select the VMOVHPD/VMOVLPS/VMOVLPD store
// instructions.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store
              (f64 (extractelt
                     (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                     (iPTR 0))),
              addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;

  // VMOVLPS patterns
  def : Pat<(store
              (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
              addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
  def : Pat<(store
              (v4i32 (X86Movlps (bc_v4i32 (loadv2i64 addr:$src1)),
                                VR128X:$src2)),
              addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;

  // VMOVLPD patterns
  def : Pat<(store
              (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
              addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
  def : Pat<(store
              (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
              addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
4646//===----------------------------------------------------------------------===//
4647// FMA - Fused Multiply Operations
4648//
4649
// Every form below ties $src1 to $dst.
let Constraints = "$src1 = $dst" in {

// Packed FMA "213" forms built on AVX512_maskable_3src:
//   r  - third operand in a register.
//   m  - third operand loaded with _.LdFrag.
//   mb - third operand is a scalar load wrapped in X86VBroadcast (EVEX_B).
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  defm r : AVX512_maskable_3src<opc, MRMSrcReg, _,
                                (outs _.RC:$dst),
                                (ins _.RC:$src2, _.RC:$src3),
                                OpcodeStr,
                                "$src3, $src2",
                                "$src2, $src3",
                                (_.VT (OpNode _.RC:$src1,
                                              _.RC:$src2,
                                              _.RC:$src3))>,
           AVX512FMA3Base;

  defm m : AVX512_maskable_3src<opc, MRMSrcMem, _,
                                (outs _.RC:$dst),
                                (ins _.RC:$src2, _.MemOp:$src3),
                                OpcodeStr,
                                "$src3, $src2",
                                "$src2, $src3",
                                (_.VT (OpNode _.RC:$src1,
                                              _.RC:$src2,
                                              (_.LdFrag addr:$src3)))>,
           AVX512FMA3Base;

  defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _,
                                 (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                 OpcodeStr,
                                 !strconcat("${src3}", _.BroadcastStr,
                                            ", $src2"),
                                 !strconcat("$src2, ${src3}",
                                            _.BroadcastStr),
                                 (OpNode _.RC:$src1,
                                         _.RC:$src2,
                                         (_.VT (X86VBroadcast
                                                 (_.ScalarLdFrag
                                                   addr:$src3))))>,
            AVX512FMA3Base, EVEX_B;
}

// Register form with an explicit AVX512RC:$rc operand (rb), passed to OpNode
// as (i32 imm:$rc) and tagged EVEX_B, EVEX_RC.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _> {
  defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _,
                                 (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                                 OpcodeStr,
                                 "$rc, $src3, $src2",
                                 "$src2, $src3, $rc",
                                 (_.VT (OpNode _.RC:$src1,
                                               _.RC:$src2,
                                               _.RC:$src3,
                                               (i32 imm:$rc)))>,
            AVX512FMA3Base, EVEX_B, EVEX_RC;
}

} // Constraints = "$src1 = $dst"
4683
// Instantiates the 213-form packed FMA for all three vector widths of the
// given AVX512VLVectorVTInfo:
//   Z    - 512-bit, HasAVX512; the only width that also gets the rounding
//          variant (avx512_fma3_213_round, using OpNodeRnd).
//   Z256 - 256-bit, requires HasVLX and HasAVX512; no rounding variant.
//   Z128 - 128-bit, requires HasVLX and HasAVX512; no rounding variant.
// Each width uses EVEX_CD8 with that width's element size and CD8VF.
4684multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
4685                                     SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
4686  let Predicates = [HasAVX512] in {
4687    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
4688                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
4689                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
4690  }
4691  let Predicates = [HasVLX, HasAVX512] in {
4692    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
4693                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
4694    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
4695                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
4696  }
4697}
4698
// Floating-point front end for the 213 form: creates a PS family ("ps"
// mnemonic suffix, avx512vl_f32_info) and a PD family ("pd" suffix,
// avx512vl_f64_info, additionally tagged VEX_W) from one opcode and one
// mnemonic stem.
4699multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
4700                                                            SDNode OpNodeRnd > {
4701    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
4702                                      avx512vl_f32_info>;
4703    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
4704                                      avx512vl_f64_info>, VEX_W;
4705}
4706
// The six packed 213-form FMA families.  Arguments are: opcode byte, mnemonic
// stem, SDNode for the non-rounding variants, SDNode for the rounding variant.
4707defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
4708defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
4709defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
4710defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
4711defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
4712defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
4713
4714
// Packed FMA, "231" operand form.  As in the 213 form, $src1 is tied to $dst
// and omitted from the explicit ins, but here the tied register is the LAST
// operand of the selection pattern: OpNode($src2, $src3, $src1).
4715let Constraints = "$src1 = $dst" in {
// Non-rounding variants:
//   r  - all sources are registers.
//   m  - $src3 is a full-vector load (_.LdFrag) through _.MemOp.
//   mb - $src3 is a scalar load (_.ScalarLdFrag) fed through X86VBroadcast;
//        the asm strings append _.BroadcastStr and the record adds EVEX_B.
4716multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4717                                                            X86VectorVTInfo _> {
4718  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
4719          (ins _.RC:$src2, _.RC:$src3),
4720          OpcodeStr, "$src3, $src2", "$src2, $src3",
4721          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
4722         AVX512FMA3Base;
4723
4724  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
4725          (ins _.RC:$src2, _.MemOp:$src3),
4726          OpcodeStr, "$src3, $src2", "$src2, $src3",
4727          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
4728         AVX512FMA3Base;
4729
4730  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
4731         (ins _.RC:$src2, _.ScalarMemOp:$src3),
4732         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
4733         "$src2, ${src3}"##_.BroadcastStr,
4734         (_.VT (OpNode _.RC:$src2,
4735                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
4736                      _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
4737}
4738
// Register-only variant with an explicit rounding-control operand $rc, passed
// to OpNode as a fourth (i32 imm) operand; adds EVEX_B and EVEX_RC.
4739multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4740                                                            X86VectorVTInfo _> {
4741  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
4742          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
4743          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
4744          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
4745          AVX512FMA3Base, EVEX_B, EVEX_RC;
4746}
4747} // Constraints = "$src1 = $dst"
4748
// Instantiates the 231-form packed FMA for all three vector widths:
//   Z    - 512-bit, HasAVX512; also gets the rounding variant (OpNodeRnd).
//   Z256 - 256-bit, requires HasVLX and HasAVX512; no rounding variant.
//   Z128 - 128-bit, requires HasVLX and HasAVX512; no rounding variant.
// Each width uses EVEX_CD8 with that width's element size and CD8VF.
4749multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
4750                                     SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
4751  let Predicates = [HasAVX512] in {
4752    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
4753                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
4754                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
4755  }
4756  let Predicates = [HasVLX, HasAVX512] in {
4757    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
4758                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
4759    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
4760                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
4761  }
4762}
4763
// Floating-point front end for the 231 form: a PS family ("ps" suffix,
// avx512vl_f32_info) and a PD family ("pd" suffix, avx512vl_f64_info, plus
// VEX_W) built from one opcode and one mnemonic stem.
4764multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
4765                                                            SDNode OpNodeRnd > {
4766    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
4767                                      avx512vl_f32_info>;
4768    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
4769                                      avx512vl_f64_info>, VEX_W;
4770}
4771
// The six packed 231-form FMA families.  Arguments are: opcode byte, mnemonic
// stem, SDNode for the non-rounding variants, SDNode for the rounding variant.
4772defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
4773defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
4774defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
4775defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
4776defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
4777defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
4778
// Packed FMA, "132" operand form.  $src1 is tied to $dst.  Unlike the 213 and
// 231 groups, the explicit ins are listed as ($src3, $src2) and the asm
// strings are swapped to match, so the memory / broadcast operand is $src2.
// The register pattern is still written OpNode($src1, $src2, $src3).
4779let Constraints = "$src1 = $dst" in {
// Non-rounding variants:
//   r  - all sources are registers.
//   m  - $src2 is a full-vector load (_.LdFrag) through _.MemOp.
//   mb - $src2 is a scalar load (_.ScalarLdFrag) fed through X86VBroadcast;
//        the asm strings append _.BroadcastStr and the record adds EVEX_B.
4780multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4781                                                            X86VectorVTInfo _> {
4782  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
4783          (ins _.RC:$src3, _.RC:$src2),
4784          OpcodeStr, "$src2, $src3", "$src3, $src2",
4785          (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
4786         AVX512FMA3Base;
4787
4788  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
4789          (ins _.RC:$src3, _.MemOp:$src2),
4790          OpcodeStr, "$src2, $src3", "$src3, $src2",
4791          (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>,
4792         AVX512FMA3Base;
4793
4794  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
4795         (ins _.RC:$src3, _.ScalarMemOp:$src2),
4796         OpcodeStr, "${src2}"##_.BroadcastStr##", $src3",
4797         "$src3, ${src2}"##_.BroadcastStr,
4798         (_.VT (OpNode _.RC:$src1,
4799                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
4800                      _.RC:$src3))>, AVX512FMA3Base, EVEX_B;
4801}
4802
// Register-only variant with an explicit rounding-control operand $rc, passed
// to OpNode as a fourth (i32 imm) operand; adds EVEX_B and EVEX_RC.
4803multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4804                                                            X86VectorVTInfo _> {
4805  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
4806          (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
4807          OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
4808          (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
4809          AVX512FMA3Base, EVEX_B, EVEX_RC;
4810}
4811} // Constraints = "$src1 = $dst"
4812
// Instantiates the 132-form packed FMA for all three vector widths:
//   Z    - 512-bit, HasAVX512; also gets the rounding variant (OpNodeRnd).
//   Z256 - 256-bit, requires HasVLX and HasAVX512; no rounding variant.
//   Z128 - 128-bit, requires HasVLX and HasAVX512; no rounding variant.
// Each width uses EVEX_CD8 with that width's element size and CD8VF.
4813multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
4814                                     SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
4815  let Predicates = [HasAVX512] in {
4816    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
4817                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
4818                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
4819  }
4820  let Predicates = [HasVLX, HasAVX512] in {
4821    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
4822                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
4823    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
4824                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
4825  }
4826}
4827
// Floating-point front end for the 132 form: a PS family ("ps" suffix,
// avx512vl_f32_info) and a PD family ("pd" suffix, avx512vl_f64_info, plus
// VEX_W) built from one opcode and one mnemonic stem.
4828multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
4829                                                            SDNode OpNodeRnd > {
4830    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
4831                                      avx512vl_f32_info>;
4832    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
4833                                      avx512vl_f64_info>, VEX_W;
4834}
4835
// The six packed 132-form FMA families.  Arguments are: opcode byte, mnemonic
// stem, SDNode for the non-rounding variants, SDNode for the rounding variant.
4836defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
4837defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
4838defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
4839defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
4840defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
4841defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
4842
4843// Scalar FMA
// Shared skeleton for one scalar FMA form.  The caller supplies the five
// selection dags; this multiclass only fixes operands, asm strings and
// encoding attributes.  $src1 is tied to $dst for every record.
//   RHS_VEC_r  - pattern for r_Int  (vector-register class _.RC, all regs).
//   RHS_VEC_m  - pattern for m_Int  ($src3 is a _.ScalarMemOp memory operand).
//   RHS_VEC_rb - pattern for rb_Int (extra AVX512RC:$rc operand; EVEX_B,
//                EVEX_RC).
//   RHS_r      - complete pattern for the isCodeGenOnly `r` record on _.FRC.
//   RHS_m      - complete pattern for the isCodeGenOnly `m` record on _.FRC.
// NOTE(review): AVX512_maskable_3src_scalar is defined outside this section;
// presumably it also produces the masked variants - confirm.
4844let Constraints = "$src1 = $dst" in {
4845multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4846                               dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
4847                                                        dag RHS_r, dag RHS_m > {
4848  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4849          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
4850          "$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base;
4851
4852  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4853          (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr,
4854          "$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;
4855
4856  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4857         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
4858         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>,
4859                                       AVX512FMA3Base, EVEX_B, EVEX_RC;
4860
  // Codegen-only records on the scalar FP register class (_.FRC).  Unlike the
  // _Int records above, these list the tied $src1 explicitly in their ins.
4861  let isCodeGenOnly = 1 in {
4862    def r     : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
4863                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
4864                     !strconcat(OpcodeStr,
4865                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4866                     [RHS_r]>;
4867    def m     : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
4868                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
4869                    !strconcat(OpcodeStr,
4870                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4871                    [RHS_m]>;
4872  }// isCodeGenOnly = 1
4873}
4874}// Constraints = "$src1 = $dst"
4875
// Builds the 213, 231 and 132 scalar FMA forms for one element type.  Record
// names are NAME # form # SUFF and the mnemonic is OpcodeStr # form #
// _.Suffix.  Each defm passes five dags to avx512_fma3s_common, in order:
// vector register, vector memory, vector rounding, FRC register, FRC memory.
//   * The three vector (_.RC) dags use OpNodeRnd; the non-rounding ones pass
//     (i32 FROUND_CURRENT) and the rounding one passes (i32 imm:$rc).
//   * The two FRC dags use OpNode and include the `set _.FRC:$dst`.
//   * In the vector memory dag the load is wrapped in scalar_to_vector.
// Operand order given to the node, per form:
//   213: ($src2, $src1, $src3)
//   231: ($src2, $src3, $src1)
//   132: ($src1, $src3, $src2)
4876multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
4877         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ ,
4878                                                                  string SUFF> {
4879
4880  defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
4881                (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 FROUND_CURRENT))),
4882                (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1,
4883                         (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), (i32 FROUND_CURRENT))),
4884                (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3,
4885                         (i32 imm:$rc))),
4886                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
4887                         _.FRC:$src3))),
4888                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
4889                         (_.ScalarLdFrag addr:$src3))))>;
4890
4891  defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
4892                (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
4893                (_.VT (OpNodeRnd _.RC:$src2,
4894                       (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
4895                              _.RC:$src1, (i32 FROUND_CURRENT))),
4896                (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1,
4897                                  (i32 imm:$rc))),
4898                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
4899                                          _.FRC:$src1))),
4900                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
4901                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
4902
4903  defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
4904                (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
4905                (_.VT (OpNodeRnd _.RC:$src1,
4906                       (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
4907                              _.RC:$src2, (i32 FROUND_CURRENT))),
4908                (_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2,
4909                         (i32 imm:$rc))),
4910                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
4911                         _.FRC:$src2))),
4912                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
4913                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
4914}
4915
// Top-level scalar FMA multiclass: under HasAVX512, instantiates
// avx512_fma3s_all twice - once with f32x_info and record suffix "SS"
// (EVEX_CD8<32, CD8VT1>, VEX_LIG) and once with f64x_info and suffix "SD"
// (EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W).
4916multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
4917                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{
4918  let Predicates = [HasAVX512] in {
4919    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
4920                                   OpNodeRnd, f32x_info, "SS">,
4921                                   EVEX_CD8<32, CD8VT1>, VEX_LIG;
4922    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
4923                                   OpNodeRnd, f64x_info, "SD">,
4924                                   EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
4925  }
4926}
4927
// The four scalar FMA families.  Arguments are: the 213, 231 and 132 opcode
// bytes, the mnemonic stem, then the non-rounding and rounding SDNodes.
4928defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
4929defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
4930defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
4931defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
4932
4933//===----------------------------------------------------------------------===//
4934// AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the Low/High 52 Bits
4935//===----------------------------------------------------------------------===//
// Register / memory / broadcast variants of the 52-bit multiply-add.  The
// layout mirrors avx512_fma3p_213_rm: $src1 is tied to $dst, the pattern is
// OpNode($src1, $src2, $src3), and the records reuse AVX512FMA3Base.
//   r  - all sources are registers.
//   m  - $src3 is a full-vector load (_.LdFrag) through _.MemOp.
//   mb - $src3 is a scalar load (_.ScalarLdFrag) fed through X86VBroadcast;
//        the asm strings append _.BroadcastStr and the record adds EVEX_B.
// There is no rounding-control variant in this group.
4936let Constraints = "$src1 = $dst" in {
4937multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4938                                                            X86VectorVTInfo _> {
4939  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
4940          (ins _.RC:$src2, _.RC:$src3),
4941          OpcodeStr, "$src3, $src2", "$src2, $src3",
4942          (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
4943         AVX512FMA3Base;
4944
4945  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
4946          (ins _.RC:$src2, _.MemOp:$src3),
4947          OpcodeStr, "$src3, $src2", "$src2, $src3",
4948          (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
4949          AVX512FMA3Base;
4950
4951  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
4952            (ins _.RC:$src2, _.ScalarMemOp:$src3),
4953            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
4954            !strconcat("$src2, ${src3}", _.BroadcastStr ),
4955            (OpNode _.RC:$src1,
4956             _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
4957            AVX512FMA3Base, EVEX_B;
4958}
4959} // Constraints = "$src1 = $dst"
4960
// Instantiates avx512_pmadd52_rm for all three vector widths.  The 512-bit
// form (Z) requires HasIFMA; the 256-bit (Z256) and 128-bit (Z128) forms
// require both HasVLX and HasIFMA.  Each width uses EVEX_CD8 with that
// width's element size and CD8VF.
4961multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
4962                                     AVX512VLVectorVTInfo _> {
4963  let Predicates = [HasIFMA] in {
4964    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info512>,
4965                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
4966  }
4967  let Predicates = [HasVLX, HasIFMA] in {
4968    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info256>,
4969                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
4970    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info128>,
4971                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
4972  }
4973}
4974
// The two IFMA families, both on 64-bit integer elements (avx512vl_i64_info)
// and both tagged VEX_W: opcode 0xb4 uses x86vpmadd52l, 0xb5 uses
// x86vpmadd52h.
4975defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
4976                                  avx512vl_i64_info>, VEX_W;
4977defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
4978                                  avx512vl_i64_info>, VEX_W;
4979
4980//===----------------------------------------------------------------------===//
4981// AVX-512  Scalar convert from signed/unsigned integer to float/double
4982//===----------------------------------------------------------------------===//
4983
// Non-rounding integer-to-FP scalar conversion records.
//   opc      - opcode byte.
//   OpNode   - SDNode used by the _Int patterns; it takes the pass-through
//              vector, the integer source and a rounding-mode operand.
//   SrcRC    - GPR class of the integer source.
//   DstVT    - vector type info of the destination.
//   x86memop - memory operand type for the integer source.
//   ld_frag  - load fragment used in the rm_Int pattern.
//   asm      - mnemonic text placed in front of the operand list.
// Produces four records, all EVEX_4V:
//   rr, rm         - on DstVT.FRC with EMPTY pattern lists; hasSideEffects
//                    (and mayLoad for rm) are therefore set explicitly.
//   rr_Int, rm_Int - isCodeGenOnly, on DstVT.RC, selected from
//                    OpNode($src1, source, FROUND_CURRENT).
4984multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
4985                    X86VectorVTInfo DstVT, X86MemOperand x86memop,
4986                    PatFrag ld_frag, string asm> {
4987  let hasSideEffects = 0 in {
4988    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
4989              (ins DstVT.FRC:$src1, SrcRC:$src),
4990              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
4991              EVEX_4V;
4992    let mayLoad = 1 in
4993      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
4994              (ins DstVT.FRC:$src1, x86memop:$src),
4995              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
4996              EVEX_4V;
4997  } // hasSideEffects = 0
4998  let isCodeGenOnly = 1 in {
4999    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
5000                  (ins DstVT.RC:$src1, SrcRC:$src2),
5001                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5002                  [(set DstVT.RC:$dst,
5003                        (OpNode (DstVT.VT DstVT.RC:$src1),
5004                                 SrcRC:$src2,
5005                                 (i32 FROUND_CURRENT)))]>, EVEX_4V;
5006
5007    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
5008                  (ins DstVT.RC:$src1, x86memop:$src2),
5009                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5010                  [(set DstVT.RC:$dst,
5011                        (OpNode (DstVT.VT DstVT.RC:$src1),
5012                                 (ld_frag addr:$src2),
5013                                 (i32 FROUND_CURRENT)))]>, EVEX_4V;
5014  }//isCodeGenOnly = 1
5015}
5016
// Rounding-control variant of the integer-to-FP scalar conversion.  Defines a
// single register form, rrb_Int, whose extra AVX512RC:$rc operand is passed to
// OpNode as (i32 imm:$rc).  The record is EVEX_4V, EVEX_B and EVEX_RC.  There
// is no memory form with rounding control.
5017multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
5018                    X86VectorVTInfo DstVT, string asm> {
5019  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
5020              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
5021              !strconcat(asm,
5022                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
5023              [(set DstVT.RC:$dst,
5024                    (OpNode (DstVT.VT DstVT.RC:$src1),
5025                             SrcRC:$src2,
5026                             (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
5027}
5028
// Convenience wrapper: instantiates both avx512_vcvtsi_round (rrb_Int) and
// avx512_vcvtsi (rr, rm, rr_Int, rm_Int) under the caller's NAME and tags all
// of them VEX_LIG.
5029multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
5030                    X86VectorVTInfo DstVT, X86MemOperand x86memop,
5031                    PatFrag ld_frag, string asm> {
5032  defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
5033              avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
5034                        VEX_LIG;
5035}
5036
// Integer-to-FP scalar conversion records and their selection patterns, all
// gated on HasAVX512.
// NOTE(review): the asm strings below carry no leading 'v'; presumably the SI
// class (defined outside this section) adds it for EVEX encodings - confirm.
5037let Predicates = [HasAVX512] in {
// Signed conversions, opcode 0x2A.  The 64-bit-source variants add VEX_W and
// use a 64-bit EVEX_CD8 element size.
5038defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
5039                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
5040                                 XS, EVEX_CD8<32, CD8VT1>;
5041defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
5042                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
5043                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
5044defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
5045                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
5046                                 XD, EVEX_CD8<32, CD8VT1>;
5047defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
5048                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
5049                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
5050
// sint_to_fp of a loaded integer selects the pattern-less `rm` record; the
// pass-through operand $src1 is supplied as IMPLICIT_DEF.
5051def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
5052          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
5053def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
5054          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
5055def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
5056          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
5057def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
5058          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
5059
// Same for a GPR source, selecting the `rr` record.
5060def : Pat<(f32 (sint_to_fp GR32:$src)),
5061          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
5062def : Pat<(f32 (sint_to_fp GR64:$src)),
5063          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
5064def : Pat<(f64 (sint_to_fp GR32:$src)),
5065          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
5066def : Pat<(f64 (sint_to_fp GR64:$src)),
5067          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
5068
// Unsigned conversions, opcode 0x7B.
// NOTE(review): VCVTUSI2SDZ is the only one of the eight that instantiates
// avx512_vcvtsi directly (adding VEX_LIG itself), so it gets no rrb_Int
// rounding variant.  Presumably intentional because a 32-bit integer converts
// to f64 exactly - confirm; note that the signed VCVTSI2SDZ above does get one.
5069defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
5070                                  v4f32x_info, i32mem, loadi32,
5071                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
5072defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
5073                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
5074                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
5075defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
5076                                  i32mem, loadi32, "cvtusi2sd{l}">,
5077                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5078defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
5079                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
5080                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
5081
// uint_to_fp of a loaded integer selects the `rm` record, with IMPLICIT_DEF
// as the pass-through operand.
5082def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
5083          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
5084def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
5085          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
5086def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
5087          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
5088def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
5089          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
5090
// Same for a GPR source, selecting the `rr` record.
5091def : Pat<(f32 (uint_to_fp GR32:$src)),
5092          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
5093def : Pat<(f32 (uint_to_fp GR64:$src)),
5094          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
5095def : Pat<(f64 (uint_to_fp GR32:$src)),
5096          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
5097def : Pat<(f64 (uint_to_fp GR64:$src)),
5098          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
5099}
5100
5101//===----------------------------------------------------------------------===//
5102// AVX-512  Scalar convert from float/double to integer
5103//===----------------------------------------------------------------------===//
// Scalar FP-to-integer conversion records (non-truncating), gated on
// HasAVX512.  OpNode takes the source vector and a rounding-mode operand.
//   rr - register source (SrcVT.RC); pattern passes FROUND_CURRENT.
//   rb - register source plus an AVX512RC:$rc operand, passed as
//        (i32 imm:$rc); adds EVEX_B and EVEX_RC.
//   rm - memory source (SrcVT.ScalarMemOp); the scalar load is wrapped in
//        scalar_to_vector and the pattern passes FROUND_CURRENT.
// All three are EVEX and VEX_LIG.
5104multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
5105                                  X86VectorVTInfo DstVT, SDNode OpNode, string asm> {
5106  let Predicates = [HasAVX512] in {
5107    def rr : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
5108                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
5109                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
5110                EVEX, VEX_LIG;
5111    def rb : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
5112                !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
5113                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
5114                EVEX, VEX_LIG, EVEX_B, EVEX_RC;
5115    def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
5116                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
5117                [(set DstVT.RC:$dst, (OpNode
5118                      (SrcVT.VT (scalar_to_vector (SrcVT.ScalarLdFrag addr:$src))),
5119                      (i32 FROUND_CURRENT)))]>,
5120                EVEX, VEX_LIG;
5121  } // Predicates = [HasAVX512]
5122}
5123
5124// Convert float/double to signed/unsigned int 32/64
// Eight instantiations of avx512_cvt_s_int_round: {f32, f64} source x
// {i32, i64} destination x {signed, unsigned}.  Signed forms use opcode 0x2D
// with X86cvts2si, unsigned forms use 0x79 with X86cvts2usi.  f32 sources use
// the XS prefix and a 32-bit EVEX_CD8 element size; f64 sources use XD and 64.
// The i64-destination variants add VEX_W and reuse the same mnemonic string
// as their i32 counterparts.
5125defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
5126                                   X86cvts2si, "cvtss2si">,
5127                                   XS, EVEX_CD8<32, CD8VT1>;
5128defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
5129                                   X86cvts2si, "cvtss2si">,
5130                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
5131defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
5132                                   X86cvts2usi, "cvtss2usi">,
5133                                   XS, EVEX_CD8<32, CD8VT1>;
5134defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
5135                                   X86cvts2usi, "cvtss2usi">, XS, VEX_W,
5136                                   EVEX_CD8<32, CD8VT1>;
5137defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
5138                                   X86cvts2si, "cvtsd2si">,
5139                                   XD, EVEX_CD8<64, CD8VT1>;
5140defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
5141                                   X86cvts2si, "cvtsd2si">,
5142                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
5143defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
5144                                   X86cvts2usi, "cvtsd2usi">,
5145                                   XD, EVEX_CD8<64, CD8VT1>;
5146defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
5147                                   X86cvts2usi, "cvtsd2usi">, XD, VEX_W,
5148                                   EVEX_CD8<64, CD8VT1>;
5149
5150// The SSE versions of these instructions are disabled for AVX512.
5151// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Map the SSE/SSE2 cvtss2si / cvtsd2si intrinsics (32- and 64-bit results)
// onto the `rr` records of the AVX-512 conversions defined above.
// NOTE(review): each pattern first copies the VR128X source to FR32X/FR64X,
// but avx512_cvt_s_int_round declares the `rr` source operand as SrcVT.RC.
// Confirm that the register class of f32x_info/f64x_info (defined outside
// this section) is compatible with that copy.
5152let Predicates = [HasAVX512] in {
5153  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
5154            (VCVTSS2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
5155  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
5156            (VCVTSS2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
5157  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
5158            (VCVTSD2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
5159  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
5160            (VCVTSD2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
5161} // HasAVX512
5162
// Codegen-only, HasAVX512-gated records that bind the integer-to-FP
// conversion intrinsics to EVEX_4V encodings on VR128X.  They are built with
// sse12_cvt_sint_3addr, which is defined outside this file section.  The four
// signed forms use opcode 0x2A; the 64-bit-source forms add VEX_W.  Only one
// unsigned form (cvtusi2sd from GR32, opcode 0x7B) is defined here.
5163let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
5164  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
5165            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
5166            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
5167  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
5168            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
5169            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
5170  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
5171            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
5172            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
5173  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
5174            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
5175            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
5176
5177  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
5178            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
5179            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
5180} // isCodeGenOnly = 1, Predicates = [HasAVX512]
5181
5182// Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating scalar FP-to-integer conversion records, gated on HasAVX512.
//   opc      - opcode byte.
//   asm      - mnemonic text placed in front of the operand list.
//   _SrcRC   - type info of the FP source.
//   _DstRC   - type info of the integer destination.
//   OpNode   - node for the plain scalar (FRC) patterns, e.g. fp_to_sint.
//   OpNodeRnd- node for the vector (_Int) patterns; takes a rounding-mode
//              operand.
// Scalar-register records:
//   rr - FRC source, selected from OpNode.
//   rb - {sae} assembler form, EVEX_B, EMPTY pattern list.
//   rm - scalar load source, selected from OpNode of the load.
// isCodeGenOnly records on the vector register class:
//   rr_Int - OpNodeRnd with FROUND_CURRENT.
//   rb_Int - {sae} form, OpNodeRnd with FROUND_NO_EXC, EVEX_B.
//   rm_Int - memory form, EMPTY pattern list, so mayLoad/hasSideEffects are
//            set explicitly.
// NOTE(review): rr/rb/rm carry only EVEX (rb also EVEX_B) while all _Int
// records also carry VEX_LIG, and none of the instantiations of this
// multiclass in this section adds VEX_LIG - confirm the difference is
// intended.  rb also has an empty pattern but, unlike rm_Int, no explicit
// hasSideEffects setting.
5183multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
5184                            X86VectorVTInfo _DstRC, SDNode OpNode,
5185                            SDNode OpNodeRnd>{
5186let Predicates = [HasAVX512] in {
5187  def rr : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
5188              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
5189              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
5190  def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
5191                !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
5192                []>, EVEX, EVEX_B;
5193  def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
5194              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
5195              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
5196              EVEX;
5197
5198  let isCodeGenOnly = 1 in {
5199      def rr_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
5200                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
5201               [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
5202                                     (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
5203      def rb_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
5204                !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
5205                [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
5206                                      (i32 FROUND_NO_EXC)))]>,
5207                                      EVEX,VEX_LIG , EVEX_B;
5208      let mayLoad = 1, hasSideEffects = 0 in
5209        def rm_Int : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
5210                    (ins _SrcRC.MemOp:$src),
5211                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
5212                    []>, EVEX, VEX_LIG;
5213
5214  } // isCodeGenOnly = 1
5215} //HasAVX512
5216}
5217
5218
// Truncating scalar conversions to signed integers (opcode 0x2C).
defm VCVTTSS2SIZ :
  avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info,
                   fp_to_sint, X86cvtts2IntRnd>,
  XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z :
  avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
                   fp_to_sint, X86cvtts2IntRnd>,
  VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ :
  avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
                   fp_to_sint, X86cvtts2IntRnd>,
  XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z :
  avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
                   fp_to_sint, X86cvtts2IntRnd>,
  VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar conversions to unsigned integers (opcode 0x78).
defm VCVTTSS2USIZ :
  avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
                   fp_to_uint, X86cvtts2UIntRnd>,
  XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z :
  avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
                   fp_to_uint, X86cvtts2UIntRnd>,
  XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ :
  avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
                   fp_to_uint, X86cvtts2UIntRnd>,
  XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z :
  avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
                   fp_to_uint, X86cvtts2UIntRnd>,
  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Select the SSE/SSE2 truncating-convert intrinsics to the EVEX rr_Int forms
// when AVX-512 is available.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int
              (COPY_TO_REGCLASS VR128X:$src, FR32X))>;

  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int
              (COPY_TO_REGCLASS VR128X:$src, FR32X))>;

  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int
              (COPY_TO_REGCLASS VR128X:$src, FR64X))>;

  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int
              (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
} // HasAVX512
5255//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
5257//===----------------------------------------------------------------------===//
// Scalar float <-> double conversion, register and memory forms.
//   opc       - opcode byte.
//   OpcodeStr - instruction mnemonic.
//   _         - type info for the destination; $src1 also uses it.
//   _Src      - type info for the value being converted ($src2).
//   OpNode    - node matched as (OpNode $src1, $src2).
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNode> {
  // Register source.
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
  // Memory source: a scalar load wrapped in scalar_to_vector.
  // $src1 is declared with _.RC so that the operand list agrees with the
  // pattern below and with the rr form above (it used to be _Src.RC).
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT (scalar_to_vector
                                            (_Src.ScalarLdFrag addr:$src2)))))>,
                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
5274
// Scalar conversion with SAE (suppress all exceptions).
// Emits only the register form; the pattern passes FROUND_NO_EXC as the
// trailing i32 operand of OpNodeRnd.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable_scalar<
               opc, MRMSrcReg, _,
               (outs _.RC:$dst),
               (ins _.RC:$src1, _Src.RC:$src2),
               OpcodeStr,
               "{sae}, $src2, $src1", "$src1, $src2, {sae}",
               (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                (_Src.VT _Src.RC:$src2),
                                (i32 FROUND_NO_EXC)))>,
             EVEX_4V, VEX_LIG, EVEX_B;
}
5286
// Scalar conversion with rounding control (RC).
// Emits only the register form; the rounding mode is the AVX512RC:$rc
// operand, matched as the trailing i32 immediate of OpNodeRnd.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd> {
  // This is a register-register instruction, so it uses the non-load
  // scheduling class (same as avx512_cvt_fp_scalar's rr form) rather than
  // WriteCvtF2FLd/ReadAfterLd, which describe a folded load.
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>,
                        EVEX_B, EVEX_RC;
}
// Scalar double -> float: the plain rr/rm forms plus the rounding-control
// register form, all under the "Z" suffix.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
             VEX_W, EVEX_CD8<64, CD8VT1>, EVEX_V512, XD;
  }
}
5308
// Scalar float -> double: the plain rr/rm forms plus the {sae} register
// form, all under the "Z" suffix.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src,
                                      OpNodeRnd>,
             EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
  }
}
// Both scalar float<->double conversions share opcode 0x5A.
defm VCVTSD2SS :
  avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround, X86froundRnd,
                             f64x_info, f32x_info>;
defm VCVTSS2SD :
  avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, X86fpextRnd,
                             f32x_info, f64x_info>;
5322
// f32 -> f64 extension of a register value: the source is copied to VR128X
// and used for both operands of VCVTSS2SDZrr.
def : Pat<(f64 (fextend FR32X:$src)),
          (COPY_TO_REGCLASS
            (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                          (COPY_TO_REGCLASS FR32X:$src, VR128X)),
            VR128X)>,
      Requires<[HasAVX512]>;

// f32 -> f64 extension of a loaded value: fold the load into the rm form.
def : Pat<(f64 (fextend (loadf32 addr:$src))),
          (COPY_TO_REGCLASS
            (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src),
            VR128X)>,
      Requires<[HasAVX512]>;

// Extending load, when optimizing for size: fold the load.
def : Pat<(f64 (extloadf32 addr:$src)),
          (COPY_TO_REGCLASS
            (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src),
            VR128X)>,
      Requires<[HasAVX512, OptForSize]>;

// Extending load, when optimizing for speed: separate VMOVSSZrm load followed
// by the register form.
def : Pat<(f64 (extloadf32 addr:$src)),
          (COPY_TO_REGCLASS
            (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
                          (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)),
            VR128X)>,
      Requires<[HasAVX512, OptForSpeed]>;

// f64 -> f32 rounding of a register value.
def : Pat<(f32 (fround FR64X:$src)),
          (COPY_TO_REGCLASS
            (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
                          (COPY_TO_REGCLASS FR64X:$src, VR128X)),
            VR128X)>,
      Requires<[HasAVX512]>;
5344//===----------------------------------------------------------------------===//
5345// AVX-512  Vector convert from signed/unsigned integer to float/double
5346//          and from float/double to signed/unsigned integer
5347//===----------------------------------------------------------------------===//
5348
// Common maskable vector conversion: register (rr), full-vector memory (rm)
// and broadcast memory (rmb) forms.
//   opc       - opcode byte.
//   OpcodeStr - instruction mnemonic.
//   _         - type info for the destination.
//   _Src      - type info for the source.
//   OpNode    - node matched by all three forms.
//   Broadcast - broadcast suffix appended to the rmb operand string; defaults
//               to the destination's BroadcastStr.
//   Alias     - text appended to the mnemonic of the rm form only.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          string Broadcast = _.BroadcastStr,
                          string Alias = ""> {
  defm rr : AVX512_maskable<
              opc, MRMSrcReg, _,
              (outs _.RC:$dst), (ins _Src.RC:$src),
              OpcodeStr, "$src", "$src",
              (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
            EVEX;

  defm rm : AVX512_maskable<
              opc, MRMSrcMem, _,
              (outs _.RC:$dst), (ins _Src.MemOp:$src),
              OpcodeStr#Alias, "$src", "$src",
              (_.VT (OpNode (_Src.VT
                               (bitconvert (_Src.LdFrag addr:$src)))))>,
            EVEX;

  defm rmb : AVX512_maskable<
               opc, MRMSrcMem, _,
               (outs _.RC:$dst), (ins _Src.ScalarMemOp:$src),
               OpcodeStr,
               "${src}"##Broadcast, "${src}"##Broadcast,
               (_.VT (OpNode (_Src.VT
                                (X86VBroadcast
                                  (_Src.ScalarLdFrag addr:$src)))))>,
             EVEX, EVEX_B;
}
// Conversion with SAE (suppress all exceptions).
// Register form only; the pattern passes FROUND_NO_EXC to OpNodeRnd.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr,
                              X86VectorVTInfo _, X86VectorVTInfo _Src,
                              SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable<
               opc, MRMSrcReg, _,
               (outs _.RC:$dst), (ins _Src.RC:$src),
               OpcodeStr,
               "{sae}, $src", "$src, {sae}",
               (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                                (i32 FROUND_NO_EXC)))>,
             EVEX, EVEX_B;
}
5380
// Conversion with rounding control (RC).
// Register form only; the rounding mode is the AVX512RC:$rc operand, matched
// as the i32 immediate operand of OpNodeRnd.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr,
                             X86VectorVTInfo _, X86VectorVTInfo _Src,
                             SDNode OpNodeRnd> {
  defm rrb : AVX512_maskable<
               opc, MRMSrcReg, _,
               (outs _.RC:$dst), (ins _Src.RC:$src, AVX512RC:$rc),
               OpcodeStr,
               "$rc, $src", "$src, $rc",
               (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC;
}
5390
// Extend packed float to packed double.
// The 512-bit form also gets the {sae} variant; the 128/256-bit forms
// require VLX.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fextend>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The 128-bit form uses X86vfpext and an explicit "{1to2}" broadcast
    // string.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, "{1to2}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info,
                               fextend>,
                EVEX_V256;
  }
}
5405
// Truncate packed double to packed float.
// The 512-bit form also gets the rounding-control variant; the 128/256-bit
// forms require VLX.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
                            fround>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // Both forms produce v4f32x_info, so the broadcast string and the
    // "{x}"/"{y}" mnemonic suffix of the memory form are given explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
                               fround, "{1to4}", "{y}">,
                EVEX_V256;
  }
}
5420
// Packed double <-> float conversions; both use opcode 0x5A.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
                 PS, EVEX_CD8<32, CD8VH>;
5425
// Fold extending loads of packed float into the memory form of VCVTPS2PD.
// The 512-bit pattern is guarded by HasAVX512 so that its predicate matches
// the instruction it selects (VCVTPS2PDZ is defined under HasAVX512), in the
// same way the 256-bit pattern below is guarded by HasVLX.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
              (VCVTPS2PDZrm addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
              (VCVTPS2PDZ256rm addr:$src)>;
}
5433
// Convert signed/unsigned doubleword to double.
//   OpNode    - node used by the 512-bit and 256-bit forms.
//   OpNode128 - node used by the 128-bit form.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128> {
  // This operation has no rounding variant.
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, "{1to2}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5449
// Convert signed/unsigned doubleword to float.
// The 512-bit form also gets the rounding-control variant (OpNodeRnd).
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5465
// Convert float to signed/unsigned doubleword with truncation.
// The 512-bit form also gets the {sae} variant (OpNodeRnd).
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info,
                            OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5481
// Convert float to signed/unsigned doubleword.
// The 512-bit form also gets the rounding-control variant (OpNodeRnd).
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info,
                            OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5497
// Convert double to signed/unsigned doubleword with truncation.
// The 512-bit form also gets the {sae} variant (OpNodeRnd).
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The "x"/"y" suffixes let the asm parser tell the 128-bit and 256-bit
    // memory forms apart: both have the same destination type,
    // 'v4i32x_info'. The broadcast string is spelled out explicitly for the
    // same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                               OpNode, "{1to4}", "{y}">,
                EVEX_V256;
  }
}
5517
// Convert double to signed/unsigned doubleword.
// The 512-bit form also gets the rounding-control variant (OpNodeRnd).
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The "x"/"y" suffixes let the asm parser tell the 128-bit and 256-bit
    // memory forms apart: both have the same destination type,
    // 'v4i32x_info'. The broadcast string is spelled out explicitly for the
    // same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                               OpNode, "{1to4}", "{y}">,
                EVEX_V256;
  }
}
5537
// Convert double to signed/unsigned quadword.
// Requires DQI; the 512-bit form also gets the rounding-control variant.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5553
// Convert double to signed/unsigned quadword with truncation.
// Requires DQI; the 512-bit form also gets the {sae} variant.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5569
// Convert signed/unsigned quadword to double.
// Requires DQI; the 512-bit form also gets the rounding-control variant.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info,
                               OpNode>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5585
// Convert float to signed/unsigned quadword.
// Requires DQI; the 512-bit form also gets the rounding-control variant.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is given explicitly because only 2 elements of
    // the v4f32x_info source are used.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                               OpNode, "{1to2}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5603
// Convert float to signed/unsigned quadword with truncation.
// Requires DQI; the 512-bit form also gets the {sae} variant.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is given explicitly because only 2 elements of
    // the v4f32x_info source are used.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                               OpNode, "{1to2}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode>,
                EVEX_V256;
  }
}
5621
// Convert signed/unsigned quadword to float.
// Requires DQI; the 512-bit form also gets the rounding-control variant.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The "x"/"y" suffixes let the asm parser tell the 128-bit and 256-bit
    // memory forms apart: both have the same destination type,
    // 'v4f32x_info'. The broadcast string is spelled out explicitly for the
    // same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info,
                               OpNode, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info,
                               OpNode, "{1to4}", "{y}">,
                EVEX_V256;
  }
}
5641
// Instantiations of the packed conversion multiclasses. Each line gives the
// opcode, mnemonic, the plain node, the second node the multiclass takes
// (128-bit node for cvtdq2pd, rounding/SAE node otherwise), and then the
// encoding mix-ins.

// Doubleword <-> float/double.
defm VCVTDQ2PD :
  avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>,
  XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS :
  avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, X86VSintToFpRnd>,
  PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ :
  avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, X86VFpToSintRnd>,
  XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ :
  avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86VFpToSintRnd>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ :
  avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, X86VFpToUintRnd>,
  PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ :
  avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, X86VFpToUintRnd>,
  PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD :
  avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
  XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS :
  avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, X86VUintToFpRnd>,
  XD, EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ :
  avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2IntRnd>,
  PD, EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ :
  avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2IntRnd>,
  XD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ :
  avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UIntRnd>,
  PS, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ :
  avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UIntRnd>,
  VEX_W, PS, EVEX_CD8<64, CD8VF>;

// Float/double -> quadword.
defm VCVTPD2QQ :
  avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2IntRnd>,
  VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ :
  avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2IntRnd>,
  PD, EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ :
  avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UIntRnd>,
  VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ :
  avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UIntRnd>,
  PD, EVEX_CD8<32, CD8VH>;

// Float/double -> quadword with truncation.
defm VCVTTPD2QQ :
  avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, X86VFpToSintRnd>,
  VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ :
  avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86VFpToSintRnd>,
  PD, EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ :
  avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, X86VFpToUintRnd>,
  VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ :
  avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86VFpToUintRnd>,
  PD, EVEX_CD8<32, CD8VH>;

// Quadword -> float/double.
defm VCVTQQ2PD :
  avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, X86VSintToFpRnd>,
  VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD :
  avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, X86VUintToFpRnd>,
  VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS :
  avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFpRnd>,
  VEX_W, PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS :
  avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFpRnd>,
  VEX_W, XD, EVEX_CD8<64, CD8VF>;
5725
// Without VLX, 128/256-bit unsigned conversions are selected to the 512-bit
// instruction: the source is placed in a wider register with SUBREG_TO_REG,
// the Z-form instruction is applied, and the result is taken back out with
// EXTRACT_SUBREG.
let Predicates = [HasAVX512, NoVLX] in {
  // v8f32 -> v8i32
  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                               sub_ymm)))),
              sub_ymm)>;

  // v4f32 -> v4i32
  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                        (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1,
                                               sub_xmm)))),
              sub_xmm)>;

  // v4f64 -> v4i32
  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i32 (VCVTTPD2UDQZrr
                       (v8f64 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                             sub_ymm)))),
              sub_xmm)>;

  // v8i32 -> v8f32
  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                               sub_ymm)))),
              sub_ymm)>;

  // v4i32 -> v4f32
  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                        (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1,
                                               sub_xmm)))),
              sub_xmm)>;

  // v4i32 -> v4f64
  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                       (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1,
                                             sub_xmm)))),
              sub_ymm)>;
}
5751
// Fold 512-bit loads into the memory forms of the packed double<->float
// conversions.
let Predicates = [HasAVX512] in {
  // Round a loaded v8f64 to v8f32.
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  // Extending load of v8f32 to v8f64.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
5758
5759//===----------------------------------------------------------------------===//
5760// Half precision conversion instructions
5761//===----------------------------------------------------------------------===//
// Half-precision -> single-precision conversion (vcvtph2ps, opcode 0x13),
// register and memory forms.
//   _dest    - type info for the float destination.
//   _src     - type info for the source vector holding the half values.
//   x86memop - memory operand used by the rm form.
//   ld_frag  - load fragment used by the rm pattern; its result is
//              bitconverted to _src.VT.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag> {
  defm rr : AVX512_maskable<
              0x13, MRMSrcReg, _dest,
              (outs _dest.RC:$dst), (ins _src.RC:$src),
              "vcvtph2ps", "$src", "$src",
              (X86cvtph2ps (_src.VT _src.RC:$src),
                           (i32 FROUND_CURRENT))>,
            T8PD;

  defm rm : AVX512_maskable<
              0x13, MRMSrcMem, _dest,
              (outs _dest.RC:$dst), (ins x86memop:$src),
              "vcvtph2ps", "$src", "$src",
              (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
                           (i32 FROUND_CURRENT))>,
            T8PD;
}
5773
// {sae} register form of vcvtph2ps; the pattern passes FROUND_NO_EXC.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
  defm rb : AVX512_maskable<
              0x13, MRMSrcReg, _dest,
              (outs _dest.RC:$dst), (ins _src.RC:$src),
              "vcvtph2ps", "{sae}, $src", "$src, {sae}",
              (X86cvtph2ps (_src.VT _src.RC:$src),
                           (i32 FROUND_NO_EXC))>,
            T8PD, EVEX_B;
}
5781
// vcvtph2ps instantiations. Only the 512-bit form gets the {sae} variant.
let Predicates = [HasAVX512] in {
  defm VCVTPH2PSZ :
    avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  // The 128/256-bit forms are placed under a HasVLX predicate list.
  let Predicates = [HasVLX] in {
    defm VCVTPH2PSZ256 :
      avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, loadv2i64>,
      EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPH2PSZ128 :
      avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, loadv2i64>,
      EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }
}
5793
// Single-precision -> half-precision conversion (vcvtps2ph, opcode 0x1D).
//   _dest    - type info for the destination vector holding the half values.
//   _src     - type info for the float source.
//   x86memop - memory operand used by the store forms.
// $src2 is the 8-bit immediate operand of the instruction.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop> {
  // Register destination, maskable.
  defm rr : AVX512_maskable<
              0x1D, MRMDestReg, _dest,
              (outs _dest.RC:$dst),
              (ins _src.RC:$src1, i32u8imm:$src2),
              "vcvtps2ph", "$src2, $src1", "$src1, $src2",
              (X86cvtps2ph (_src.VT _src.RC:$src1),
                           (i32 imm:$src2),
                           (i32 FROUND_CURRENT)),
              NoItinerary, 0, X86select>,
            AVX512AIi8Base;

  // Memory destination, selected from a store of the converted value.
  def mr : AVX512AIi8<
             0x1D, MRMDestMem,
             (outs),
             (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
                                            (i32 imm:$src2),
                                            (i32 FROUND_CURRENT))),
                     addr:$dst)]>;

  // Masked memory destination; no pattern, so mayStore is set explicitly.
  let hasSideEffects = 0, mayStore = 1 in
  def mrk : AVX512AIi8<
              0x1D, MRMDestMem,
              (outs),
              (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1,
                   i32u8imm:$src2),
              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
              []>,
            EVEX_K;
}
// {sae} register form of vcvtps2ph; the pattern passes FROUND_NO_EXC.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
  defm rb : AVX512_maskable<
              0x1D, MRMDestReg, _dest,
              (outs _dest.RC:$dst),
              (ins _src.RC:$src1, i32u8imm:$src2),
              "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
              (X86cvtps2ph (_src.VT _src.RC:$src1),
                           (i32 imm:$src2),
                           (i32 FROUND_NO_EXC)),
              NoItinerary, 0, X86select>,
            EVEX_B, AVX512AIi8Base;
}
// vcvtps2ph instantiations.  The store operand is sized to the packed
// half-precision result: 16 x 16 bits (f256mem) for the 512-bit source,
// 8 x 16 bits (f128mem) for the 256-bit source and 4 x 16 bits (f64mem) for
// the 128-bit source, mirroring the f64mem load operand of VCVTPH2PSZ128.
// The 128/256-bit forms replace the outer predicate list with [HasVLX].
// NOTE(review): the mr store pattern of avx512_cvtps2ph still stores the full
// _dest.VT (v8i16) for the 128-bit form - confirm it cannot be selected for a
// genuine 128-bit store.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
                      EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }
}
5835
// Unordered/ordered scalar fp compare with {sae} that sets EFLAGS.
// Only a register-register form (rb) is defined; its pattern passes
// FROUND_NO_EXC as the final operand of OpNode.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
                              string OpcodeStr> {
  def rb : AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr#"\t{{sae}, $src2, $src1|$src1, $src2, {sae}}",
                  [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2,
                                        (i32 FROUND_NO_EXC)))],
                  IIC_SSE_COMIS_RR>,
           EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[WriteFAdd]>;
}
5846
// {sae} register forms of the scalar compares.  Each defm instantiates
// avx512_ord_cmp_sae, which contributes a single "rb" instruction, and every
// instruction here defines EFLAGS.
let Predicates = [HasAVX512], Defs = [EFLAGS] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae,
                                      "vucomiss">,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae,
                                      "vucomisd">,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ  : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae,
                                      "vcomiss">,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ  : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae,
                                      "vcomisd">,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
5857
// EVEX-encoded scalar compares built on the shared sse12_ord_cmp multiclass.
// Every instruction in this group defines EFLAGS.
let Predicates = [HasAVX512], Defs = [EFLAGS] in {
  // Unordered compares on FR32X/FR64X, selected from X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd">,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Ordered compares: the pattern list is forced empty.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss">,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd">,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Codegen-only variants operating on VR128X, selected from
  // X86ucomi / X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
                                       load, "ucomiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
                                       load, "ucomisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
                                       load, "comiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
                                       load, "comisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
5889
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Scalar forms: rr takes both sources in registers; rm loads the second
/// source through _.ScalarLdFrag and wraps it in scalar_to_vector.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86VectorVTInfo _> {
  let Predicates = [HasAVX512], AddedComplexity = 20 in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2))>,
              EVEX_4V;

    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT (scalar_to_vector
                                                (_.ScalarLdFrag addr:$src2))))>,
              EVEX_4V;
  }
}
5905
// Scalar vrcp14 / vrsqrt14 instantiations for f32 and f64; the f64 variants
// add VEX_W and use a 64-bit CD8 element size.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
                  EVEX_CD8<32, CD8VT1>, T8PD;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
                  EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
5914
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Packed unary forms:
///   r  - register source.
///   m  - full-vector load through _.LdFrag (bitconverted to _.FloatVT).
///   mb - scalar load broadcast with X86VBroadcast, marked EVEX_B.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86VectorVTInfo _> {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (_.FloatVT (OpNode _.RC:$src))>,
           EVEX, T8PD;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.FloatVT
                                      (bitconvert (_.LdFrag addr:$src))))>,
           EVEX, T8PD;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"##_.BroadcastStr,
                            "${src}"##_.BroadcastStr,
                            (OpNode (_.FloatVT
                                       (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))))>,
            EVEX, T8PD, EVEX_B;
}
5932
/// avx512_fp14_p_vl_all - instantiate avx512_fp14_p for the ps/pd element
/// types at 512 bits, and at 128/256 bits under HasVLX.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
5955
// Packed vrsqrt14 / vrcp14 at every vector length, selected from
// X86frsqrt / X86frcp.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14   : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
5958
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Scalar forms whose OpNode takes a trailing rounding operand:
///   r  - register sources, FROUND_CURRENT.
///   rb - register sources with {sae}, FROUND_NO_EXC, marked EVEX_B.
///   m  - second source loaded via _.ScalarLdFrag, FROUND_CURRENT.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode> {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2),
                                          (i32 FROUND_CURRENT))>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "{sae}, $src2, $src1",
                                   "$src1, $src2, {sae}",
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.VT _.RC:$src2),
                                           (i32 FROUND_NO_EXC))>,
            EVEX_B;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                  OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT (scalar_to_vector
                                             (_.ScalarLdFrag addr:$src2))),
                                          (i32 FROUND_CURRENT))>;
}
5982
/// avx512_eri_s - instantiate avx512_fp28_s for the "ss" (f32) and "sd" (f64)
/// scalar flavours; the f64 flavour adds VEX_W.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm SS : avx512_fp28_s<opc, !strconcat(OpcodeStr, "ss"), f32x_info, OpNode>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_fp28_s<opc, !strconcat(OpcodeStr, "sd"), f64x_info, OpNode>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
5989
// Scalar vrcp28 / vrsqrt28 are gated on HasERI; scalar vgetexp is defined
// outside that predicate block.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Packed unary forms whose OpNode takes a trailing rounding operand; all
/// three pass FROUND_CURRENT:
///   r  - register source.
///   m  - full-vector load through _.LdFrag.
///   mb - scalar load broadcast with X86VBroadcast, marked EVEX_B.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode> {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT _.RC:$src),
                                   (i32 FROUND_CURRENT))>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.FloatVT
                                      (bitconvert (_.LdFrag addr:$src))),
                                   (i32 FROUND_CURRENT))>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"##_.BroadcastStr,
                            "${src}"##_.BroadcastStr,
                            (OpNode (_.FloatVT
                                       (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))),
                                    (i32 FROUND_CURRENT))>,
            EVEX_B;
}
/// avx512_fp28_p_round - {sae} register form for the packed fp28-style
/// operations; the pattern passes FROUND_NO_EXC and the form is EVEX_B.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               SDNode OpNode> {
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src),
                                    (i32 FROUND_NO_EXC))>,
            EVEX_B;
}
6025
/// avx512_eri - 512-bit ps/pd instantiations combining the regular forms
/// (avx512_fp28_p) with the {sae} register form (avx512_fp28_p_round).
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm PS : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                          OpNode>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                                OpNode>,
            T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                          OpNode>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                                OpNode>,
            T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
6034
/// avx512_fp_unaryop_packed - 128/256-bit ps/pd instantiations of
/// avx512_fp28_p.  Nothing is defined at 512 bits here.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                OpNode>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
                                OpNode>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v2f64x_info,
                                OpNode>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v4f64x_info,
                                OpNode>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
// Packed vrsqrt28 / vrcp28 / vexp2 are gated on HasERI.
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
}

// Packed vgetexp sits outside the HasERI block and also gets the 128/256-bit
// forms from avx512_fp_unaryop_packed.
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd>,
               EVEX;
6057
/// avx512_sqrt_packed_round - register form taking an explicit rounding
/// control operand (AVX512RC:$rc), which is forwarded to OpNodeRnd as an i32
/// immediate.  Marked EVEX_B and EVEX_RC.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    SDNode OpNodeRnd, X86VectorVTInfo _> {
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src, AVX512RC:$rc), OpcodeStr,
                            "$rc, $src", "$src, $rc",
                            (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
            EVEX, EVEX_B, EVEX_RC;
}
6065
/// avx512_sqrt_packed - packed unary forms of OpNode:
///   r  - register source.
///   m  - full-vector load through _.LdFrag (bitconverted to _.FloatVT).
///   mb - scalar load broadcast with X86VBroadcast, marked EVEX_B.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86VectorVTInfo _> {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (_.FloatVT (OpNode _.RC:$src))>,
           EVEX;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.FloatVT
                                      (bitconvert (_.LdFrag addr:$src))))>,
           EVEX;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"##_.BroadcastStr,
                            "${src}"##_.BroadcastStr,
                            (OpNode (_.FloatVT
                                       (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))))>,
            EVEX, EVEX_B;
}
6083
/// avx512_sqrt_packed_all - instantiate avx512_sqrt_packed for ps/pd at
/// 512 bits, and at 128/256 bits under HasVLX.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode> {
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", OpNode, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", OpNode, v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", OpNode, v4f32x_info>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", OpNode, v8f32x_info>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", OpNode, v2f64x_info>,
                  EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", OpNode, v4f64x_info>,
                  EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
6108
/// avx512_sqrt_packed_all_round - 512-bit ps/pd instantiations of the
/// explicit-rounding register form (avx512_sqrt_packed_round).
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        SDNode OpNodeRnd> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", OpNodeRnd,
                                      v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", OpNodeRnd,
                                      v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
6116
/// avx512_sqrt_scalar - scalar square-root forms.
///   r_Int / m_Int - vector-register forms selected from OpNodeRnd with
///                   FROUND_CURRENT (register / scalar-load second source).
///   rb_Int        - explicit rounding control ($rc), EVEX_B + EVEX_RC.
///   r / m         - codegen-only forms on _.FRC with no patterns of their own.
/// The two trailing patterns map OpNode on a bare scalar onto the r / m forms,
/// feeding IMPLICIT_DEF as the first source.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                              string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                      (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                      "$src2, $src1", "$src1, $src2",
                                      (OpNodeRnd (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 (i32 FROUND_CURRENT))>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                      (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                      OpcodeStr,
                                      "$src2, $src1", "$src1, $src2",
                                      (OpNodeRnd (_.VT _.RC:$src1),
                                                 (_.VT (scalar_to_vector
                                                    (_.ScalarLdFrag addr:$src2))),
                                                 (i32 FROUND_CURRENT))>;

  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.RC:$src2,
                                            AVX512RC:$rc),
                                       OpcodeStr,
                                       "$rc, $src2, $src1",
                                       "$src1, $src2, $rc",
                                       (OpNodeRnd (_.VT _.RC:$src1),
                                                  (_.VT _.RC:$src2),
                                                  (i32 imm:$rc))>,
                EVEX_B, EVEX_RC;

  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;

    let mayLoad = 1 in
    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
  }

  // Scalar OpNode on a register: the first source is left undefined.
  def : Pat<(_.EltVT (OpNode _.FRC:$src)),
            (!cast<Instruction>(NAME#SUFF#Zr)
               (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;

  // Load folding is restricted to HasAVX512 with OptForSize.
  def : Pat<(_.EltVT (OpNode (load addr:$src))),
            (!cast<Instruction>(NAME#SUFF#Zm)
               (_.EltVT (IMPLICIT_DEF)), addr:$src)>,
        Requires<[HasAVX512, OptForSize]>;
}
6161
/// avx512_sqrt_scalar_all - ss (XS prefix) and sd (XD prefix, VEX_W)
/// instantiations of avx512_sqrt_scalar, using fsqrt and X86fsqrtRnds.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
  defm SSZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "ss"), f32x_info,
                                "SS", fsqrt, X86fsqrtRnds>,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "sd"), f64x_info,
                                "SD", fsqrt, X86fsqrtRnds>,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
6168
// Packed vsqrt: regular forms plus the 512-bit explicit-rounding forms.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;

// Scalar vsqrt (ss / sd), marked VEX_LIG.
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
6173
// Select scalar f32 X86frsqrt / X86frcp through the rcp14/rsqrt14 scalar
// instructions.  The register forms move the FR32X input into VR128X first;
// the load-folding forms are only used with OptForSize.
// NOTE(review): the f32 result is copied to VR128X rather than FR32X -
// confirm this is intentional.
let Predicates = [HasAVX512] in {
  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (COPY_TO_REGCLASS
               (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)),
                             (COPY_TO_REGCLASS FR32X:$src, VR128X)),
               VR128X)>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (COPY_TO_REGCLASS
               (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src),
               VR128X)>,
        Requires<[OptForSize]>;

  def : Pat<(f32 (X86frcp FR32X:$src)),
            (COPY_TO_REGCLASS
               (VRCP14SSrr (v4f32 (IMPLICIT_DEF)),
                           (COPY_TO_REGCLASS FR32X:$src, VR128X)),
               VR128X)>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (COPY_TO_REGCLASS
               (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src),
               VR128X)>,
        Requires<[OptForSize]>;
}
6186
/// avx512_rndscale_scalar - scalar vrndscale forms selected from X86RndScales.
///   r  - register sources plus an 8-bit immediate, FROUND_CURRENT.
///   rb - same operands with {sae}, FROUND_NO_EXC, marked EVEX_B.
///   m  - second source loaded via _.ScalarLdFrag, FROUND_CURRENT.
/// The anonymous patterns lower ffloor / fceil / ftrunc / frint / fnearbyint
/// on _.FRC (register or loaded operand) onto r / m with immediates
/// 0x1 / 0x2 / 0x3 / 0x4 / 0xc respectively.
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {

  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2,
                                         i32u8imm:$src3),
                                    OpcodeStr,
                                    "$src3, $src2, $src1",
                                    "$src1, $src2, $src3",
                                    (_.VT (X86RndScales (_.VT _.RC:$src1),
                                                        (_.VT _.RC:$src2),
                                                        (i32 imm:$src3),
                                                        (i32 FROUND_CURRENT)))>;

    defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2,
                                          i32u8imm:$src3),
                                     OpcodeStr,
                                     "$src3, {sae}, $src2, $src1",
                                     "$src1, $src2, {sae}, $src3",
                                     (_.VT (X86RndScales (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         (i32 imm:$src3),
                                                         (i32 FROUND_NO_EXC)))>,
              EVEX_B;

    defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                         i32u8imm:$src3),
                                    OpcodeStr,
                                    "$src3, $src2, $src1",
                                    "$src1, $src2, $src3",
                                    (_.VT (X86RndScales (_.VT _.RC:$src1),
                                             (_.VT (scalar_to_vector
                                                (_.ScalarLdFrag addr:$src2))),
                                             (i32 imm:$src3),
                                             (i32 FROUND_CURRENT)))>;
  }

  let Predicates = [HasAVX512] in {
    // Register operand: widen _.FRC to _.RC, apply the "r" form with an
    // undefined first source, then narrow the result back to _.FRC.
    def : Pat<(ffloor _.FRC:$src),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##r)
                          (_.VT (IMPLICIT_DEF)),
                          (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)),
                          (i32 0x1))),
                 _.FRC)>;
    def : Pat<(fceil _.FRC:$src),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##r)
                          (_.VT (IMPLICIT_DEF)),
                          (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)),
                          (i32 0x2))),
                 _.FRC)>;
    def : Pat<(ftrunc _.FRC:$src),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##r)
                          (_.VT (IMPLICIT_DEF)),
                          (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)),
                          (i32 0x3))),
                 _.FRC)>;
    def : Pat<(frint _.FRC:$src),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##r)
                          (_.VT (IMPLICIT_DEF)),
                          (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)),
                          (i32 0x4))),
                 _.FRC)>;
    def : Pat<(fnearbyint _.FRC:$src),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##r)
                          (_.VT (IMPLICIT_DEF)),
                          (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)),
                          (i32 0xc))),
                 _.FRC)>;

    // Loaded operand: fold the scalar load into the "m" form.
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##m)
                          (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x1))),
                 _.FRC)>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##m)
                          (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x2))),
                 _.FRC)>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##m)
                          (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x3))),
                 _.FRC)>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##m)
                          (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x4))),
                 _.FRC)>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (COPY_TO_REGCLASS
                 (_.VT (!cast<Instruction>(NAME##m)
                          (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0xc))),
                 _.FRC)>;
  }
}
6245
// Scalar vrndscale instantiations: ss uses opcode 0x0A on f32x_info, sd uses
// opcode 0x0B on f64x_info and adds VEX_W.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
                   AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>,
                   VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
6251
6252//-------------------------------------------------
6253// Integer truncate and extend operations
6254//-------------------------------------------------
6255
/// avx512_trunc_common - register and store forms of a truncating move from
/// SrcInfo to DestInfo.
///   rr       - register form built through AVX512_maskable (which also
///              provides the rrk / rrkz variants referenced below).
///   mr / mrk - plain and write-masked store forms.  They carry no patterns
///              here; patterns are attached separately via Pat<> records.
/// The anonymous patterns map an X86select over the truncate onto rrkz / rrk.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
                              X86MemOperand x86memop> {

  defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
                       EVEX, T8XS;

  // For intrinsic pattern matching: an undef pass-through value selects the
  // zero-masking form.
  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
                           (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                           undef)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
                                      SrcInfo.RC:$src1)>;

  // An all-zeros pass-through value also selects the zero-masking form.
  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
                           (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                           DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
                                      SrcInfo.RC:$src1)>;

  // A register pass-through value selects the merge-masking form.
  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
                           (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                           DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
                                      DestInfo.KRCWM:$mask ,
                                      SrcInfo.RC:$src1)>;

  // The store forms have a memory destination and no memory source, so only
  // mayStore is set (matching the masked vcvtps2ph store in this file).
  let mayStore = 1, hasSideEffects = 0 in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               []>, EVEX;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
               []>, EVEX, EVEX_K;
  }//mayStore = 1, hasSideEffects = 0
}
6297
/// avx512_trunc_mr_lowering - attach store patterns to the mr / mrk
/// instructions: truncFrag selects the plain store, mtruncFrag the
/// write-masked store.  DestInfo is accepted but not referenced here.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag > {
  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
               addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store.
  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                        (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
               addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
6311
/// avx512_trunc_sat_mr_lowering - map the
/// int_x86_avx512_mask_pmov<sat>_<src><dst>_mem_<size> intrinsic onto the
/// store instructions: a general mask is copied into SrcInfo.KRCWM and uses
/// mrk, while a mask of -1 uses the unmasked mr form.
multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
                                        X86VectorVTInfo DestInfo, string sat > {
  // Arbitrary mask held in a GPR (SrcInfo.MRC).
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
                              DestInfo.Suffix#"_mem_"#SrcInfo.Size)
               addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
               addr:$ptr,
               (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
               (SrcInfo.VT SrcInfo.RC:$src))>;

  // Mask of -1.
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
                              DestInfo.Suffix#"_mem_"#SrcInfo.Size)
               addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
               addr:$ptr,
               (SrcInfo.VT SrcInfo.RC:$src))>;
}
6328
/// avx512_trunc - instantiate avx512_trunc_common plus the truncating-store
/// lowering for each vector length.  The 128/256-bit forms require HasVLX in
/// addition to prd; the 512-bit form requires prd only.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
         X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
         Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
                                    DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                         truncFrag, mtruncFrag>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
                                    DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                         truncFrag, mtruncFrag>,
                EVEX_V256;
  }

  let Predicates = [prd] in
  defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
                               DestInfoZ, x86memopZ>,
           avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                    truncFrag, mtruncFrag>,
           EVEX_V512;
}
6353
/// avx512_trunc_sat - like avx512_trunc, but the store forms are lowered from
/// the "sat"-named intrinsics via avx512_trunc_sat_mr_lowering.  The
/// 128/256-bit forms require HasVLX in addition to prd.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
         X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
                                    DestInfoZ128, x86memopZ128>,
                avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                             sat>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
                                    DestInfoZ256, x86memopZ256>,
                avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                             sat>,
                EVEX_V256;
  }

  let Predicates = [prd] in
  defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
                               DestInfoZ, x86memopZ>,
           avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ, sat>,
           EVEX_V512;
}
6377
/// avx512_trunc_qb - i64 -> i8 truncate; store operands are 16/32/64 bits for
/// the 128/256/512-bit sources.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i16mem, i32mem, i64mem,
                           truncstorevi8, masked_truncstorevi8>,
              EVEX_CD8<8, CD8VO>;
}
/// avx512_trunc_sat_qb - i64 -> i8 truncate in the "sat" flavour; the mnemonic
/// is "vpmov" + sat + "qb".
multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "qb"), OpNode,
                               avx512vl_i64_info,
                               v16i8x_info, v16i8x_info, v16i8x_info,
                               i16mem, i32mem, i64mem, sat>,
              EVEX_CD8<8, CD8VO>;
}
6388
/// avx512_trunc_qw - i64 -> i16 truncate; store operands are 32/64/128 bits
/// for the 128/256/512-bit sources.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v8i16x_info, v8i16x_info, v8i16x_info,
                           i32mem, i64mem, i128mem,
                           truncstorevi16, masked_truncstorevi16>,
              EVEX_CD8<16, CD8VQ>;
}
// Saturating quadword -> word truncate.  The mnemonic is built as
// "vpmov" + sat + "qw"; sat is also forwarded to avx512_trunc_sat.
// Destination info is v8i16x_info at every width, with i32mem/i64mem/i128mem
// as the 128/256/512-bit memory operands.
multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "qw"), OpNode,
                               avx512vl_i64_info,
                               v8i16x_info, v8i16x_info, v8i16x_info,
                               i32mem, i64mem, i128mem, sat>,
              EVEX_CD8<16, CD8VQ>;
}
6399
// Quadword -> doubleword truncate: forwards to avx512_trunc with the i64
// source info, vector infos v4i32x_info, v4i32x_info and v8i32x_info, the
// i64mem/i128mem/i256mem operands and the vi32 truncating-store fragments.
// The result is tagged EVEX_CD8<32, CD8VH>.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v4i32x_info, v4i32x_info, v8i32x_info,
                           i64mem, i128mem, i256mem,
                           truncstorevi32, masked_truncstorevi32>,
              EVEX_CD8<32, CD8VH>;
}
// Saturating quadword -> doubleword truncate.  The mnemonic is built as
// "vpmov" + sat + "qd"; sat is also forwarded to avx512_trunc_sat.
// Destination info is v4i32x_info for the 128- and 256-bit forms and
// v8i32x_info for the 512-bit form, with i64mem/i128mem/i256mem as the
// respective memory operands.
multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "qd"), OpNode,
                               avx512vl_i64_info,
                               v4i32x_info, v4i32x_info, v8i32x_info,
                               i64mem, i128mem, i256mem, sat>,
              EVEX_CD8<32, CD8VH>;
}
6410
// Doubleword -> byte truncate: forwards to avx512_trunc with the i32 source
// info, v16i8x_info for all three vector-info arguments, the
// i32mem/i64mem/i128mem operands and the vi8 truncating-store fragments.
// The result is tagged EVEX_CD8<8, CD8VQ>.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i32mem, i64mem, i128mem,
                           truncstorevi8, masked_truncstorevi8>,
              EVEX_CD8<8, CD8VQ>;
}
// Saturating doubleword -> byte truncate.  The mnemonic is built as
// "vpmov" + sat + "db"; sat is also forwarded to avx512_trunc_sat.
// Destination info is v16i8x_info at every width, with i32mem/i64mem/i128mem
// as the 128/256/512-bit memory operands.
multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "db"), OpNode,
                               avx512vl_i32_info,
                               v16i8x_info, v16i8x_info, v16i8x_info,
                               i32mem, i64mem, i128mem, sat>,
              EVEX_CD8<8, CD8VQ>;
}
6421
// Doubleword -> word truncate: forwards to avx512_trunc with the i32 source
// info, vector infos v8i16x_info, v8i16x_info and v16i16x_info, the
// i64mem/i128mem/i256mem operands and the vi16 truncating-store fragments.
// The result is tagged EVEX_CD8<16, CD8VH>.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                           v8i16x_info, v8i16x_info, v16i16x_info,
                           i64mem, i128mem, i256mem,
                           truncstorevi16, masked_truncstorevi16>,
              EVEX_CD8<16, CD8VH>;
}
// Saturating doubleword -> word truncate.  The mnemonic is built as
// "vpmov" + sat + "dw"; sat is also forwarded to avx512_trunc_sat.
// Destination info is v8i16x_info for the 128- and 256-bit forms and
// v16i16x_info for the 512-bit form, with i64mem/i128mem/i256mem as the
// respective memory operands.
multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "dw"), OpNode,
                               avx512vl_i32_info,
                               v8i16x_info, v8i16x_info, v16i16x_info,
                               i64mem, i128mem, i256mem, sat>,
              EVEX_CD8<16, CD8VH>;
}
6432
// Word -> byte truncate: forwards to avx512_trunc with the i16 source info,
// vector infos v16i8x_info, v16i8x_info and v32i8x_info, the
// i64mem/i128mem/i256mem operands and the vi8 truncating-store fragments.
// HasBWI is passed as the trailing predicate argument.  The result is tagged
// EVEX_CD8<16, CD8VH>.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                           v16i8x_info, v16i8x_info, v32i8x_info,
                           i64mem, i128mem, i256mem,
                           truncstorevi8, masked_truncstorevi8, HasBWI>,
              EVEX_CD8<16, CD8VH>;
}
// Saturating word -> byte truncate.  The mnemonic is built as
// "vpmov" + sat + "wb"; sat is also forwarded to avx512_trunc_sat.
// Destination info is v16i8x_info for the 128- and 256-bit forms and
// v32i8x_info for the 512-bit form, with i64mem/i128mem/i256mem as the
// respective memory operands.  The base predicate is HasBWI instead of the
// default HasAVX512.
multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
  defm NAME : avx512_trunc_sat<opc, !strconcat("vpmov", sat, "wb"), OpNode,
                               avx512vl_i16_info,
                               v16i8x_info, v16i8x_info, v32i8x_info,
                               i64mem, i128mem, i256mem, sat, HasBWI>,
              EVEX_CD8<16, CD8VH>;
}
6443
// Down-converting moves.  Each source/destination element pairing gets the
// plain truncate (X86vtrunc) plus the "s" (X86vtruncs) and "us" (X86vtruncus)
// saturating forms.

// i64 -> i8
defm VPMOVQB   : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
defm VPMOVSQB  : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;

// i64 -> i16
defm VPMOVQW   : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
defm VPMOVSQW  : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;

// i64 -> i32
defm VPMOVQD   : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
defm VPMOVSQD  : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;

// i32 -> i8
defm VPMOVDB   : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
defm VPMOVSDB  : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;

// i32 -> i16
defm VPMOVDW   : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
defm VPMOVSDW  : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;

// i16 -> i8
defm VPMOVWB   : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
defm VPMOVSWB  : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
6467
// X86vtrunc of a 256-bit source when VLX is not available (NoVLX).
// The 256-bit input is wrapped into a 512-bit register with SUBREG_TO_REG
// (sub_ymm), truncated with the 512-bit (Z) register-register instruction,
// and the xmm subregister of that result is extracted as the final value.
let Predicates = [HasAVX512, NoVLX] in {
// v8i32 -> v8i16 via VPMOVDWZrr.
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 via VPMOVQDZrr.
def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
6478
// Same widening scheme for the word -> byte truncate, which needs BWI:
// v16i16 is wrapped into a v32i16 register (sub_ymm), truncated with
// VPMOVWBZrr, and the xmm subregister of the result is extracted.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG  (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
6484
// Register (rr) and memory (rm) forms shared by the vpmovzx*/vpmovsx*
// definitions below.  Both are defined through AVX512_maskable and carry the
// EVEX class.
//   DestInfo / SrcInfo - result and source vector type info.
//   x86memop           - memory operand used by the rm form.
//   LdFrag             - load fragment matched by the rm form.
//   OpNode             - extension node matched by the rr form.
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo DestInfo,
                                X86VectorVTInfo SrcInfo,
                                X86MemOperand x86memop, PatFrag LdFrag,
                                SDPatternOperator OpNode> {
  // Register source: OpNode applied to a SrcInfo register.
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src),
                OpcodeStr, "$src", "$src",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
            EVEX;

  // Memory source: the extending load fragment applied to the address.
  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                (outs DestInfo.RC:$dst), (ins x86memop:$src),
                OpcodeStr, "$src", "$src",
                (DestInfo.VT (LdFrag addr:$src))>,
            EVEX;
}
6498
// Byte -> word extension at 128/256/512 bits.
// LdFrag defaults to the fragment named ExtTy # "extloadvi8".  The 128- and
// 256-bit forms require HasVLX and HasBWI; the 512-bit form requires HasBWI.
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, string ExtTy,
                            PatFrag LdFrag =
                              !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v8i16x_info, v16i8x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v16i16x_info, v16i8x_info,
                                     i128mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasBWI] in
  defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info, v32i8x_info,
                                i256mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
}
6517
// Byte -> doubleword extension at 128/256/512 bits.
// LdFrag defaults to the fragment named ExtTy # "extloadvi8".  The 128- and
// 256-bit forms require HasVLX and HasAVX512; the 512-bit form HasAVX512.
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, string ExtTy,
                            PatFrag LdFrag =
                              !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v4i32x_info, v16i8x_info,
                                     i32mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v8i32x_info, v16i8x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info, v16i8x_info,
                                i128mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
}
6536
// Byte -> quadword extension at 128/256/512 bits.
// LdFrag defaults to the fragment named ExtTy # "extloadvi8".  The 128- and
// 256-bit forms require HasVLX and HasAVX512; the 512-bit form HasAVX512.
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, string ExtTy,
                            PatFrag LdFrag =
                              !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v2i64x_info, v16i8x_info,
                                     i16mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v4i64x_info, v16i8x_info,
                                     i32mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info, v16i8x_info,
                                i64mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
}
6555
// Word -> doubleword extension at 128/256/512 bits.
// LdFrag defaults to the fragment named ExtTy # "extloadvi16".  The 128- and
// 256-bit forms require HasVLX and HasAVX512; the 512-bit form HasAVX512.
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, string ExtTy,
                            PatFrag LdFrag =
                              !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v4i32x_info, v8i16x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v8i32x_info, v8i16x_info,
                                     i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info, v16i16x_info,
                                i256mem, LdFrag, OpNode>,
           EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
}
6574
// Word -> quadword extension at 128/256/512 bits.
// LdFrag defaults to the fragment named ExtTy # "extloadvi16".  The 128- and
// 256-bit forms require HasVLX and HasAVX512; the 512-bit form HasAVX512.
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, string ExtTy,
                            PatFrag LdFrag =
                              !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v2i64x_info, v8i16x_info,
                                     i32mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v4i64x_info, v8i16x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info, v8i16x_info,
                                i128mem, LdFrag, OpNode>,
           EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
}
6593
// Doubleword -> quadword extension at 128/256/512 bits.
// LdFrag defaults to the fragment named ExtTy # "extloadvi32".  The 128- and
// 256-bit forms require HasVLX and HasAVX512; the 512-bit form HasAVX512.
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, string ExtTy,
                            PatFrag LdFrag =
                              !cast<PatFrag>(ExtTy#"extloadvi32")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, v2i64x_info, v4i32x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, v4i64x_info, v4i32x_info,
                                     i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info, v8i32x_info,
                                i256mem, LdFrag, OpNode>,
           EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
6613
// Extension instructions built on X86vzext; ExtTy "z" selects the
// zextloadvi* load fragments.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;

// Extension instructions built on X86vsext; ExtTy "s" selects the
// sextloadvi* load fragments.
defm VPMOVSXBW : avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
defm VPMOVSXBD : avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
defm VPMOVSXBQ : avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
defm VPMOVSXWD : avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
defm VPMOVSXWQ : avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
defm VPMOVSXDQ : avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
6627
// EXTLOAD patterns, implemented using vpmovz
//
// Emits three anonymous patterns that select LdFrag on an address to the
// VPMOVZX<InstrStr> memory forms:
//   - unmasked load                         -> ...rm
//   - vselect(mask, load, $src0)            -> ...rmk  (merge into $src0)
//   - vselect(mask, load, To.ImmAllZerosV)  -> ...rmkz (zeroing)
// InstrStr is the instruction-name fragment between "VPMOVZX" and the
// rm/rmk/rmkz suffix (e.g. "BWZ128").  To supplies the result type, register
// class and mask classes.  From is not referenced in the body.
multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
                               X86VectorVTInfo From, PatFrag LdFrag> {
  // Plain extending load.
  def : Pat<(To.VT (LdFrag addr:$src)),
            (!cast<Instruction>("VPMOVZX"#InstrStr#"rm") addr:$src)>;
  // Merge-masked extending load.
  def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)),
            (!cast<Instruction>("VPMOVZX"#InstrStr#"rmk") To.RC:$src0,
             To.KRC:$mask, addr:$src)>;
  // Zero-masked extending load.
  def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src),
                    To.ImmAllZerosV)),
            (!cast<Instruction>("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask,
             addr:$src)>;
}
6641
// Instantiate the extload lowering for every VPMOVZX width/element pairing,
// grouped under the same predicates as the instructions they select.

// Byte -> word, 128/256-bit.
let Predicates = [HasVLX, HasBWI] in {
  defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>;
}

// Byte -> word, 512-bit.
let Predicates = [HasBWI] in
defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>;

// Remaining pairings, 128/256-bit.
let Predicates = [HasVLX, HasAVX512] in {
  defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>;
  defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>;
  defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>;
  defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>;
  defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, extloadvi32>;
  defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>;
}

// Remaining pairings, 512-bit.
let Predicates = [HasAVX512] in {
  defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>;
  defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>;
  defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>;
  defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
6668
6669//===----------------------------------------------------------------------===//
6670// GATHER - SCATTER Operations
6671
// Single masked gather instruction (rm).
//   _          - vector type info; supplies the register class, write-mask
//                class, VT, execution domain, mnemonic suffix and EltSize.
//   memop      - vector-index memory operand.
//   GatherNode - pattern fragment matched by the instruction.
// The instruction has two results: the gathered vector and a written-back
// mask.  $dst is tied to the pass-through $src1 and marked earlyclobber, and
// the incoming $mask is tied to $mask_wb.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
            (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
            // Mnemonic is OpcodeStr followed by the type suffix from `_`.
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, _.KRCWM:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), _.KRCWM:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>;
}
6685
// Gathers whose data type comes from `_` at all three widths, with a
// D-suffixed (opcode dopc) and a Q-suffixed (opcode qopc) variant per width.
// Every record is marked VEX_W.  The 512-bit records add no predicate of
// their own here; the 256- and 128-bit records require HasVLX.
// Record names are NAME + {D,Q} + SUFF + {Z,Z256,Z128}.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // 512-bit.
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512mem, mgatherv8i32>,
                         EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>,
                         EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    // 256-bit.
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vx256xmem, mgatherv4i32>,
                              EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                            vy256xmem, mgatherv4i64>,
                              EVEX_V256, VEX_W;
    // 128-bit.
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>,
                              EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx128xmem, mgatherv2i64>,
                              EVEX_V128, VEX_W;
  }
}
6703
// Gathers whose data type comes from `_`, with a D-suffixed (opcode dopc) and
// a Q-suffixed (opcode qopc) variant per width.  No record is marked VEX_W.
// The Q variants use a narrower data info than the D variant of the same
// EVEX length: info256 for Z, info128 for both Z256 and Z128.
// The 512-bit records add no predicate of their own here; the 256- and
// 128-bit records require HasVLX.
// Record names are NAME + {D,Q} + SUFF + {Z,Z256,Z128}.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // 512-bit.
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vz512mem, mgatherv16i32>,
                         EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                       vz512mem, mgatherv8i64>,
                         EVEX_V512;

  let Predicates = [HasVLX] in {
    // 256-bit.
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vy256xmem, mgatherv8i32>,
                              EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vy128xmem, mgatherv4i64>,
                              EVEX_V256;
    // 128-bit.
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>,
                              EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx64xmem, mgatherv2i64>,
                              EVEX_V128;
  }
}
6721
6722
// Floating-point gathers: f64 data ("PD") and f32 data ("PS").
defm VGATHER :
  avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
  avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: i64 data ("Q") and i32 data ("D").
defm VPGATHER :
  avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
  avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
6728
// Single masked scatter instruction (mr).
//   _           - vector type info; supplies the register class, write-mask
//                 class, VT, execution domain, mnemonic suffix and EltSize.
//   memop       - vector-index memory operand (the destination).
//   ScatterNode - pattern fragment matched by the instruction.
// The only result is the written-back mask, which is tied to the incoming
// $mask.  The instruction is explicitly marked mayStore.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
            (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
            // Mnemonic is OpcodeStr followed by the type suffix from `_`.
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                     _.KRCWM:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
6742
// Scatters whose data type comes from `_` at all three widths, with a
// D-suffixed (opcode dopc) and a Q-suffixed (opcode qopc) variant per width.
// Every record is marked VEX_W.  The 512-bit records add no predicate of
// their own here; the 256- and 128-bit records require HasVLX.
// Record names are NAME + {D,Q} + SUFF + {Z,Z256,Z128}.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  // 512-bit.
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512mem, mscatterv8i32>,
                         EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>,
                         EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    // 256-bit.
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vx256xmem, mscatterv4i32>,
                              EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                             vy256xmem, mscatterv4i64>,
                              EVEX_V256, VEX_W;
    // 128-bit.
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>,
                              EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx128xmem, mscatterv2i64>,
                              EVEX_V128, VEX_W;
  }
}
6760
// Scatters whose data type comes from `_`, with a D-suffixed (opcode dopc)
// and a Q-suffixed (opcode qopc) variant per width.  No record is marked
// VEX_W.  The Q variants use a narrower data info than the D variant of the
// same EVEX length: info256 for Z, info128 for both Z256 and Z128.
// The 512-bit records add no predicate of their own here; the 256- and
// 128-bit records require HasVLX.
// Record names are NAME + {D,Q} + SUFF + {Z,Z256,Z128}.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  // 512-bit.
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vz512mem, mscatterv16i32>,
                         EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                        vz512mem, mscatterv8i64>,
                         EVEX_V512;

  let Predicates = [HasVLX] in {
    // 256-bit.
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vy256xmem, mscatterv8i32>,
                              EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vy128xmem, mscatterv4i64>,
                              EVEX_V256;
    // 128-bit.
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>,
                              EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx64xmem, mscatterv2i64>,
                              EVEX_V128;
  }
}
6778
// Floating-point scatters: f64 data ("PD") and f32 data ("PS").
defm VSCATTER :
  avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
  avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: i64 data ("Q") and i32 data ("D").
defm VPSCATTER :
  avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
  avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
6784
// Gather/scatter prefetch.
// Defines a single record `m` that takes a mask (KRC) and a vector-index
// memory operand, produces no outputs and carries no selection pattern.
// It is guarded by HasPFI and explicitly marked hasSideEffects.
//   F - instruction format passed through to AVX5128I.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in {
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                     OpcodeStr # "\t{$src {${mask}}|{${mask}}, $src}",
                     []>,
            EVEX, EVEX_K;
  }
}
6793
// Prefetch instantiations.  The instruction format selects the operation:
// MRM1m = vgatherpf0, MRM2m = vgatherpf1, MRM5m = vscatterpf0,
// MRM6m = vscatterpf1.  Opcode 0xC6 is used by the "d" mnemonics and 0xC7 by
// the "q" mnemonics; the *PD forms add VEX_W.

// vgatherpf0*
defm VGATHERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                         "vgatherpf0dps", VK16WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                         "vgatherpf0qps", VK8WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                         "vgatherpf0dpd", VK8WM, vy512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                         "vgatherpf0qpd", VK8WM, vz512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// vgatherpf1*
defm VGATHERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                         "vgatherpf1dps", VK16WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                         "vgatherpf1qps", VK8WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                         "vgatherpf1dpd", VK8WM, vy512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                         "vgatherpf1qpd", VK8WM, vz512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// vscatterpf0*
defm VSCATTERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                          "vscatterpf0dps", VK16WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                          "vscatterpf0qps", VK8WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                          "vscatterpf0dpd", VK8WM, vy512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                          "vscatterpf0qpd", VK8WM, vz512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// vscatterpf1*
defm VSCATTERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                          "vscatterpf1dps", VK16WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                          "vscatterpf1qps", VK8WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                          "vscatterpf1dpd", VK8WM, vy512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                          "vscatterpf1qpd", VK8WM, vz512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
6841
// Helper fragments to match sext vXi1 to vXiY.
// Each leaf matches an X86vsrai of a VR512 value by (element width - 1):
// 31 for the v16i32 form and 63 for the v8i64 form.
def v16i1sextv16i32  : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64  : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
6845
// Mask -> vector conversion for one vector width.
// The rr record takes a mask register of class Vec.KRC and produces a Vec.RC
// vector, matching X86vsext of the mask.  The mnemonic is OpcodeStr followed
// by Vec.Suffix.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec,
                            string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      !strconcat(OpcodeStr##Vec.Suffix,
                                 "\t{$src, $dst|$dst, $src}"),
                      [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>,
           EVEX;
}
6851
// Mask -> vector conversion for one element width, at all three vector
// lengths.  The 512-bit form requires prd; the 256- and 128-bit forms require
// prd and HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
6862
// Mask -> vector conversions for all four element widths.
// B and W use opcode 0x28 under HasBWI; D and Q use opcode 0x38 under HasDQI.
// The W and Q forms additionally set VEX_W.
multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
  defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
                                       HasBWI>;
  defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
                                       HasBWI>,
                 VEX_W;
  defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
                                       HasDQI>;
  defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
                                       HasDQI>,
                 VEX_W;
}
6873
// Instantiates the B/W/D/Q mask -> vector records with the "vpmovm2"
// mnemonic prefix; the element-type suffix is appended by cvt_by_vec_width.
defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
6875
// Vector -> mask conversion for one vector width.
// The rr record takes a vector register of class _.RC and produces a mask
// register of class _.KRC, matching X86cvt2mask.  OpcodeStr is used as the
// mnemonic unchanged (no suffix is appended).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _,
                                         string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>,
           EVEX;
}
6881
// Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - type info of the wide vector used to carry the operand.
//   _          - type info of the narrow source vector.
// The narrow source is inserted at _.SubRegIdx into an IMPLICIT_DEF of the
// wide type, the instruction named NAME#"Zrr" is applied to it, and the
// resulting mask is copied to the narrow mask register class _.KRC.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                                            X86VectorVTInfo _> {

  def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
6893
// Vector -> mask conversion for one element width.
//   - Z requires prd.
//   - Z256 and Z128 require prd and HasVLX.
//   - Under prd and NoVLX, the Z256_Alt / Z128_Alt patterns select the
//     512-bit instruction for 256- and 128-bit sources instead.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo,
                                         Predicate prd> {
  let Predicates = [prd] in {
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }

  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info256>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info128>;
  }
}
6911
// Vector -> mask instantiations.  Byte/word forms use opcode 0x29 under
// HasBWI; doubleword/quadword forms use opcode 0x39 under HasDQI.  The word
// and quadword forms additionally set VEX_W.
defm VPMOVB2M :
  avx512_convert_vector_to_mask<0x29, "vpmovb2m", avx512vl_i8_info, HasBWI>;
defm VPMOVW2M :
  avx512_convert_vector_to_mask<0x29, "vpmovw2m", avx512vl_i16_info, HasBWI>,
  VEX_W;
defm VPMOVD2M :
  avx512_convert_vector_to_mask<0x39, "vpmovd2m", avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M :
  avx512_convert_vector_to_mask<0x39, "vpmovq2m", avx512vl_i64_info, HasDQI>,
  VEX_W;
6920
6921//===----------------------------------------------------------------------===//
6922// AVX-512 - COMPRESS and EXPAND
6923//
6924
// Compress for one vector width.  Defines:
//   rr  - register form via AVX512_maskable, matching X86compress.
//   mr  - unmasked store form with no selection pattern.
//   mrk - masked store form; matches a store of
//         vselect(mask, X86compress(src), zero vector).
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase;

  // This `let` has no braces, so it covers only the `mr` def that follows.
  // `mr` has an empty pattern list, so mayStore/hasSideEffects are given
  // explicitly.
  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>;

  // Masked store of the compressed value.
  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              [(store (_.VT (vselect _.KRCWM:$mask,
                             (_.VT (X86compress  _.RC:$src)), _.ImmAllZerosV)),
                addr:$dst)]>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
6945
// Compress for one element type at all three vector lengths.
// The 512-bit form adds no predicate of its own here; the 256- and 128-bit
// forms require HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
6955
// Compress instantiations.  Integer forms use opcode 0x8B and floating-point
// forms 0x8A; the 64-bit element forms additionally set VEX_W.
defm VPCOMPRESSD :
  compress_by_elt_width<0x8B, "vpcompressd", avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ :
  compress_by_elt_width<0x8B, "vpcompressq", avx512vl_i64_info>, EVEX, VEX_W;
defm VCOMPRESSPS :
  compress_by_elt_width<0x8A, "vcompressps", avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD :
  compress_by_elt_width<0x8A, "vcompresspd", avx512vl_f64_info>, EVEX, VEX_W;
6964
// Expand for one vector width.  Both forms are defined through
// AVX512_maskable and match X86expand:
//   rr - register source.
//   rm - memory source, loaded with _.LdFrag and bitconverted to _.VT.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (X86expand _.RC:$src1))>,
            AVX5128IBase;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.MemOp:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (X86expand
                                   (_.VT (bitconvert
                                          (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}
6978
// Expand for one element type at all three vector lengths.
// The 512-bit form adds no predicate of its own here; the 256- and 128-bit
// forms require HasVLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               AVX512VLVectorVTInfo VTInfo> {
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
6988
// Expand instructions.  Each defm instantiates expand_by_elt_width for one
// element-type family.  The 64-bit-element variants reuse the opcode of the
// matching 32-bit variant and add VEX_W.
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd",
                                     avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq",
                                     avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps",
                                     avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd",
                                     avx512vl_f64_info>, EVEX, VEX_W;
6997
// Handles packed unary instructions with an immediate:
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// Every pattern passes FROUND_CURRENT as the trailing operand of OpNode.
// The mnemonic is OpcodeStr with _.Suffix pasted on.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86VectorVTInfo _> {
  // Register source.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, i32u8imm:$src2),
                             OpcodeStr##_.Suffix,
                             "$src2, $src1", "$src1, $src2",
                             (OpNode (_.VT _.RC:$src1),
                                     (i32 imm:$src2),
                                     (i32 FROUND_CURRENT))>;

  // Full-vector memory source, loaded with _.LdFrag.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src1, i32u8imm:$src2),
                             OpcodeStr##_.Suffix,
                             "$src2, $src1", "$src1, $src2",
                             (OpNode (_.VT (bitconvert
                                             (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2),
                                     (i32 FROUND_CURRENT))>;

  // Scalar memory source fed through X86VBroadcast; this form carries EVEX_B
  // and prints the operand with _.BroadcastStr appended.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, ${src1}"##_.BroadcastStr,
                              "${src1}"##_.BroadcastStr##", $src2",
                              (OpNode (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src1))),
                                      (i32 imm:$src2),
                                      (i32 FROUND_CURRENT))>,
              EVEX_B;
}
7024
// Handles the {sae} register form of a packed unary instruction with an
// immediate:
//   reg_vec1 = op(reg_vec2, imm), {sae}
// The pattern passes FROUND_NO_EXC as the trailing operand of OpNode and the
// instruction carries EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86VectorVTInfo _> {
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, i32u8imm:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, {sae}, $src1",
                              "$src1, {sae}, $src2",
                              (OpNode (_.VT _.RC:$src1),
                                      (i32 imm:$src2),
                                      (i32 FROUND_NO_EXC))>,
              EVEX_B;
}
7036
// Instantiates the packed unary-with-immediate forms for all three vector
// widths of `_`.  Only the 512-bit instantiation also gets the {sae} form.
// The 512-bit form is gated on `prd`; the 128/256-bit forms on `prd` plus
// HasVLX.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode,
                                            _.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           _.info128>,
                EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           _.info256>,
                EVEX_V256;
  }
}
7051
// Handles packed two-source instructions with an immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// Every pattern passes FROUND_CURRENT as the trailing operand of OpNode.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _> {
  // Both sources in registers.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                             OpcodeStr,
                             "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 imm:$src3),
                                     (i32 FROUND_CURRENT))>;

  // Second source is a full vector loaded with _.LdFrag.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                             OpcodeStr,
                             "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT (bitconvert
                                             (_.LdFrag addr:$src2))),
                                     (i32 imm:$src3),
                                     (i32 FROUND_CURRENT))>;

  // Second source is a scalar load fed through X86VBroadcast; carries EVEX_B.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                   i32u8imm:$src3),
                              OpcodeStr,
                              "$src3, ${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr##", $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                      (i32 imm:$src3),
                                      (i32 FROUND_CURRENT))>,
              EVEX_B;
}
7081
// Handles two-source instructions with an 8-bit immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
// The result type (DestInfo) and the source type (SrcInfo) are passed
// separately, so they may differ.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  // Both sources in registers.
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                             (outs DestInfo.RC:$dst),
                             (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2,
                                  u8imm:$src3),
                             OpcodeStr,
                             "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (DestInfo.VT
                               (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 imm:$src3)))>;

  // Second source loaded from memory with SrcInfo.LdFrag.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                             (outs DestInfo.RC:$dst),
                             (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2,
                                  u8imm:$src3),
                             OpcodeStr,
                             "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (DestInfo.VT
                               (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT
                                         (bitconvert
                                           (SrcInfo.LdFrag addr:$src2))),
                                       (i8 imm:$src3)))>;
}
7101
// Handles two-source instructions with an 8-bit immediate where source and
// destination share one type:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// The rri/rmi forms are inherited from avx512_3Op_rm_imm8 (with `_` used for
// both DestInfo and SrcInfo); this multiclass adds the broadcast form.
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _>
    : avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _> {
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                   u8imm:$src3),
                              OpcodeStr,
                              "$src3, ${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr##", $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                      (i8 imm:$src3))>,
              EVEX_B;
}
7117
// Handles scalar two-source instructions with an immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_scalar, imm)
// Every pattern passes FROUND_CURRENT as the trailing operand of OpNode.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _> {
  // Both sources in registers.
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2,
                                         i32u8imm:$src3),
                                    OpcodeStr,
                                    "$src3, $src2, $src1",
                                    "$src1, $src2, $src3",
                                    (OpNode (_.VT _.RC:$src1),
                                            (_.VT _.RC:$src2),
                                            (i32 imm:$src3),
                                            (i32 FROUND_CURRENT))>;

  // Second source is a scalar load wrapped in scalar_to_vector.
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.MemOp:$src2,
                                         i32u8imm:$src3),
                                    OpcodeStr,
                                    "$src3, $src2, $src1",
                                    "$src1, $src2, $src3",
                                    (OpNode (_.VT _.RC:$src1),
                                            (_.VT (scalar_to_vector
                                                    (_.ScalarLdFrag
                                                      addr:$src2))),
                                            (i32 imm:$src3),
                                            (i32 FROUND_CURRENT))>;

  // Assembler-only alternative using _.FRC registers and _.ScalarMemOp; it
  // has no selection pattern, so mayLoad/hasSideEffects are set by hand.
  let isAsmParserOnly = 1, mayLoad = 1, hasSideEffects = 0 in
    defm rmi_alt : AVX512_maskable_in_asm<opc, MRMSrcMem, _,
                                          (outs _.FRC:$dst),
                                          (ins _.FRC:$src1,
                                               _.ScalarMemOp:$src2,
                                               u8imm:$src3),
                                          OpcodeStr,
                                          "$src3, $src2, $src1",
                                          "$src1, $src2, $src3",
                                          []>;
}
7147
// Handles the {sae} register form of a packed two-source instruction with an
// immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// The pattern passes FROUND_NO_EXC as the trailing operand of OpNode and the
// instruction carries EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86VectorVTInfo _> {
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                              OpcodeStr,
                              "$src3, {sae}, $src2, $src1",
                              "$src1, $src2, {sae}, $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (i32 imm:$src3),
                                      (i32 FROUND_NO_EXC))>,
              EVEX_B;
}
// Handles the {sae} register form of a scalar two-source instruction with an
// immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Built with AVX512_maskable_scalar; the pattern passes FROUND_NO_EXC as the
// trailing operand of OpNode and the instruction carries EVEX_B.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86VectorVTInfo _> {
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                          (outs _.RC:$dst),
                                          (ins _.RC:$src1, _.RC:$src2,
                                               i32u8imm:$src3),
                                          OpcodeStr,
                                          "$src3, {sae}, $src2, $src1",
                                          "$src1, $src2, {sae}, $src3",
                                          (OpNode (_.VT _.RC:$src1),
                                                  (_.VT _.RC:$src2),
                                                  (i32 imm:$src3),
                                                  (i32 FROUND_NO_EXC))>,
                   EVEX_B;
}
7172
// Instantiates the packed two-source-with-immediate forms for all three
// vector widths of `_`.  Only the 512-bit instantiation also gets the {sae}
// form.  The 512-bit form is gated on `prd`; the 128/256-bit forms on `prd`
// plus HasVLX.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
  }
}
7188
// Instantiates avx512_3Op_rm_imm8 for all three vector widths, pairing the
// matching-width members of DestInfo and SrcInfo.  The 512-bit form requires
// HasBWI; the 128/256-bit forms require HasBWI and HasVLX.  Every
// instantiation is tagged AVX512AIi8Base and EVEX_4V.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                                     AVX512VLVectorVTInfo DestInfo,
                                     AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasBWI] in
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode,
                                DestInfo.info512, SrcInfo.info512>,
             EVEX_V512, AVX512AIi8Base, EVEX_4V;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode,
                                   DestInfo.info128, SrcInfo.info128>,
                EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode,
                                   DestInfo.info256, SrcInfo.info256>,
                EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
7202
// Instantiates avx512_3Op_imm8 for all three vector widths of `_`.  The
// 512-bit form requires HasAVX512; the 128/256-bit forms require HasAVX512
// and HasVLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
  }
}
7213
// Combines the regular and {sae} scalar-with-immediate forms under a single
// Z128 instantiation, gated on `prd`.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd> {
  let Predicates = [prd] in
    defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
                avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
}
7221
// Instantiates the packed unary-with-immediate family for both f32 (PS) and
// f64 (PD) element types.  Each flavor takes its own opcode; the PD flavor
// additionally carries VEX_W.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd> {
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f32_info,
                                                  opcPs, OpNode, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f64_info,
                                                  opcPd, OpNode, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;
}
7229
7230
// Packed unary FP instructions with an 8-bit immediate (PS and PD flavors).
// The two opcodes are the PS and PD encodings respectively.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<
                   "vreduce", 0x56, 0x56, X86VReduce, HasDQI>,
                 AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<
                   "vrndscale", 0x08, 0x09, X86VRndScale, HasAVX512>,
                 AVX512AIi8Base, EVEX;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<
                   "vgetmant", 0x26, 0x26, X86VGetMant, HasAVX512>,
                 AVX512AIi8Base, EVEX;
7237
7238
// Packed range instructions (two sources plus immediate).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar range instructions.
defm VRANGESD : avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
                                                0x51, X86VRange, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                                                0x51, X86VRange, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar reduce instructions.
defm VREDUCESD : avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
                                                 0x57, X86Reduces, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
                                                 0x57, X86Reduces, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar getmant instructions.
defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
                                                  0x27, X86GetMants,
                                                  HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                  VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
                                                  0x27, X86GetMants,
                                                  HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
7266
// Instantiates avx512_3Op_imm8 for the 512-bit and 256-bit members of `_`;
// no 128-bit form is defined here.  OpNode defaults to X86Shuf128.  The
// 512-bit form requires HasAVX512, the 256-bit form HasAVX512 and HasVLX.
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
                                   bits<8> opc, SDNode OpNode = X86Shuf128> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>,
                EVEX_V256;
}
// Select the generic 512-bit FP rounding nodes onto VRNDSCALE{PS,PD}Zrri.
// Immediates used: ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
// fnearbyint -> 0xC.
let Predicates = [HasAVX512] in {
  // v16f32
  def : Pat<(v16f32 (ffloor VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
  def : Pat<(v16f32 (fnearbyint VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
  def : Pat<(v16f32 (fceil VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
  def : Pat<(v16f32 (frint VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
  def : Pat<(v16f32 (ftrunc VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;

  // v8f64
  def : Pat<(v8f64 (ffloor VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
  def : Pat<(v8f64 (fnearbyint VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
  def : Pat<(v8f64 (fceil VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
  def : Pat<(v8f64 (frint VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
  def : Pat<(v8f64 (ftrunc VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
}
7300
// 128-bit-lane shuffles, FP (opcode 0x23) and integer (opcode 0x43) flavors.
// The 64-bit-element variants add VEX_W.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", avx512vl_f32_info,
                                          0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", avx512vl_f64_info,
                                          0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", avx512vl_i32_info,
                                          0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", avx512vl_i64_info,
                                          0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
7309
// VALIGN: avx512_common_3Op_imm8 with opcode 0x03 and X86VAlign, tagged
// AVX512AIi8Base and EVEX_4V.
multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
              AVX512AIi8Base, EVEX_4V;
}
7314
// Dword and qword VALIGN; the qword variant adds VEX_W.
defm VALIGND : avx512_valign<"valignd", avx512vl_i32_info>,
               EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<"valignq", avx512vl_i64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;
7319
// Emits one selection pattern mapping X86PAlignr on type _.VT to the `rri`
// instruction named NAME # _.ZSuffix # "rri", gated on predicate list `p`.
// The pattern record itself is named NAME # _.VTName # "rri".
multiclass avx512_vpalignr_lowering<X86VectorVTInfo _, list<Predicate> p> {
  let Predicates = p in
    def NAME#_.VTName#rri
        : Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
              (!cast<Instruction>(NAME#_.ZSuffix#rri)
                 _.RC:$src1, _.RC:$src2, imm:$imm)>;
}
7327
// Applies avx512_vpalignr_lowering to all three widths of `_`: the 512-bit
// type under HasBWI, the 128/256-bit types under HasBWI and HasVLX.
multiclass avx512_vpalignr_lowering_common<AVX512VLVectorVTInfo _>
    : avx512_vpalignr_lowering<_.info512, [HasBWI]>,
      avx512_vpalignr_lowering<_.info128, [HasBWI, HasVLX]>,
      avx512_vpalignr_lowering<_.info256, [HasBWI, HasVLX]>;
7332
// VPALIGNR is defined on byte vectors; the extra lowering multiclasses add
// X86PAlignr patterns for the i16/i32/f32/i64/f64 vector types that select
// the same instructions.
defm VPALIGNR : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                          avx512vl_i8_info, avx512vl_i8_info>,
                avx512_vpalignr_lowering_common<avx512vl_i16_info>,
                avx512_vpalignr_lowering_common<avx512vl_i32_info>,
                avx512_vpalignr_lowering_common<avx512vl_f32_info>,
                avx512_vpalignr_lowering_common<avx512vl_i64_info>,
                avx512_vpalignr_lowering_common<avx512vl_f64_info>,
                EVEX_CD8<8, CD8VF>;

// VDBPSADBW takes i8-vector sources and produces an i16-vector result.
defm VDBPSADBW : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                           avx512vl_i16_info,
                                           avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>;
7344
// Register-source (rr) and memory-source (rm) forms of a unary vector
// instruction on type `_`, both built through AVX512_maskable.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (OpNode _.RC:$src1))>,
            EVEX, AVX5128IBase;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.MemOp:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (OpNode
                                    (bitconvert (_.LdFrag addr:$src1))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
}
7358
// Extends avx512_unary_rm with a broadcast-memory form (rmb): the operand is
// a scalar load fed through X86VBroadcast, printed with _.BroadcastStr and
// tagged EVEX_B.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _>
    : avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst), (ins _.ScalarMemOp:$src1),
                             OpcodeStr,
                             "${src1}"##_.BroadcastStr,
                             "${src1}"##_.BroadcastStr,
                             (_.VT (OpNode
                                     (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
7370
// Instantiates avx512_unary_rm for all three vector widths of VTInfo.  The
// 512-bit form is gated on `prd`; the 256/128-bit forms on `prd` plus HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
7383
// Instantiates avx512_unary_rmb (rr, rm and broadcast rmb forms) for all
// three vector widths of VTInfo.  The 512-bit form is gated on `prd`; the
// 256/128-bit forms on `prd` plus HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
7397
// Qword (Q) and dword (D) flavors of a unary instruction with broadcast
// support.  The mnemonic is OpcodeStr with "q"/"d" appended; the Q flavor
// adds VEX_W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode,
                               avx512vl_i32_info, prd>;
}
7405
// Word (W) and byte (B) flavors of a unary instruction.  These use
// avx512_unary_rm_vl, so no broadcast form is generated.  The mnemonic is
// OpcodeStr with "w"/"b" appended.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w,
                                 string OpcodeStr, SDNode OpNode,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode,
                              avx512vl_i16_info, prd>;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode,
                              avx512vl_i8_info, prd>;
}
7411
// All four element widths of a unary instruction: the dword/qword flavors are
// gated on HasAVX512, the byte/word flavors on HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, HasBWI>;
}
7420
// Vector absolute value for byte/word/dword/qword elements.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;

// Select the xor/add form built from the v16i1sextv16i32 / v8i1sextv8i64
// fragments onto the 512-bit VPABSD / VPABSQ register forms.
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;
7431
// Dword/qword unary instruction selecting on ctlz; the same opcode is used
// for both element widths.
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd> {
  defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
}

// Both instruction families below are gated on HasCDI.
defm VPLZCNT    : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict",
                                        X86Conflict, HasCDI>;
7439
7440//===---------------------------------------------------------------------===//
7441// Replicate Single FP - MOVSHDUP and MOVSLDUP
7442//===---------------------------------------------------------------------===//
// Unary f32-vector instruction for all three widths, gated on HasAVX512 and
// tagged with the XS prefix.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
                                 HasAVX512>,
              XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
7450
7451//===----------------------------------------------------------------------===//
7452// AVX-512 - MOVDDUP
7453//===----------------------------------------------------------------------===//
7454
// rr/rm forms used for the 128-bit MOVDDUP.  Unlike avx512_unary_rm, the
// memory form takes a scalar memory operand: the value is loaded with
// _.ScalarLdFrag and wrapped in scalar_to_vector before OpNode is applied.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src),
                            OpcodeStr, "$src", "$src",
                            (_.VT (OpNode (_.VT _.RC:$src)))>,
            EVEX;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                            OpcodeStr, "$src", "$src",
                            (_.VT (OpNode
                                    (_.VT (scalar_to_vector
                                            (_.ScalarLdFrag addr:$src)))))>,
            EVEX, EVEX_CD8<_.EltSize, CD8VH>;
}
7466
// Instantiates MOVDDUP for all three vector widths of VTInfo.  The 512- and
// 256-bit forms use the generic avx512_unary_rm; the 128-bit form uses
// avx512_movddup_128, whose memory form takes a scalar operand.
//
// The 512-bit form is gated on HasAVX512 so that it does not end up with an
// empty predicate list while its 256/128-bit siblings are predicated; this
// matches avx512_unary_rm_vl in this file.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
7479
// MOVDDUP over the f64 vector family, tagged with the XD prefix and VEX_W.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm NAME : avx512_movddup_common<opc, OpcodeStr, OpNode,
                                    avx512vl_f64_info>,
              XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;

// Extra selections onto the 128-bit memory form: a full v2f64 load feeding
// X86Movddup, and a v2f64 broadcast of a scalar f64 load.
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
          (VMOVDDUPZ128rm addr:$src)>,
      Requires<[HasAVX512, HasVLX]>;
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>,
      Requires<[HasAVX512, HasVLX]>;
7491
7492//===----------------------------------------------------------------------===//
7493// AVX-512 - Unpack Instructions
7494//===----------------------------------------------------------------------===//
// Floating-point unpack high/low.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512>;

// Integer unpack, byte and word elements (HasBWI).
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasBWI>;

// Integer unpack, dword and qword elements (HasAVX512).
defm VPUNPCKLDQ  : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHDQ  : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
7515
7516//===----------------------------------------------------------------------===//
7517// AVX-512 - Extract & Insert Integer Instructions
7518//===----------------------------------------------------------------------===//
7519
// Store-to-memory form (mr) shared by the byte and word element extracts.
// The pattern stores the OpNode result after assertzext and a trunc to
// _.EltVT.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT
                               (trunc
                                 (assertzext
                                   (OpNode (_.VT _.RC:$src1), imm:$src2)))),
                             addr:$dst)]>,
           EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
}
7530
// Byte element extract (opcode 0x14, HasBWI): a register-destination form
// selecting on X86pextrb plus the shared store-to-memory form.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>,
                TAPD;
  }
}
7543
// Word element extract (HasBWI).  Three forms are defined:
//   rr     - opcode 0xC5, MRMSrcReg, PD; selects on X86pextrw.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; no selection pattern, printed
//            with a ".s" suffix on the mnemonic.
//   mr     - the shared store-to-memory form, opcode 0x15, TAPD.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, PD;

    let hasSideEffects = 0 in
      def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                             (ins _.RC:$src1, u8imm:$src2),
                             OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX, TAPD;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>,
                TAPD;
  }
}
7562
// Dword/qword element extract (opcode 0x16, HasDQI).  GRC is the
// general-purpose register class receiving the element in the rr form; the
// mr form stores the extracted element directly to memory.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1), imm:$src2),
                               addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
  }
}
7581
// Element extracts from 128-bit vectors; the qword variant adds VEX_W.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>,
                VEX_W;
7586
// Memory-source element insertion. Defines a single record, rm, whose pattern
// is OpNode($src1, LdFrag(addr:$src2), $src3): the scalar is loaded through
// LdFrag and inserted at the immediate index.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1,
                                         (LdFrag addr:$src2),
                                         imm:$src3)))]>,
           EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
7596
// Element insertion gated on HasBWI.
//   rr   - the scalar operand comes from a GR32orGR64 register.
//   NAME - the rm record from avx512_insert_elt_m, loading through LdFrag.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2,
                                     imm:$src3))]>,
             EVEX_4V;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
7609
// Element insertion matched through the generic insertelt node, gated on
// HasDQI. GRC is the general-purpose register class of the scalar operand.
//   rr   - scalar taken from a GRC register.
//   NAME - the rm record from avx512_insert_elt_m, loading via _.ScalarLdFrag.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2,
                                              imm:$src3)))]>,
             EVEX_4V, TAPD;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>,
                TAPD;
  }
}
7624
// Instantiate the element-insert multiclasses. The byte and word variants use
// the X86pinsrb / X86pinsrw nodes with extending loads for the memory form;
// the dword and qword variants share opcode 0x22 and differ by VEX_W.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb,
                                     v16i8x_info, extloadi8>,
                TAPD;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw,
                                     v8i16x_info, extloadi16>,
                PD;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>,
                VEX_W;
7631//===----------------------------------------------------------------------===//
7632// VSHUFPS - VSHUFPD Operations
7633//===----------------------------------------------------------------------===//
// Wraps avx512_common_3Op_imm8 with opcode 0xC6 and the X86Shufp node.
// Only VTInfo_FP is referenced in this body; VTInfo_I is accepted but unused.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              AVX512AIi8Base, EVEX_4V;
}
7640
// Single- and double-precision instantiations; the double-precision one adds
// VEX_W.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>,
               PS;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>,
               PD, VEX_W;
7643//===----------------------------------------------------------------------===//
7644// AVX-512 - Byte shift Left/Right
7645//===----------------------------------------------------------------------===//
7646
// One vector width of an immediate-count shift.
//   rr - source vector in a register, encoded with format MRMr.
//   rm - source vector loaded from memory (_.LdFrag, bitconverted to _.VT),
//        encoded with format MRMm.
// In both records the i8 immediate $src2 is the second operand of OpNode.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86VectorVTInfo _> {
  def rr : AVX512<opc, MRMr,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;

  def rm : AVX512<opc, MRMm,
                  (outs _.RC:$dst),
                  (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode (_.VT (bitconvert
                                              (_.LdFrag addr:$src1))),
                                      (i8 imm:$src2))))]>;
}
7660
// Expands avx512_shift_packed over byte-element vectors of three widths.
// Z512 requires only prd; Z256 and Z128 additionally require HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   Predicate prd> {
  let Predicates = [prd] in {
    defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    v64i8_info>,
                EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    v16i8x_info>,
                EVEX_V128;
  }
}
// Both instructions share opcode 0x73 and are told apart by the ModRM format
// pair passed in (MRM7r/MRM7m versus MRM3r/MRM3m). Both require HasBWI.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", HasBWI>,
               AVX512PDIi8Base, EVEX_4V;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", HasBWI>,
               AVX512PDIi8Base, EVEX_4V;
7677
7678
// One vector width of a two-source operation whose result type (_dst) differs
// from its source type (_src).
//   rr - both sources in registers.
//   rm - second source loaded from memory via _src.LdFrag and bitconverted to
//        _src.VT.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86VectorVTInfo _dst,
                                X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.RC:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT _src.RC:$src2))))]>;

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT (bitconvert
                                             (_src.LdFrag addr:$src2))))))]>;
}
7696
// Expands avx512_psadbw_packed over three widths, pairing an i64-element
// destination type with an i8-element source type at each width.
// Z512 requires only prd; Z256 and Z128 additionally require HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr,
                                     v8i64_info, v64i8_info>,
                EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
7709
// VPSADBW: opcode 0xf6, matched through X86psadbw, gated on HasBWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", HasBWI>,
               EVEX_4V;
7712
// One vector width of a three-source operation with an 8-bit immediate.
// $src1 is tied to $dst through the Constraints string, so it appears in the
// patterns but not in the explicit (ins ...) lists. All three forms are built
// through AVX512_maskable_3src (defined outside this section):
//   rri  - third source in a register.
//   rmi  - third source is a full vector load (_.LdFrag, bitconverted to _.VT).
//   rmbi - third source is a scalar load broadcast via X86VBroadcast; marked
//          EVEX_B and printed with _.BroadcastStr appended to the operand.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst" in {
  // Register form.
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4))>, AVX512AIi8Base, EVEX_4V;
  // Full-vector memory form.
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4))>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
  // Broadcast memory form: a scalar memory operand broadcast to the vector.
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4))>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
  }// Constraints = "$src1 = $dst"
}
7742
// Expands avx512_ternlog (opcode 0x25, node X86vpternlog) over the 512-, 128-
// and 256-bit members of the given AVX512VLVectorVTInfo. Z requires
// HasAVX512; Z128 and Z256 additionally require HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>,
                EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>,
                EVEX_V256;
  }
}
7751
// Dword and qword instantiations; the qword one adds VEX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>,
                  VEX_W;
7754
7755//===----------------------------------------------------------------------===//
7756// AVX-512 - FixupImm
7757//===----------------------------------------------------------------------===//
7758
// One vector width of the packed fixup-immediate operation. $src1 is tied to
// $dst. The third source is typed as the integer vector type (_.IntVT), and
// every pattern passes FROUND_CURRENT as the final i32 operand. The mnemonic
// is OpcodeStr with _.Suffix appended.
//   rri  - third source in a register.
//   rmi  - third source is a full vector load (_.LdFrag, bitconverted).
//   rmbi - third source is a broadcast scalar load (X86VBroadcast); EVEX_B.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst" in {
    // Register form.
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>;
    // Full-vector memory form.
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>;
    // Broadcast memory form.
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, EVEX_B;
  } // Constraints = "$src1 = $dst"
}
7789
// Register-only {sae} variant of the packed fixup-immediate operation.
// Defines rrib, which prints "{sae}" in its assembly string, passes
// FROUND_NO_EXC as the final pattern operand and is marked EVEX_B.
// $src1 is tied to $dst.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst" in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _,
                                     (outs _.RC:$dst),
                                     (ins _.RC:$src2, _.RC:$src3,
                                          i32u8imm:$src4),
                                     OpcodeStr##_.Suffix,
                                     "$src4, {sae}, $src3, $src2",
                                     "$src2, $src3, {sae}, $src4",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2),
                                             (_.IntVT _.RC:$src3),
                                             (i32 imm:$src4),
                                             (i32 FROUND_NO_EXC))>,
                EVEX_B;
  } // Constraints = "$src1 = $dst"
}
7804
// Scalar fixup-immediate operation, gated on HasAVX512, with $src1 tied to
// $dst. _ describes the first two operands and the result; _src3VT supplies
// the value type and register class used for the third source in the patterns.
//   rri  - register form, FROUND_CURRENT.
//   rrib - register form printing "{sae}", FROUND_NO_EXC, marked EVEX_B.
//   rmi  - memory form: a scalar load (_src3VT.ScalarLdFrag) wrapped in
//          scalar_to_vector, FROUND_CURRENT.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in {
    // Register form using the current rounding/exception behaviour operand.
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>;

    // Register form with {sae}.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>, EVEX_B;
    // Memory form: third source is a scalar load promoted with
    // scalar_to_vector.
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (OpNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (_src3VT.VT (scalar_to_vector
                                       (_src3VT.ScalarLdFrag addr:$src3))),
                             (i32 imm:$src4),
                             (i32 FROUND_CURRENT))>;
  }
}
7837
// Expands the packed fixup-immediate multiclasses (opcode 0x54, mnemonic stem
// "vfixupimm", node X86VFixupimm) over all three vector widths of _Vec.
// Only the 512-bit group Z also pulls in the {sae} variant. Z requires
// HasAVX512; Z128 and Z256 additionally require HasVLX.
multiclass avx512_fixupimm_packed_all<AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm,
                                       _Vec.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm,
                                           _Vec.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm,
                                       _Vec.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm,
                                       _Vec.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
7850
// Scalar instantiations use opcode 0x55 and pair a scalar FP type info with
// an integer vector type info for the third source. The 64-bit variants add
// VEX_W.
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                          X86VFixupimmScalar,
                                          f32x_info, v4i32x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                          X86VFixupimmScalar,
                                          f64x_info, v2i64x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                   VEX_W;

// Packed instantiations over all vector widths.
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
                   EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
                   EVEX_CD8<64, CD8VF>, VEX_W;
7861