//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes XOP (eXtended OPerations)
//
//===----------------------------------------------------------------------===//

// Single-source XOP instructions (horizontal add/sub family): a register form
// (rr) and a 128-bit memory form (rm). Scheduling is pinned to the packed
// horizontal-add class since every instantiation below is a vphadd*/vphsub*.
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int VR128:$src))]>, XOP,
           Sched<[SchedWritePHAdd.XMM]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
           Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPHSUBWD  : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
  defm VPHSUBDQ  : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
  defm VPHSUBBW  : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
  defm VPHADDWQ  : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
  defm VPHADDWD  : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
  defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
  defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
  defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
  defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
  defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
  defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
  defm VPHADDDQ  : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
  defm VPHADDBW  : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
  defm VPHADDBQ  : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
  defm VPHADDBD  : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
}

// Scalar load 2 addr operand instructions
// Used by the scalar VFRCZSS/SD forms: the memory operand is matched through a
// ComplexPattern (mem_cpat) so a scalar load can fold into the instruction.
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     Operand memop, ComplexPattern mem_cpat,
                     X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
           Sched<[sched.Folded, ReadAfterLd]>;
}

// 128-bit single-source XOP instruction with an FP memory operand (f128mem).
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     PatFrag memop, X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
           Sched<[sched.Folded, ReadAfterLd]>;
}

// 256-bit variant of xop2op128; VEX_L selects the YMM form ("Y" def prefix).
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     PatFrag memop, X86FoldableSchedWrite sched> {
  def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
            [(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
  def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
            [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
            VEX_L, Sched<[sched.Folded, ReadAfterLd]>;
}

// VFRCZ (extract fraction) - single precision: scalar, 128-bit and 256-bit.
let ExeDomain = SSEPackedSingle in {
  defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
                           ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
  defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
                           SchedWriteFRnd.XMM>;
  defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
                           SchedWriteFRnd.YMM>;
}

// VFRCZ (extract fraction) - double precision: scalar, 128-bit and 256-bit.
let ExeDomain = SSEPackedDouble in {
  defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
                           sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
  defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
                           SchedWriteFRnd.XMM>;
  defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
                           SchedWriteFRnd.YMM>;
}

// Two-source XOP shift/rotate instructions. Either source can come from
// memory: the reg-reg and mem-in-first-source forms use the MRMSrcReg4VOp3 /
// MRMSrcMem4VOp3 encodings (VEX.W=0), while folding the second source uses the
// normal 4V encoding with VEX.W=1.
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  ValueType vt128, X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
           XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1),
                             (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
           XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
  def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
           (ins i128mem:$src1, VR128:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
                             (vt128 VR128:$src2))))]>,
           XOP, Sched<[sched.Folded, ReadAfterLd]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               []>,
               XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}

let ExeDomain = SSEPackedInt in {
  // Variable rotates and arithmetic/logical variable shifts.
  defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
  defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
  defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}

// Rotate-by-immediate forms: source register or memory, rotate count in imm8.
multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     ValueType vt128, X86FoldableSchedWrite sched> {
  def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
           XOP, Sched<[sched]>;
  def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
           (ins i128mem:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
                             imm:$src2)))]>,
           XOP, Sched<[sched.Folded, ReadAfterLd]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
                          SchedWriteVecShiftImm.XMM>;
}

// Instruction where second source can be memory, but third must be register
// (multiply-accumulate family: dst = f(src1, src2) + src3).
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
                    X86FoldableSchedWrite sched> {
  // Commutable in the first two (multiplicand) operands only.
  let isCommutable = 1 in
  def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst,
              (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
           Sched<[sched]>;
  def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst,
              (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
                   VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPMADCSWD  : xop4opm2<0xB6, "vpmadcswd",
                             int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
  defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
                             int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
  defm VPMACSWW   : xop4opm2<0x95, "vpmacsww",
                             int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
  defm VPMACSWD   : xop4opm2<0x96, "vpmacswd",
                             int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
  defm VPMACSSWW  : xop4opm2<0x85, "vpmacssww",
                             int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
  defm VPMACSSWD  : xop4opm2<0x86, "vpmacsswd",
                             int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
  defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
                             int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
  defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
                             int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
  defm VPMACSSDD  : xop4opm2<0x8E, "vpmacssdd",
                             int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
  defm VPMACSDQL  : xop4opm2<0x97, "vpmacsdql",
                             int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
  defm VPMACSDQH  : xop4opm2<0x9F, "vpmacsdqh",
                             int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
  defm VPMACSDD   : xop4opm2<0x9E, "vpmacsdd",
                             int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}

// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
  def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
                        (v8i16 VR128:$src3))),
            (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
                        (v4i32 VR128:$src3))),
            (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  // (i8 -11) == 0xF5 == shuffle mask {1,1,3,3}: moves the odd dwords into the
  // even lanes so pmuldq sees the "high" dword pair, matching VPMACSDQH.
  def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
                                   (bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
                        (v2i64 VR128:$src3))),
            (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
                        (v2i64 VR128:$src3))),
            (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
                        (v4i32 VR128:$src3))),
            (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}

// Transforms to swizzle an immediate to help matching memory operand in first
// operand.
238def CommuteVPCOMCC : SDNodeXForm<imm, [{ 239 uint8_t Imm = N->getZExtValue() & 0x7; 240 Imm = X86::getSwappedVPCOMImm(Imm); 241 return getI8Imm(Imm, SDLoc(N)); 242}]>; 243 244// Instruction where second source can be memory, third must be imm8 245multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128, 246 X86FoldableSchedWrite sched> { 247 let ExeDomain = SSEPackedInt in { // SSE integer instructions 248 let isCommutable = 1 in 249 def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), 250 (ins VR128:$src1, VR128:$src2, XOPCC:$cc), 251 !strconcat("vpcom${cc}", Suffix, 252 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 253 [(set VR128:$dst, 254 (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), 255 imm:$cc)))]>, 256 XOP_4V, Sched<[sched]>; 257 def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), 258 (ins VR128:$src1, i128mem:$src2, XOPCC:$cc), 259 !strconcat("vpcom${cc}", Suffix, 260 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 261 [(set VR128:$dst, 262 (vt128 (OpNode (vt128 VR128:$src1), 263 (vt128 (bitconvert (loadv2i64 addr:$src2))), 264 imm:$cc)))]>, 265 XOP_4V, Sched<[sched.Folded, ReadAfterLd]>; 266 let isAsmParserOnly = 1, hasSideEffects = 0 in { 267 def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), 268 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 269 !strconcat("vpcom", Suffix, 270 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 271 []>, XOP_4V, Sched<[sched]>, NotMemoryFoldable; 272 let mayLoad = 1 in 273 def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), 274 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 275 !strconcat("vpcom", Suffix, 276 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 277 []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>, 278 NotMemoryFoldable; 279 } 280 } 281 282 def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)), 283 (vt128 VR128:$src1), imm:$cc), 284 (!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2, 285 (CommuteVPCOMCC imm:$cc))>; 286} 287 288defm VPCOMB : 
xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>; 289defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>; 290defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>; 291defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>; 292defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>; 293defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>; 294defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>; 295defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>; 296 297multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, 298 ValueType vt128, X86FoldableSchedWrite sched> { 299 def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst), 300 (ins VR128:$src1, VR128:$src2, VR128:$src3), 301 !strconcat(OpcodeStr, 302 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 303 [(set VR128:$dst, 304 (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), 305 (vt128 VR128:$src3))))]>, 306 XOP_4V, Sched<[sched]>; 307 def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst), 308 (ins VR128:$src1, VR128:$src2, i128mem:$src3), 309 !strconcat(OpcodeStr, 310 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 311 [(set VR128:$dst, 312 (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), 313 (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>, 314 XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; 315 def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst), 316 (ins VR128:$src1, i128mem:$src2, VR128:$src3), 317 !strconcat(OpcodeStr, 318 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 319 [(set VR128:$dst, 320 (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))), 321 (vt128 VR128:$src3))))]>, 322 XOP_4V, Sched<[sched.Folded, ReadAfterLd, 323 // 128mem:$src2 324 ReadDefault, ReadDefault, ReadDefault, ReadDefault, 325 ReadDefault, 326 // VR128:$src3 327 ReadAfterLd]>; 328 
// For disassembler 329 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in 330 def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst), 331 (ins VR128:$src1, VR128:$src2, VR128:$src3), 332 !strconcat(OpcodeStr, 333 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 334 []>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>; 335} 336 337let ExeDomain = SSEPackedInt in { 338 defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8, 339 SchedWriteVarShuffle.XMM>; 340} 341 342// Instruction where either second or third source can be memory 343multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC, 344 X86MemOperand x86memop, ValueType VT, 345 X86FoldableSchedWrite sched> { 346 def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst), 347 (ins RC:$src1, RC:$src2, RC:$src3), 348 !strconcat(OpcodeStr, 349 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 350 [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1), 351 (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V, 352 Sched<[sched]>; 353 def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst), 354 (ins RC:$src1, RC:$src2, x86memop:$src3), 355 !strconcat(OpcodeStr, 356 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 357 [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1), 358 (X86andnp (load addr:$src3), RC:$src2))))]>, 359 XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; 360 def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst), 361 (ins RC:$src1, x86memop:$src2, RC:$src3), 362 !strconcat(OpcodeStr, 363 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 364 [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1), 365 (X86andnp RC:$src3, (load addr:$src2)))))]>, 366 XOP_4V, Sched<[sched.Folded, ReadAfterLd, 367 // x86memop:$src2 368 ReadDefault, ReadDefault, ReadDefault, ReadDefault, 369 ReadDefault, 370 // RC::$src3 371 ReadAfterLd]>; 372 // For disassembler 373 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in 374 def rrr_REV : 
IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst), 375 (ins RC:$src1, RC:$src2, RC:$src3), 376 !strconcat(OpcodeStr, 377 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 378 []>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>; 379} 380 381let ExeDomain = SSEPackedInt in { 382 defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64, 383 SchedWriteShuffle.XMM>; 384 defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64, 385 SchedWriteShuffle.YMM>, VEX_L; 386} 387 388multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, 389 X86MemOperand intmemop, X86MemOperand fpmemop, 390 ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag, 391 X86FoldableSchedWrite sched> { 392 def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst), 393 (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), 394 !strconcat(OpcodeStr, 395 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), 396 [(set RC:$dst, 397 (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>, 398 Sched<[sched]>; 399 def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst), 400 (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4), 401 !strconcat(OpcodeStr, 402 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), 403 [(set RC:$dst, 404 (VT (X86vpermil2 RC:$src1, RC:$src2, 405 (bitconvert (IntLdFrag addr:$src3)), 406 (i8 imm:$src4))))]>, VEX_W, 407 Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; 408 def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst), 409 (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4), 410 !strconcat(OpcodeStr, 411 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), 412 [(set RC:$dst, 413 (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2), 414 RC:$src3, (i8 imm:$src4))))]>, 415 Sched<[sched.Folded, ReadAfterLd, 416 // fpmemop:$src2 417 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, 418 // RC:$src3 419 ReadAfterLd]>; 420 // For disassembler 421 let isCodeGenOnly = 1, ForceDisassemble = 1, 
hasSideEffects = 0 in 422 def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst), 423 (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), 424 !strconcat(OpcodeStr, 425 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), 426 []>, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>; 427} 428 429let ExeDomain = SSEPackedDouble in { 430 defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem, 431 v2f64, loadv2f64, loadv2i64, 432 SchedWriteFVarShuffle.XMM>; 433 defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem, 434 v4f64, loadv4f64, loadv4i64, 435 SchedWriteFVarShuffle.YMM>, VEX_L; 436} 437 438let ExeDomain = SSEPackedSingle in { 439 defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem, 440 v4f32, loadv4f32, loadv2i64, 441 SchedWriteFVarShuffle.XMM>; 442 defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem, 443 v8f32, loadv8f32, loadv4i64, 444 SchedWriteFVarShuffle.YMM>, VEX_L; 445} 446 447